feat: add DuckDB chunking file lock test and improve connection handling

flarco · flarco · commit 8d7c1f6996fc · 2026-03-11T09:45:03.000-03:00
diff --git a/cmd/sling/tests/pipelines/p.19.duckdb_chunk_lock.yaml b/cmd/sling/tests/pipelines/p.19.duckdb_chunk_lock.yaml
@@ -0,0 +1,56 @@
+# Test DuckDB chunking file lock (issue #717)
+# When using chunk_size with DuckDB target, ClearTableForChunkLoadWithRange
+# must close its connection before chunk tasks open theirs, otherwise
+# DuckDB's exclusive file lock causes "Could not set lock on file" errors.
+
+steps:
+  # 1. Create source table in Postgres
+  - id: setup
+    connection: postgres
+    query: |
+      DROP TABLE IF EXISTS public.test_duckdb_chunk_lock;
+      CREATE TABLE public.test_duckdb_chunk_lock AS
+      SELECT generate_series AS id, 'name_' || generate_series AS name
+      FROM generate_series(1, 1000);
+
+  - log: "Created source table with 1000 rows"
+
+  # 2. Run replication with chunking to DuckDB target
+  - replication:
+      source: postgres
+      target: DUCKDB
+      defaults:
+        mode: full-refresh
+        target_options:
+          use_bulk: false
+      env:
+        SLING_THREADS: 1
+      streams:
+        public.test_duckdb_chunk_lock:
+          object: main.test_duckdb_chunk_lock
+          primary_key: [id]
+          update_key: id
+          source_options:
+            chunk_size: 250
+    on_failure: abort
+
+  - log: "Replication with chunking completed"
+
+  # 3. Verify all 1000 rows arrived
+  - connection: DUCKDB
+    query: SELECT count(*) as cnt FROM main.test_duckdb_chunk_lock
+    into: result
+
+  - log: "DuckDB row count: {store.result[0].cnt}"
+
+  - check: int_parse(store.result[0].cnt) == 1000
+    failure_message: "Expected 1000 rows, got {store.result[0].cnt}"
+
+  - log: "SUCCESS: DuckDB chunking with chunk_size works (issue #717)"
+
+  # 4. Cleanup
+  - connection: DUCKDB
+    query: DROP TABLE IF EXISTS main.test_duckdb_chunk_lock;
+
+  - connection: postgres
+    query: DROP TABLE IF EXISTS public.test_duckdb_chunk_lock;
diff --git a/cmd/sling/tests/suite.cli.yaml b/cmd/sling/tests/suite.cli.yaml
@@ -2103,4 +2103,14 @@
   run: 'sling run -d -r cmd/sling/tests/replications/r.108.oracle_chunk_custom_sql.yaml'
   streams: 2
   output_contains:
-    - 'execution succeeded'
+    - 'execution succeeded'
+
+# DuckDB chunking file lock (issue #717)
+# With chunk_size and a DuckDB target, the table-preparation connection must be closed before
+# chunk tasks open theirs; otherwise the file lock causes "Could not set lock on file" errors.
+- id: 219
+  name: 'DuckDB chunking file lock (issue #717)'
+  run: 'sling run -d -p cmd/sling/tests/pipelines/p.19.duckdb_chunk_lock.yaml'
+  group: duckdb
+  output_contains:
+    - 'SUCCESS: DuckDB chunking with chunk_size works (issue #717)'
diff --git a/core/sling/config.go b/core/sling/config.go
@@ -317,6 +317,7 @@ func (cfg *Config) ClearTableForChunkLoadWithRange() (err error) {
 		if err != nil {
 			return g.Error(err, "could not connect to target conn for preparing final table for chunk loading")
 		}
+		defer dbConn.Close()
 
 		switch cfg.Mode {
 		case FullRefreshMode:
diff --git a/core/sling/replication.go b/core/sling/replication.go
@@ -768,7 +768,7 @@ func (rd *ReplicationConfig) ProcessChunks() (err error) {
 		if chunkExpr != "" {
 			// no update_key needed for chunking by expression
 		} else if stream.config.UpdateKey == "" {
-			return g.Error(err, "did not provide update_key for stream chunking: %s", stream.name)
+			return g.Error("did not provide update_key for stream chunking: %s", stream.name)
 		} else if stream.config.Mode == IncrementalMode {
 			// need to get the max value target side if the table exists
 			var tempCfg Config

Original file line number	Diff line number	Diff line change
`@@ -317,6 +317,7 @@ func (cfg *Config) ClearTableForChunkLoadWithRange() (err error) {`
`317`	`317`	`if err != nil {`
`318`	`318`	`return g.Error(err, "could not connect to target conn for preparing final table for chunk loading")`
`319`	`319`	`}`
	`320`	`+ defer dbConn.Close()`
`320`	`321`
`321`	`322`	`switch cfg.Mode {`
`322`	`323`	`case FullRefreshMode:`