From 8053f0888955eb78db480bc4191b359dc1dcbc31 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Mon, 30 Mar 2026 10:05:18 -0400 Subject: [PATCH] Cap DuckDB memory during parquet compaction to 2 GB (#758) The in-memory DuckDB connection used for archive compaction was using the default memory_limit (80% of physical RAM), causing multi-GB spikes when decompressing large parquet archives. With 800 MB of compressed archives, this easily hits 5-7 GB. Set memory_limit=2GB so DuckDB spills to disk instead of consuming all available RAM. Also set preserve_insertion_order=false to reduce memory pressure since compaction has no meaningful row order. Co-Authored-By: Claude Opus 4.6 (1M context) --- Lite/Services/ArchiveService.cs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Lite/Services/ArchiveService.cs b/Lite/Services/ArchiveService.cs index 31d2d5d..9f5f0bb 100644 --- a/Lite/Services/ArchiveService.cs +++ b/Lite/Services/ArchiveService.cs @@ -309,6 +309,14 @@ private void CompactParquetFiles() using var con = new DuckDBConnection("DataSource=:memory:"); con.Open(); + /* Cap memory to avoid multi-GB spikes decompressing large parquet archives. DuckDB spills excess to its temp_directory (for in-memory databases this defaults to .tmp under the working directory — NOTE(review): confirm it is writable in deployment, or set temp_directory explicitly). */ + using (var pragma = con.CreateCommand()) + { + pragma.CommandText = "SET memory_limit = '2GB'; SET preserve_insertion_order = false;"; + pragma.ExecuteNonQuery(); + } + var totalMerged = 0; var totalRemoved = 0;