From 782a4ddeb7a1d06b7303f62252ad0a1911967e96 Mon Sep 17 00:00:00 2001 From: iequidoo Date: Sun, 17 Aug 2025 13:42:40 -0300 Subject: [PATCH 1/2] feat: Trigger housekeeping after opening db Some migrations want housekeeping to run. Also if housekeeping failed before, fixing the reason and restarting the program is the most natural way to retry it. --- src/sql.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/sql.rs b/src/sql.rs index 69fefc7d39..009a8e8b6f 100644 --- a/src/sql.rs +++ b/src/sql.rs @@ -277,6 +277,12 @@ impl Sql { info!(context, "Opened database {:?}.", self.dbfile); *self.is_encrypted.write().await = Some(passphrase_nonempty); + // Some migrations want housekeeping to run. Also if housekeeping failed before, fixing the + // reason and restarting the program is the most natural way to retry it. + context + .set_config_internal(Config::LastHousekeeping, None) + .await?; + // setup debug logging if there is an entry containing its id if let Some(xdc_id) = self .get_raw_config_u32(Config::DebugLogging.as_ref()) From 6b45bf2a255e2e6ba36cd14fa69d9da3fea92e04 Mon Sep 17 00:00:00 2001 From: iequidoo Date: Sun, 17 Aug 2025 09:33:34 -0300 Subject: [PATCH 2/2] feat: Disable wal_autocheckpoint From https://www.sqlite.org/wal.html: > The default strategy is to allow successive write transactions to grow the WAL until the WAL becomes about 1000 pages in size, then to run a checkpoint operation for each subsequent COMMIT until the WAL is reset to be smaller than 1000 pages. By default, the checkpoint will be run automatically by the same thread that does the COMMIT that pushes the WAL over its size limit. This has the effect of causing most COMMIT operations to be very fast but an occasional COMMIT (those that trigger a checkpoint) to be much slower. And while autocheckpoint runs in the `PASSIVE` mode and thus doesn't block concurrent readers and writers, in our design it blocks writers because it's done under `write_mutex` locked and thus may cause the app to stuck for noticeable time. Let's disable autocheckpointing then, we can't rely on it anyway. Instead, run a `TRUNCATE` checkpoint from `inbox_loop()` if the WAL is >= 4K pages and a `PASSIVE` checkpoint otherwise. --- src/context.rs | 2 +- src/scheduler.rs | 7 ++++++- src/sql.rs | 28 ++++++++++++++++++++++------ src/sql/sql_tests.rs | 2 +- 4 files changed, 30 insertions(+), 9 deletions(-) diff --git a/src/context.rs b/src/context.rs index 7e2bf94175..484167e055 100644 --- a/src/context.rs +++ b/src/context.rs @@ -410,7 +410,7 @@ impl Context { /// Changes encrypted database passphrase. pub async fn change_passphrase(&self, passphrase: String) -> Result<()> { - self.sql.change_passphrase(passphrase).await?; + self.sql.change_passphrase(self, passphrase).await?; Ok(()) } diff --git a/src/scheduler.rs b/src/scheduler.rs index ed86783328..451c6ae932 100644 --- a/src/scheduler.rs +++ b/src/scheduler.rs @@ -25,7 +25,7 @@ use crate::location; use crate::log::{LogExt, error, info, warn}; use crate::message::MsgId; use crate::smtp::{Smtp, send_smtp_messages}; -use crate::sql; +use crate::sql::{self, Sql}; use crate::stats::maybe_send_stats; use crate::tools::{self, duration_to_str, maybe_add_time_based_warnings, time, time_elapsed}; use crate::{constants, stats}; @@ -498,6 +498,11 @@ async fn inbox_fetch_idle(ctx: &Context, imap: &mut Imap, mut session: Session) last_housekeeping_time.saturating_add(constants::HOUSEKEEPING_PERIOD); if next_housekeeping_time <= time() { sql::housekeeping(ctx).await.log_err(ctx).ok(); + } else { + let force_truncate = false; + if let Err(err) = Sql::wal_checkpoint(ctx, force_truncate).await { + warn!(ctx, "wal_checkpoint() failed: {err:#}."); + } } } Err(err) => { diff --git a/src/sql.rs b/src/sql.rs index 009a8e8b6f..a1e5575dc3 100644 --- a/src/sql.rs +++ b/src/sql.rs @@ -298,7 +298,11 @@ impl Sql { /// The database must already be encrypted and the passphrase cannot be empty. /// It is impossible to turn encrypted database into unencrypted /// and vice versa this way, use import/export for this. - pub async fn change_passphrase(&self, passphrase: String) -> Result<()> { + pub(crate) async fn change_passphrase( + &self, + _context: &Context, + passphrase: String, + ) -> Result<()> { let mut lock = self.pool.write().await; let pool = lock.take().context("SQL connection pool is not open")?; @@ -683,8 +687,12 @@ impl Sql { &self.config_cache } - /// Runs a checkpoint operation in TRUNCATE mode, so the WAL file is truncated to 0 bytes. - pub(crate) async fn wal_checkpoint(context: &Context) -> Result<()> { + /// Runs a WAL checkpoint operation. + /// + /// * `force_truncate` - Force TRUNCATE mode to truncate the WAL file to 0 bytes, otherwise only + /// run PASSIVE mode if the WAL isn't too large. NB: Truncating blocks all db connections for + /// some time. + pub(crate) async fn wal_checkpoint(context: &Context, force_truncate: bool) -> Result<()> { let t_start = Time::now(); let lock = context.sql.pool.read().await; let Some(pool) = lock.as_ref() else { @@ -695,13 +703,19 @@ impl Sql { // Do as much work as possible without blocking anybody. let query_only = true; let conn = pool.get(query_only).await?; - tokio::task::block_in_place(|| { + let pages_total = tokio::task::block_in_place(|| { // Execute some transaction causing the WAL file to be opened so that the // `wal_checkpoint()` can proceed, otherwise it fails when called the first time, // see https://sqlite.org/forum/forumpost/7512d76a05268fc8. conn.query_row("PRAGMA table_list", [], |_| Ok(()))?; - conn.query_row("PRAGMA wal_checkpoint(PASSIVE)", [], |_| Ok(())) + conn.query_row("PRAGMA wal_checkpoint(PASSIVE)", [], |row| { + let pages_total: i64 = row.get(1)?; + Ok(pages_total) + }) })?; + if !force_truncate && pages_total < 4096 { + return Ok(()); + } // Kick out writers. const _: () = assert!(Sql::N_DB_CONNECTIONS > 1, "Deadlock possible"); @@ -772,6 +786,7 @@ fn new_connection(path: &Path, passphrase: &str) -> Result { PRAGMA busy_timeout = 0; -- fail immediately PRAGMA soft_heap_limit = 8388608; -- 8 MiB limit, same as set in Android SQLiteDatabase. PRAGMA foreign_keys=on; + PRAGMA wal_autocheckpoint=N; ", )?; @@ -880,7 +895,8 @@ pub async fn housekeeping(context: &Context) -> Result<()> { // bigger than 200M) and also make sure we truncate the WAL periodically. Auto-checkponting does // not normally truncate the WAL (unless the `journal_size_limit` pragma is set), see // https://www.sqlite.org/wal.html. - if let Err(err) = Sql::wal_checkpoint(context).await { + let force_truncate = true; + if let Err(err) = Sql::wal_checkpoint(context, force_truncate).await { warn!(context, "wal_checkpoint() failed: {err:#}."); debug_assert!(false); } diff --git a/src/sql/sql_tests.rs b/src/sql/sql_tests.rs index 60ee39e5fd..c551f6ceab 100644 --- a/src/sql/sql_tests.rs +++ b/src/sql/sql_tests.rs @@ -263,7 +263,7 @@ async fn test_sql_change_passphrase() -> Result<()> { sql.open(&t, "foo".to_string()) .await .context("failed to open the database second time")?; - sql.change_passphrase("bar".to_string()) + sql.change_passphrase(&t, "bar".to_string()) .await .context("failed to change passphrase")?;