diff --git a/mysql-test/suite/innodb/r/MDEV-39006.result b/mysql-test/suite/innodb/r/MDEV-39006.result new file mode 100644 index 0000000000000..1ca5d861b64cb --- /dev/null +++ b/mysql-test/suite/innodb/r/MDEV-39006.result @@ -0,0 +1,22 @@ +SET GLOBAL innodb_max_dirty_pages_pct=0.0; +SET GLOBAL innodb_max_dirty_pages_pct_lwm=0.0; +SET GLOBAL innodb_max_dirty_pages_pct=99; +SET GLOBAL innodb_max_dirty_pages_pct_lwm=99; +CREATE TABLE t1 (f1 INTEGER, f2 VARCHAR(1024)) ENGINE=InnoDB; +CREATE TABLE ten (f1 INTEGER) ENGINE=InnoDB; +INSERT INTO ten VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10); +INSERT INTO t1 (f2) SELECT REPEAT('x', 1024) FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4; +SET GLOBAL debug_dbug = "+d,sst_disable_writes_now,sst_force_lsn_advance"; +SET GLOBAL innodb_max_dirty_pages_pct_lwm=99; +SET GLOBAL debug_dbug = ""; +SET GLOBAL innodb_max_dirty_pages_pct_lwm=0.0; +SET GLOBAL innodb_max_dirty_pages_pct=0.0; +SET GLOBAL innodb_max_dirty_pages_pct=99; +SET GLOBAL debug_dbug = "+d,sst_enable_writes_now"; +SET GLOBAL innodb_max_dirty_pages_pct_lwm=99; +SET GLOBAL debug_dbug = ""; +DROP TABLE t1; +DROP TABLE ten; +Warnings: +Warning 1210 innodb_max_dirty_pages_pct cannot be set lower than innodb_max_dirty_pages_pct_lwm. +Warning 1210 Lowering innodb_max_dirty_page_pct_lwm to 90.000000 diff --git a/mysql-test/suite/innodb/t/MDEV-39006.test b/mysql-test/suite/innodb/t/MDEV-39006.test new file mode 100644 index 0000000000000..d3f681fe5d0e7 --- /dev/null +++ b/mysql-test/suite/innodb/t/MDEV-39006.test @@ -0,0 +1,99 @@ +# +# MDEV-39006: simple non-Galera reproducer for the InnoDB page-cleaner +# write leak after ha_disable_internal_writes(true). +# +# This test bypasses Galera entirely. It drives sst_disable_innodb_writes() +# / sst_enable_innodb_writes() through DBUG hooks (sst_disable_writes_now / +# sst_enable_writes_now) installed at the bottom of +# innodb_max_dirty_pages_pct_lwm_update(). 
The same setter wakes the page +# cleaner, so each `SET GLOBAL innodb_max_dirty_pages_pct_lwm = ...` is the +# single trigger for both the test action and the cleaner wake-up. +# +# The sst_force_lsn_advance DBUG injection (inside sst_disable_innodb_writes) +# deterministically advances LSN past the log_checkpoint_low() early-return +# threshold so the page cleaner's idle-path call has work to do. +# +# Without the wsrep_sst_disable_writes gate in +# storage/innobase/buf/buf0flu.cc:buf_flush_page_cleaner(), the page +# cleaner's idle-path log_checkpoint_low() trips +# ut_ad(!recv_no_log_write) (debug) or appends a fresh FILE_CHECKPOINT +# record to ib_logfile0 (release). With the gate, the cleaner skips +# both and the snapshot stays byte-identical. +# + +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/have_wsrep.inc + +--let $datadir= `SELECT @@datadir` +--let $innodb_max_dirty_pages_pct = `SELECT @@innodb_max_dirty_pages_pct` +--let $innodb_max_dirty_pages_pct_lwm = `SELECT @@innodb_max_dirty_pages_pct_lwm` + +# Drain any baseline dirty pages first, so the buffer pool is clean +# before we accumulate test dirty pages. +SET GLOBAL innodb_max_dirty_pages_pct=0.0; +SET GLOBAL innodb_max_dirty_pages_pct_lwm=0.0; +--let $wait_condition = SELECT variable_value = 0 FROM information_schema.global_status WHERE variable_name = 'INNODB_BUFFER_POOL_PAGES_DIRTY' +--source include/wait_condition.inc + +# Raise the threshold so the page cleaner does not flush proactively +# while we build up dirty pages. 
+SET GLOBAL innodb_max_dirty_pages_pct=99; +SET GLOBAL innodb_max_dirty_pages_pct_lwm=99; + +CREATE TABLE t1 (f1 INTEGER, f2 VARCHAR(1024)) ENGINE=InnoDB; +CREATE TABLE ten (f1 INTEGER) ENGINE=InnoDB; +INSERT INTO ten VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10); +INSERT INTO t1 (f2) SELECT REPEAT('x', 1024) FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4; + +# Arm two DBUG hooks on innodb_max_dirty_pages_pct_lwm_update(): +# sst_disable_writes_now -> calls ha_disable_internal_writes(true) +# sst_force_lsn_advance -> writes one FILE_MODIFY redo record from +# inside sst_disable_innodb_writes() so LSN +# moves past the log_checkpoint_low() +# early-return threshold. +SET GLOBAL debug_dbug = "+d,sst_disable_writes_now,sst_force_lsn_advance"; + +# Trigger the disable hook. Setting lwm to 99 (its current value) is enough +# to invoke innodb_max_dirty_pages_pct_lwm_update(), which fires the DBUG +# block and drives ha_disable_internal_writes(true) -> sst_disable_innodb_writes +# (where sst_force_lsn_advance also fires). +SET GLOBAL innodb_max_dirty_pages_pct_lwm=99; + +SET GLOBAL debug_dbug = ""; + +# Snapshot of datadir BEFORE waking the page cleaner. Excludes transient +# SST/PID artefacts so the test isolates engine-level writes. +--exec find $datadir -type f ! -name 'tables_flushed' ! -name 'backup_sst_complete' ! -name 'wsrep_sst*.pid' ! -name 'wsrep_sst*.log' ! -name 'stunnel.conf' -exec md5sum {} \; 2>/dev/null | sort -u | md5sum > $MYSQLTEST_VARDIR/tmp/innodb_before + +# Wake the page cleaner. With the production fix in +# buf_flush_page_cleaner() this is a no-op; without it the cleaner +# enters log_checkpoint_low() past the early-return because the +# injection bumped LSN, then either trips ut_ad(!recv_no_log_write) +# (debug) or writes FILE_CHECKPOINT to ib_logfile0 (release). +SET GLOBAL innodb_max_dirty_pages_pct_lwm=0.0; +SET GLOBAL innodb_max_dirty_pages_pct=0.0; +--sleep 2 + +# Snapshot of datadir AFTER the page cleaner has had a chance to run. 
+--exec find $datadir -type f ! -name 'tables_flushed' ! -name 'backup_sst_complete' ! -name 'wsrep_sst*.pid' ! -name 'wsrep_sst*.log' ! -name 'stunnel.conf' -exec md5sum {} \; 2>/dev/null | sort -u | md5sum > $MYSQLTEST_VARDIR/tmp/innodb_after + +# The two snapshots must be identical: no datadir writes while +# wsrep_sst_disable_writes is set. +--diff_files $MYSQLTEST_VARDIR/tmp/innodb_before $MYSQLTEST_VARDIR/tmp/innodb_after + +# Re-enable internal writes via the matching DBUG hook. Clears +# recv_no_log_write and resumes normal page-cleaner operation. +# (raise pct first so the lwm setter does not warn about lwm > pct.) +SET GLOBAL innodb_max_dirty_pages_pct=99; +SET GLOBAL debug_dbug = "+d,sst_enable_writes_now"; +SET GLOBAL innodb_max_dirty_pages_pct_lwm=99; +SET GLOBAL debug_dbug = ""; + +DROP TABLE t1; +DROP TABLE ten; + +--disable_query_log +--eval SET GLOBAL innodb_max_dirty_pages_pct = $innodb_max_dirty_pages_pct +--eval SET GLOBAL innodb_max_dirty_pages_pct_lwm = $innodb_max_dirty_pages_pct_lwm +--enable_query_log diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index e1d153b0cc9aa..46e24221ec1c6 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -44,6 +44,12 @@ Created 11/11/1995 Heikki Tuuri #include "lzo/lzo1x.h" #include "snappy-c.h" +#ifdef WITH_WSREP +extern Atomic_relaxed<bool> wsrep_sst_disable_writes; /* checked by buf_flush_page_cleaner() */ +#else +constexpr bool wsrep_sst_disable_writes= false; +#endif + /** Number of pages flushed via LRU. Protected by buf_pool.mutex. Also included in buf_pool.stat.n_pages_written. 
*/ ulint buf_lru_flush_page_count; @@ -2012,6 +2018,7 @@ static void log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) noexcept log_sys.latch.wr_lock(); } + ut_ad(!recv_no_log_write); ut_ad(oldest_lsn > log_sys.last_checkpoint_lsn); ut_ad(log_sys.get_flushed_lsn() >= flush_lsn); @@ -2544,23 +2551,28 @@ static void buf_flush_page_cleaner() noexcept pthread_cond_broadcast(&buf_pool.done_flush_LRU); pthread_cond_broadcast(&buf_pool.done_flush_list); mysql_mutex_unlock(&buf_pool.flush_list_mutex); - buf_dblwr.flush_buffered_writes(); - - do + /* Skip the doublewrite flush and checkpoint while a Galera SST donor + has called ha_disable_internal_writes(true). */ + if (UNIV_LIKELY(!wsrep_sst_disable_writes)) { - if (recv_recovery_is_on()) - continue; - IF_DBUG(if (log_sys.last_checkpoint_lsn && - srv_shutdown_state < SRV_SHUTDOWN_CLEANUP && - (_db_keyword_(nullptr, "ib_log_checkpoint_avoid", 1) || - _db_keyword_(nullptr, "ib_log_checkpoint_avoid_hard", 1))) - continue,); - if (log_sys.check_for_checkpoint() || - (!srv_startup_is_before_trx_rollback_phase && - srv_operation <= SRV_OPERATION_EXPORT_RESTORED)) - log_checkpoint(); + buf_dblwr.flush_buffered_writes(); + + do + { + if (recv_recovery_is_on()) + continue; + IF_DBUG(if (log_sys.last_checkpoint_lsn && + srv_shutdown_state < SRV_SHUTDOWN_CLEANUP && + (_db_keyword_(nullptr, "ib_log_checkpoint_avoid", 1) || + _db_keyword_(nullptr, "ib_log_checkpoint_avoid_hard", 1))) + continue,); + if (log_sys.check_for_checkpoint() || + (!srv_startup_is_before_trx_rollback_phase && + srv_operation <= SRV_OPERATION_EXPORT_RESTORED)) + log_checkpoint(); + } + while (false); } - while (false); if (UNIV_UNLIKELY(srv_shutdown_state >= SRV_SHUTDOWN_LAST_PHASE)) { diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 1b9dd6228aacc..83c160b42a7f9 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -3009,6 +3009,26 @@ ATTRIBUTE_NOINLINE ATTRIBUTE_COLD void mtr_t::name_write() 
noexcept mtr.commit_files(); } +#ifdef UNIV_DEBUG +/** A debug helper function to append a single FILE_MODIFY redo record so that +log_sys.get_lsn() advances past + last_checkpoint_lsn + SIZE_OF_FILE_CHECKPOINT + 8*is_encrypted(). +The record refers to a non-predefined dummy tablespace and is flushed to +ib_logfile0 before recv_no_log_write is set; recovery from a later +checkpoint will skip past it. */ +ATTRIBUTE_COLD void debug_advance_lsn_via_file_modify() noexcept +{ + log_sys.latch.wr_lock(); + mtr_t mtr; + mtr.start(); + mtr.log_file_op(FILE_MODIFY, SRV_SPACE_ID_UPPER_BOUND - 1, + "./mdev39006_fake.ibd"); + const lsn_t commit_lsn= mtr.commit_files(); + log_sys.latch.wr_unlock(); + log_write_up_to(commit_lsn ? commit_lsn : log_sys.get_lsn(), true); +} +#endif + /** On a log checkpoint, reset fil_names_dirty_and_write() flags and write out FILE_MODIFY if needed, and write FILE_CHECKPOINT. @param lsn checkpoint LSN diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 3a53231cbb459..1f0050a05fb94 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -1798,10 +1798,29 @@ static void sst_disable_innodb_writes() buf_flush_page_cleaner(). Let us prevent that by invoking another checkpoint (which will write the FILE_CHECKPOINT record). */ log_make_checkpoint(); + + ut_d(bool skip_verify_checkpoint= false); + DBUG_EXECUTE_IF("sst_force_lsn_advance", + { + /* Test-only: write a single FILE_MODIFY redo record so that + log_sys.get_lsn() advances past + last_checkpoint_lsn + SIZE_OF_FILE_CHECKPOINT + 8*is_encrypted(). + A subsequent buf_flush_page_cleaner() wake-up will then enter + log_checkpoint_low() past its early-return and, absent the + wsrep_sst_disable_writes gate in buf0flu.cc, fire the + recv_no_log_write tripwire (debug) or write a FILE_CHECKPOINT + record to ib_logfile0 (release). 
With the gate in place this + extra LSN gets checkpointed past once sst_enable_innodb_writes() + runs, so it does not survive past the SST window. */ + skip_verify_checkpoint= true; + debug_advance_lsn_via_file_modify(); + }); + ut_d(recv_no_log_write= true); /* If this were not a no-op, an assertion would fail due to recv_no_log_write. */ - ut_d(log_make_checkpoint()); + ut_d(if (!skip_verify_checkpoint) /* not a no-op after the LSN injection */ + log_make_checkpoint()); } static void sst_enable_innodb_writes() @@ -17502,6 +17521,15 @@ innodb_max_dirty_pages_pct_lwm_update( mysql_mutex_lock(&buf_pool.flush_list_mutex); buf_pool.page_cleaner_wakeup(); mysql_mutex_unlock(&buf_pool.flush_list_mutex); + +#ifdef WITH_WSREP + /* To drive ha_disable_internal_writes() through this setter. */ + DBUG_EXECUTE_IF("sst_disable_writes_now", + ha_disable_internal_writes(true);); + DBUG_EXECUTE_IF("sst_enable_writes_now", + ha_disable_internal_writes(false);); +#endif /* WITH_WSREP */ + mysql_mutex_lock(&LOCK_global_system_variables); } diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 72d5942c36c51..e3aafc6fdb03c 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -1805,6 +1805,14 @@ and write out FILE_MODIFY if needed, and write FILE_CHECKPOINT. @return current LSN */ ATTRIBUTE_COLD lsn_t fil_names_clear(lsn_t lsn) noexcept; +#ifdef UNIV_DEBUG +/** Append a single FILE_MODIFY redo record so that log_sys.get_lsn() advances +past last_checkpoint_lsn + SIZE_OF_FILE_CHECKPOINT + 8*is_encrypted(). +The record refers to a non-predefined dummy tablespace and is flushed to +ib_logfile0; recovery from a later checkpoint skips past it. */ +ATTRIBUTE_COLD void debug_advance_lsn_via_file_modify() noexcept; +#endif + #ifdef UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH void test_make_filepath(); #endif /* UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH */