Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions mysql-test/suite/innodb/r/MDEV-39006.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
SET GLOBAL innodb_max_dirty_pages_pct=0.0;
SET GLOBAL innodb_max_dirty_pages_pct_lwm=0.0;
SET GLOBAL innodb_max_dirty_pages_pct=99;
SET GLOBAL innodb_max_dirty_pages_pct_lwm=99;
CREATE TABLE t1 (f1 INTEGER, f2 VARCHAR(1024)) ENGINE=InnoDB;
CREATE TABLE ten (f1 INTEGER) ENGINE=InnoDB;
INSERT INTO ten VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
INSERT INTO t1 (f2) SELECT REPEAT('x', 1024) FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4;
SET GLOBAL debug_dbug = "+d,sst_disable_writes_now,sst_force_lsn_advance";
SET GLOBAL innodb_max_dirty_pages_pct_lwm=99;
SET GLOBAL debug_dbug = "";
SET GLOBAL innodb_max_dirty_pages_pct_lwm=0.0;
SET GLOBAL innodb_max_dirty_pages_pct=0.0;
SET GLOBAL innodb_max_dirty_pages_pct=99;
SET GLOBAL debug_dbug = "+d,sst_enable_writes_now";
SET GLOBAL innodb_max_dirty_pages_pct_lwm=99;
SET GLOBAL debug_dbug = "";
DROP TABLE t1;
DROP TABLE ten;
Warnings:
Warning 1210 innodb_max_dirty_pages_pct cannot be set lower than innodb_max_dirty_pages_pct_lwm.
Warning 1210 Lowering innodb_max_dirty_page_pct_lwm to 90.000000
99 changes: 99 additions & 0 deletions mysql-test/suite/innodb/t/MDEV-39006.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#
# MDEV-39006: simple non-Galera reproducer for the InnoDB page-cleaner
# write leak after ha_disable_internal_writes(true).
#
# This test bypasses Galera entirely. It drives sst_disable_innodb_writes()
# / sst_enable_innodb_writes() through DBUG hooks (sst_disable_writes_now /
# sst_enable_writes_now) installed at the bottom of
# innodb_max_dirty_pages_pct_lwm_update(). The same setter wakes the page
# cleaner, so each `SET GLOBAL innodb_max_dirty_pages_pct_lwm = ...` is the
# single trigger for both the test action and the cleaner wake-up.
#
# The sst_force_lsn_advance DBUG injection (inside sst_disable_innodb_writes)
# deterministically advances LSN past the log_checkpoint_low() early-return
# threshold so the page cleaner's idle-path call has work to do.
#
# Without the wsrep_sst_disable_writes gate in
# storage/innobase/buf/buf0flu.cc:buf_flush_page_cleaner(), the page
# cleaner's idle-path log_checkpoint_low() trips
# ut_ad(!recv_no_log_write) (debug) or appends a fresh FILE_CHECKPOINT
# record to ib_logfile0 (release). With the gate, the cleaner skips
# both and the snapshot stays byte-identical.
#

--source include/have_innodb.inc
--source include/have_debug.inc
--source include/have_wsrep.inc

--let $datadir= `SELECT @@datadir`
--let $innodb_max_dirty_pages_pct = `SELECT @@innodb_max_dirty_pages_pct`
--let $innodb_max_dirty_pages_pct_lwm = `SELECT @@innodb_max_dirty_pages_pct_lwm`

# Drain any baseline dirty pages first, so the buffer pool is clean
# before we accumulate test dirty pages.
SET GLOBAL innodb_max_dirty_pages_pct=0.0;
SET GLOBAL innodb_max_dirty_pages_pct_lwm=0.0;
--let $wait_condition = SELECT variable_value = 0 FROM information_schema.global_status WHERE variable_name = 'INNODB_BUFFER_POOL_PAGES_DIRTY'
--source include/wait_condition.inc

# Raise the threshold so the page cleaner does not flush proactively
# while we build up dirty pages.
SET GLOBAL innodb_max_dirty_pages_pct=99;
SET GLOBAL innodb_max_dirty_pages_pct_lwm=99;

CREATE TABLE t1 (f1 INTEGER, f2 VARCHAR(1024)) ENGINE=InnoDB;
CREATE TABLE ten (f1 INTEGER) ENGINE=InnoDB;
INSERT INTO ten VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
INSERT INTO t1 (f2) SELECT REPEAT('x', 1024) FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4;

# Arm two DBUG hooks on innodb_max_dirty_pages_pct_lwm_update():
# sst_disable_writes_now -> calls ha_disable_internal_writes(true)
# sst_force_lsn_advance -> writes one FILE_MODIFY redo record from
# inside sst_disable_innodb_writes() so LSN
# moves past the log_checkpoint_low()
# early-return threshold.
SET GLOBAL debug_dbug = "+d,sst_disable_writes_now,sst_force_lsn_advance";

# Trigger the disable hook. Setting lwm to 99 (its current value) is enough
# to invoke innodb_max_dirty_pages_pct_lwm_update(), which fires the DBUG
# block and drives ha_disable_internal_writes(true) -> sst_disable_innodb_writes
# (where sst_force_lsn_advance also fires).
SET GLOBAL innodb_max_dirty_pages_pct_lwm=99;

SET GLOBAL debug_dbug = "";

# Snapshot of datadir BEFORE waking the page cleaner. Excludes transient
# SST/PID artefacts so the test isolates engine-level writes.
--exec find $datadir -type f ! -name 'tables_flushed' ! -name 'backup_sst_complete' ! -name 'wsrep_sst*.pid' ! -name 'wsrep_sst*.log' ! -name 'stunnel.conf' -exec md5sum {} \; 2>/dev/null | sort -u | md5sum > $MYSQLTEST_VARDIR/tmp/innodb_before

# Wake the page cleaner. With the production fix in
# buf_flush_page_cleaner() this is a no-op; without it the cleaner
# enters log_checkpoint_low() past the early-return because the
# injection bumped LSN, then either trips ut_ad(!recv_no_log_write)
# (debug) or writes FILE_CHECKPOINT to ib_logfile0 (release).
SET GLOBAL innodb_max_dirty_pages_pct_lwm=0.0;
SET GLOBAL innodb_max_dirty_pages_pct=0.0;
--sleep 2
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any other way to see that page cleaner had a change to run? This introduces a race, page cleaner thread might not get change to run during this 2 second wait leading to fact that this test is not testing intended behaviour.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The page cleaner is a background thread spawned via std::thread, with no THD and no current_thd, it cannot host a per-THD DEBUG_SYNC action. We can increase sleep time to 5 second.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no counter available for dirty_pages that we could query?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no existing status variable that increments per page-cleaner idle iteration. Innodb_buffer_pool_pages_dirty is already 0 throughout the SST window and never changes; pages_flushed, dblwr_writes, MONITOR_FLUSH_BACKGROUND_COUNT only increment on actual flush work, which the cleaner's set_almost_idle: block doesn't do.

The cleaner does flip buf_pool.page_cleaner_idle() right before reaching set_almost_idle:. That predicate is exactly what we'd want to observe - it goes false on page_cleaner_wakeup() and true again once the cleaner has fully completed an iteration. But it's not exposed as a status variable, sysvar, or INFORMATION_SCHEMA row anywhere. Adding it as a status variable is the cleanest fix.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hemantdangi-gc I do not think you need any new test cases and any of this debug injection. Current test cases already cover code path effected. See e.g. https://es-jenkins.mariadb.net/job/Test-Package//725056/artifact/test-MTR-custom.log/*view*

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that current test covers issue and new test which is just forced injection can be removed.

But from where you got https://es-jenkins.mariadb.net/job/Test-Package//725056/artifact/test-MTR-custom.log/*view. Is this link from MDEV-3006 jenkins run? The link you provided is build on revision: 363280c179e6c8f7a62405b69121c276b7b3dfe3 but the current patch is on revision: "7106d8d72144117f71ef5feac56374207aab0de9".


# Snapshot of datadir AFTER the page cleaner has had a chance to run.
--exec find $datadir -type f ! -name 'tables_flushed' ! -name 'backup_sst_complete' ! -name 'wsrep_sst*.pid' ! -name 'wsrep_sst*.log' ! -name 'stunnel.conf' -exec md5sum {} \; 2>/dev/null | sort -u | md5sum > $MYSQLTEST_VARDIR/tmp/innodb_after

# The two snapshots must be identical: no datadir writes while
# wsrep_sst_disable_writes is set.
--diff_files $MYSQLTEST_VARDIR/tmp/innodb_before $MYSQLTEST_VARDIR/tmp/innodb_after

# Re-enable internal writes via the matching DBUG hook. Clears
# recv_no_log_write and resumes normal page-cleaner operation.
# (raise pct first so the lwm setter does not warn about lwm > pct.)
SET GLOBAL innodb_max_dirty_pages_pct=99;
SET GLOBAL debug_dbug = "+d,sst_enable_writes_now";
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we clear dbug_dbug before setting max_dirty_pages* values?

SET GLOBAL innodb_max_dirty_pages_pct_lwm=99;
SET GLOBAL debug_dbug = "";

DROP TABLE t1;
DROP TABLE ten;

--disable_query_log
--eval SET GLOBAL innodb_max_dirty_pages_pct = $innodb_max_dirty_pages_pct
--eval SET GLOBAL innodb_max_dirty_pages_pct_lwm = $innodb_max_dirty_pages_pct_lwm
--enable_query_log
42 changes: 27 additions & 15 deletions storage/innobase/buf/buf0flu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ Created 11/11/1995 Heikki Tuuri
#include "lzo/lzo1x.h"
#include "snappy-c.h"

#ifdef WITH_WSREP
extern Atomic_relaxed<bool> wsrep_sst_disable_writes;
#else
constexpr bool wsrep_sst_disable_writes= false;
#endif

/** Number of pages flushed via LRU. Protected by buf_pool.mutex.
Also included in buf_pool.stat.n_pages_written. */
ulint buf_lru_flush_page_count;
Expand Down Expand Up @@ -2012,6 +2018,7 @@ static void log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) noexcept
log_sys.latch.wr_lock();
}

ut_ad(!recv_no_log_write);
ut_ad(oldest_lsn > log_sys.last_checkpoint_lsn);
ut_ad(log_sys.get_flushed_lsn() >= flush_lsn);

Expand Down Expand Up @@ -2544,23 +2551,28 @@ static void buf_flush_page_cleaner() noexcept
pthread_cond_broadcast(&buf_pool.done_flush_LRU);
pthread_cond_broadcast(&buf_pool.done_flush_list);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
buf_dblwr.flush_buffered_writes();

do
/* Skip the doublewrite flush and checkpoint while a Galera SST donor
has called ha_disable_internal_writes(true). */
if (UNIV_LIKELY(!wsrep_sst_disable_writes))
{
if (recv_recovery_is_on())
continue;
IF_DBUG(if (log_sys.last_checkpoint_lsn &&
srv_shutdown_state < SRV_SHUTDOWN_CLEANUP &&
(_db_keyword_(nullptr, "ib_log_checkpoint_avoid", 1) ||
_db_keyword_(nullptr, "ib_log_checkpoint_avoid_hard", 1)))
continue,);
if (log_sys.check_for_checkpoint() ||
(!srv_startup_is_before_trx_rollback_phase &&
srv_operation <= SRV_OPERATION_EXPORT_RESTORED))
log_checkpoint();
buf_dblwr.flush_buffered_writes();

do
{
if (recv_recovery_is_on())
continue;
IF_DBUG(if (log_sys.last_checkpoint_lsn &&
srv_shutdown_state < SRV_SHUTDOWN_CLEANUP &&
(_db_keyword_(nullptr, "ib_log_checkpoint_avoid", 1) ||
_db_keyword_(nullptr, "ib_log_checkpoint_avoid_hard", 1)))
continue,);
if (log_sys.check_for_checkpoint() ||
(!srv_startup_is_before_trx_rollback_phase &&
srv_operation <= SRV_OPERATION_EXPORT_RESTORED))
log_checkpoint();
}
while (false);
}
while (false);

if (UNIV_UNLIKELY(srv_shutdown_state >= SRV_SHUTDOWN_LAST_PHASE))
{
Expand Down
20 changes: 20 additions & 0 deletions storage/innobase/fil/fil0fil.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3009,6 +3009,26 @@ ATTRIBUTE_NOINLINE ATTRIBUTE_COLD void mtr_t::name_write() noexcept
mtr.commit_files();
}

#ifdef UNIV_DEBUG
/** A debug helper function to append a single FILE_MODIFY redo record so that
log_sys.get_lsn() advances past
last_checkpoint_lsn + SIZE_OF_FILE_CHECKPOINT + 8*is_encrypted().
The record refers to a non-predefined dummy tablespace and is flushed to
ib_logfile0 before recv_no_log_write is set; recovery from a later
checkpoint will skip past it. */
ATTRIBUTE_COLD void debug_advance_lsn_via_file_modify() noexcept
{
log_sys.latch.wr_lock();
mtr_t mtr;
mtr.start();
mtr.log_file_op(FILE_MODIFY, SRV_SPACE_ID_UPPER_BOUND - 1,
"./mdev39006_fake.ibd");
const lsn_t commit_lsn= mtr.commit_files();
log_sys.latch.wr_unlock();
log_write_up_to(commit_lsn ? commit_lsn : log_sys.get_lsn(), true);
}
#endif

/** On a log checkpoint, reset fil_names_dirty_and_write() flags
and write out FILE_MODIFY if needed, and write FILE_CHECKPOINT.
@param lsn checkpoint LSN
Expand Down
30 changes: 29 additions & 1 deletion storage/innobase/handler/ha_innodb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1798,10 +1798,29 @@ static void sst_disable_innodb_writes()
buf_flush_page_cleaner(). Let us prevent that by invoking another
checkpoint (which will write the FILE_CHECKPOINT record). */
log_make_checkpoint();

bool skip_verify_checkpoint= false;
DBUG_EXECUTE_IF("sst_force_lsn_advance",
{
/* Test-only: write a single FILE_MODIFY redo record so that
log_sys.get_lsn() advances past
last_checkpoint_lsn + SIZE_OF_FILE_CHECKPOINT + 8*is_encrypted().
A subsequent buf_flush_page_cleaner() wake-up will then enter
log_checkpoint_low() past its early-return and, absent the
wsrep_sst_disable_writes gate in buf0flu.cc, fire the
recv_no_log_write tripwire (debug) or write a FILE_CHECKPOINT
record to ib_logfile0 (release). With the gate in place this
extra LSN gets checkpointed past once sst_enable_innodb_writes()
runs, so it does not survive past the SST window. */
skip_verify_checkpoint= true;
debug_advance_lsn_via_file_modify();
});
Comment on lines +1803 to +1817
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This causes a compilation failure on many debug builders, because the low-level member function mtr_t::log_file_op() is not intended to be invoked directly, but only via other functions, such as mtr_t::name_write().

Is there really no other way to trigger this scenario? Could we write a dummy FILE_CHECKPOINT record instead, like innodb_log_file_size_update() does?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

missed that as it run successfully on local.
Thanks for checking issue, I added a dummy function to resolve the issue as mtr_t::name_write() and innodb_log_file_size_update() needed some more changes.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this piece of code can be removed?


ut_d(recv_no_log_write= true);
/* If this were not a no-op, an assertion would fail due to
recv_no_log_write. */
ut_d(log_make_checkpoint());
if (!skip_verify_checkpoint)
ut_d(log_make_checkpoint());
}

static void sst_enable_innodb_writes()
Expand Down Expand Up @@ -17502,6 +17521,15 @@ innodb_max_dirty_pages_pct_lwm_update(
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_pool.page_cleaner_wakeup();
mysql_mutex_unlock(&buf_pool.flush_list_mutex);

#ifdef WITH_WSREP
/* To drive ha_disable_internal_writes() through this setter. */
DBUG_EXECUTE_IF("sst_disable_writes_now",
ha_disable_internal_writes(true););
DBUG_EXECUTE_IF("sst_enable_writes_now",
ha_disable_internal_writes(false););
#endif /* WITH_WSREP */

mysql_mutex_lock(&LOCK_global_system_variables);
}

Expand Down
8 changes: 8 additions & 0 deletions storage/innobase/include/fil0fil.h
Original file line number Diff line number Diff line change
Expand Up @@ -1805,6 +1805,14 @@ and write out FILE_MODIFY if needed, and write FILE_CHECKPOINT.
@return current LSN */
ATTRIBUTE_COLD lsn_t fil_names_clear(lsn_t lsn) noexcept;

#ifdef UNIV_DEBUG
/** Append a single FILE_MODIFY redo record so that log_sys.get_lsn() advances
past last_checkpoint_lsn + SIZE_OF_FILE_CHECKPOINT + 8*is_encrypted().
The record refers to a non-predefined dummy tablespace and is flushed to
ib_logfile0; recovery from a later checkpoint skips past it. */
ATTRIBUTE_COLD void debug_advance_lsn_via_file_modify() noexcept;
#endif

#ifdef UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH
void test_make_filepath();
#endif /* UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH */
Expand Down