diff --git a/cdb2api/cdb2api.c b/cdb2api/cdb2api.c index 941dc0d942..92c78f81c3 100644 --- a/cdb2api/cdb2api.c +++ b/cdb2api/cdb2api.c @@ -365,6 +365,7 @@ MAKE_CDB2API_TEST_SWITCH(fail_timeout_sockpool_recv) MAKE_CDB2API_TEST_SWITCH(fail_timeout_sockpool_send) MAKE_CDB2API_TEST_SWITCH(fail_ssl_negotiation_once) MAKE_CDB2API_TEST_SWITCH(fail_sslio_close_in_local_cache) +MAKE_CDB2API_TEST_SWITCH(fail_local_cache_peek) #define MAKE_CDB2API_TEST_COUNTER(name) \ static int name; \ @@ -380,6 +381,7 @@ MAKE_CDB2API_TEST_COUNTER(num_tcp_connects) MAKE_CDB2API_TEST_COUNTER(num_cache_lru_evicts) MAKE_CDB2API_TEST_COUNTER(num_cache_hits) MAKE_CDB2API_TEST_COUNTER(num_cache_misses) +MAKE_CDB2API_TEST_COUNTER(num_stale_cache_rejects) MAKE_CDB2API_TEST_COUNTER(num_sockpool_recv) MAKE_CDB2API_TEST_COUNTER(num_sockpool_send) MAKE_CDB2API_TEST_COUNTER(num_sockpool_recv_timeouts) @@ -3244,18 +3246,39 @@ static COMDB2BUF *cdb2_socket_pool_get(cdb2_hndl_tp *hndl, const char *typestr, if (was_from_local_cache) { *was_from_local_cache = 0; sb = local_connection_cache_get(hndl, typestr); - if (sb != NULL) { + if (sb != NULL) *was_from_local_cache = 1; - LOG_CALL("%s(%s,%d,%p,%p[%d]): fd=%d\n", __func__, typestr, dbnum, port, was_from_local_cache, - (was_from_local_cache ? *was_from_local_cache : 0), cdb2buf_fileno(sb)); - return sb; + } + + if (sb == NULL) { + int fd = cdb2_socket_pool_get_ll(hndl, typestr, dbnum, port); + if (fd > 0) + sb = cdb2buf_open(fd, 0); + } + + /* Discard stale connections from either path: EOF, unexpected data, or socket error */ + if (sb != NULL) { + char peek; +#ifdef CDB2API_TEST + int ret = fail_local_cache_peek ? 0 : recv(cdb2buf_fileno(sb), &peek, 1, MSG_PEEK | MSG_DONTWAIT); +#else + int ret = recv(cdb2buf_fileno(sb), &peek, 1, MSG_PEEK | MSG_DONTWAIT); +#endif + if (ret != -1 || (errno != EAGAIN && errno != EWOULDBLOCK)) { + LOG_CALL("%s(%s): stale fd=%d ret=%d errno=%d\n", __func__, typestr, cdb2buf_fileno(sb), ret, errno); +#ifdef CDB2API_TEST + ++num_stale_cache_rejects; +#endif + cdb2buf_close(sb); + sb = NULL; + if (was_from_local_cache) + *was_from_local_cache = 0; } } - int fd = cdb2_socket_pool_get_ll(hndl, typestr, dbnum, port); LOG_CALL("%s(%s,%d,%p,%p[%d]): fd=%d\n", __func__, typestr, dbnum, port, was_from_local_cache, - (was_from_local_cache ? *was_from_local_cache : 0), fd); - return ((fd > 0) ? cdb2buf_open(fd, 0) : NULL); + (was_from_local_cache ? *was_from_local_cache : 0), cdb2buf_fileno(sb)); + return sb; } #ifndef CDB2API_SERVER diff --git a/cdb2api/cdb2api_test.h b/cdb2api/cdb2api_test.h index 855ab6cfaf..911596ef23 100644 --- a/cdb2api/cdb2api_test.h +++ b/cdb2api/cdb2api_test.h @@ -31,6 +31,7 @@ void set_fail_dbinfo_no_response(int); int get_num_cache_lru_evicts(void); int get_num_cache_hits(void); int get_num_cache_misses(void); +int get_num_stale_cache_rejects(void); void set_fail_next(int); void set_fail_read(int); @@ -76,6 +77,7 @@ void set_fail_ssl_new(int); void set_fail_ssl_negotiation_once(int); void set_fail_ssl_poll(int); void set_fail_sslio_close_in_local_cache(int); +void set_fail_local_cache_peek(int); void set_cdb2api_test_comdb2db_cfg(const char *); void set_cdb2api_test_single_cfg(const char *); diff --git a/tests/api_tst.test/runit b/tests/api_tst.test/runit index c20903bd5b..271b6e7fdc 100755 --- a/tests/api_tst.test/runit +++ b/tests/api_tst.test/runit @@ -108,6 +108,10 @@ ${TESTSBUILDDIR}/cdb2api_localcache_systable ${DBNAME} [[ $? -ne 0 ]] && echo "cdb2api_localcache_systable - fail" && exit 1 echo 'cdb2api_localcache_systable - pass' +${TESTSBUILDDIR}/cdb2api_stale_localcache ${DBNAME} +[[ $? -ne 0 ]] && echo "cdb2api_stale_localcache - fail" && exit 1 +echo 'cdb2api_stale_localcache - pass' + echo 'comdb2_feature:discard_unread_socket_data=on' >> ${cfg} ${TESTSBUILDDIR}/cdb2api_unread_data ${DBNAME} [[ $? -ne 0 ]] && echo "cdb2api_unread_data - fail" && exit 1 diff --git a/tests/tools/CMakeLists.txt b/tests/tools/CMakeLists.txt index bf0e9d3e42..bd0594c9d0 100644 --- a/tests/tools/CMakeLists.txt +++ b/tests/tools/CMakeLists.txt @@ -42,6 +42,7 @@ add_exe(cdb2api_effects_on_chunk_error cdb2api_effects_on_chunk_error.c) add_exe(cdb2api_enforce_timeout cdb2api_enforce_timeout.cpp) add_exe(cdb2api_hasql cdb2api_hasql.cpp) add_exe(cdb2api_localcache_systable cdb2api_localcache_systable.cpp) +add_exe(cdb2api_stale_localcache cdb2api_stale_localcache.cpp) add_exe(cdb2api_read_intrans_results cdb2api_read_intrans_results.c) add_exe(cdb2api_rte cdb2api_rte.cpp) add_exe(cdb2api_setoptions cdb2api_setoptions.cpp) diff --git a/tests/tools/cdb2api_stale_localcache.cpp b/tests/tools/cdb2api_stale_localcache.cpp new file mode 100644 index 0000000000..83aeb5a609 --- /dev/null +++ b/tests/tools/cdb2api_stale_localcache.cpp @@ -0,0 +1,71 @@ +/* Verify that stale connections in the local cache are detected and discarded. + * + * fail_local_cache_peek simulates recv() returning 0 (EOF from server), which + * is the same code path as real staleness. The test verifies that: + * - num_stale_cache_rejects increments on each stale detect + * - a new TCP connection is made when the cached one is discarded + * - normal cache hits resume once the injection is cleared + */ +#include +#include +#include + +#include +#include + +static void run_query(const char *dbname, const char *type) +{ + cdb2_hndl_tp *hndl = NULL; + int rc = cdb2_open(&hndl, dbname, type, 0); + assert(rc == CDB2_OK); + rc = cdb2_run_statement(hndl, "SELECT 1"); + assert(rc == CDB2_OK); + while ((rc = cdb2_next_record(hndl)) == CDB2_OK) + ; + assert(rc == CDB2_OK_DONE); + rc = cdb2_close(hndl); + assert(rc == CDB2_OK); +} + +int main(int argc, char **argv) +{ + signal(SIGPIPE, SIG_IGN); + if (argc < 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + const char *dbname = argv[1]; + char *conf = getenv("CDB2_CONFIG"); + if (conf) + cdb2_set_comdb2db_config(conf); + + set_fail_sockpool(-1); /* keep connections out of the external sockpool */ + set_local_connections_limit(10); + + /* First query: cache miss, connection goes into local cache on close */ + run_query(dbname, "default"); + assert(get_num_cache_misses() == 1); + assert(get_num_cache_hits() == 0); + assert(get_num_stale_cache_rejects() == 0); + + int connects_before = get_num_tcp_connects(); + + /* Simulate a stale cached socket (recv returns 0 = EOF) */ + set_fail_local_cache_peek(1); + + /* This query hits the cache, finds the socket stale, discards it, and + * opens a fresh TCP connection to the database */ + run_query(dbname, "default"); + assert(get_num_stale_cache_rejects() == 1); + assert(get_num_tcp_connects() > connects_before); + + /* Clear the injection; the fresh connection is now in the cache */ + set_fail_local_cache_peek(0); + + int hits_before = get_num_cache_hits(); + run_query(dbname, "default"); + assert(get_num_cache_hits() == hits_before + 1); + assert(get_num_stale_cache_rejects() == 1); /* no new stale rejects */ + + return 0; +}