From 782be25d8cad196498385da338335567bfce9281 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 6 Aug 2025 20:48:35 +0300 Subject: [PATCH] Refactor SLRU download interface between Postgres and the neon extension Move the responsibility of writing the SLRU segment contents to the neon extension. The neon extension hook now downloads the file and places it to the correct path, and returns a true/false to the caller to indicate if the file existed. On successful download, the caller retries opening the file. While we're at it, refactor the read_slru_segment function so that it is not part of the SMGR api anymore, but an independent hook. We were previously pretending it's part of the SMGR api and because of that, passed a dummy SMgrRelation, but it was a bit silly. --- src/backend/access/transam/slru.c | 136 +++++++++++------------------- src/backend/storage/smgr/smgr.c | 31 ++++--- src/backend/utils/init/globals.c | 1 - src/include/miscadmin.h | 6 -- src/include/storage/smgr.h | 8 +- 5 files changed, 74 insertions(+), 108 deletions(-) diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index e6c0e9ce7e0..3929f2d63ab 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -620,70 +620,28 @@ SimpleLruWritePage(SlruCtl ctl, int slotno) /* - * NEON: we do not want to include large pg_xact/multixact files in basebackup and prefer - * to download them on demand to reduce startup time. - * If SLRU segment is not found, we try to download it from page server + * NEON: we do not want to include large pg_xact/multixact files in the + * basebackup and prefer to download them on demand to reduce startup time. + * + * If SLRU segment is not found, we try to download it from the pageserver. + * + * Returns: + * true if the file was successfully downloaded + * false if the file was not found in pageserver + * ereports if some other error happened */ -static int -SimpleLruDownloadSegment(SlruCtl ctl, int pageno, char const* path) +static bool +SimpleLruDownloadSegment(SlruCtl ctl, int pageno, char const *path) { - int segno; - int fd = -1; - int n_blocks; - char* buffer; - - static SMgrRelationData dummy_smgr_rel = {0}; + int segno; /* If page is greater than latest written page, then do not try to download segment from server */ if (ctl->PagePrecedes(ctl->shared->latest_page_number, pageno)) - return -1; + return false; - if (!dummy_smgr_rel.smgr) - { - RelFileLocator rlocator = {0}; - dummy_smgr_rel.smgr = smgr(InvalidBackendId, rlocator); - } segno = pageno / SLRU_PAGES_PER_SEGMENT; - if (neon_use_communicator_worker) { - buffer = NULL; - } else { - buffer = palloc(BLCKSZ * SLRU_PAGES_PER_SEGMENT); - } - - n_blocks = smgr_read_slru_segment(&dummy_smgr_rel, path, segno, buffer); - if (n_blocks > 0) - { - fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY); - if (fd < 0) - { - slru_errcause = SLRU_OPEN_FAILED; - slru_errno = errno; - pfree(buffer); - return -1; - } - - if (!neon_use_communicator_worker) { - errno = 0; - pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE); - if (pg_pwrite(fd, buffer, n_blocks*BLCKSZ, 0) != n_blocks*BLCKSZ) - { - pgstat_report_wait_end(); - /* if write didn't set errno, assume problem is no disk space */ - if (errno == 0) - errno = ENOSPC; - slru_errcause = SLRU_WRITE_FAILED; - slru_errno = errno; - - CloseTransientFile(fd); - pfree(buffer); - return -1; - } - pgstat_report_wait_end(); - } - } - pfree(buffer); - return fd; + return smgr_read_slru_segment(path, segno); } /* @@ -702,29 +660,34 @@ SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno) int fd; bool result; off_t endpos; + bool attempted_download = false; /* update the stats counter of checked pages */ pgstat_count_slru_page_exists(ctl->shared->slru_stats_idx); SlruFileName(ctl, path, segno); + retry_after_download: fd = OpenTransientFile(path, O_RDONLY | PG_BINARY); if (fd < 0) { - /* expected: file doesn't exist */ - if (errno == ENOENT) - { - fd = SimpleLruDownloadSegment(ctl, pageno, path); - if (fd < 0) - return false; - } - else + if (errno == ENOENT && !attempted_download) { - /* report error normally */ - slru_errcause = SLRU_OPEN_FAILED; - slru_errno = errno; - SlruReportIOError(ctl, pageno, 0); + /* Try to download the file from the pageserver */ + attempted_download = true; + if (SimpleLruDownloadSegment(ctl, pageno, path)) + goto retry_after_download; + errno = ENOENT; } + + /* expected: file doesn't exist */ + if (errno == ENOENT) + return false; + + /* report error normally */ + slru_errcause = SLRU_OPEN_FAILED; + slru_errno = errno; + SlruReportIOError(ctl, pageno, 0); } if ((endpos = lseek(fd, 0, SEEK_END)) < 0) @@ -765,6 +728,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno) off_t offset = rpageno * BLCKSZ; char path[MAXPGPATH]; int fd; + bool attempted_download = false; SlruFileName(ctl, path, segno); @@ -775,33 +739,31 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno) * SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case * where the file doesn't exist, and return zeroes instead. */ + retry_after_download: fd = OpenTransientFile(path, O_RDONLY | PG_BINARY); if (fd < 0) { - if (errno != ENOENT) + if (errno == ENOENT && !attempted_download) + { + /* Try to download the file from the pageserver */ + attempted_download = true; + if (SimpleLruDownloadSegment(ctl, pageno, path)) + goto retry_after_download; + errno = ENOENT; + } + + if (errno != ENOENT || !InRecovery) { slru_errcause = SLRU_OPEN_FAILED; slru_errno = errno; return false; } - fd = SimpleLruDownloadSegment(ctl, pageno, path); - if (fd < 0) - { - if (!InRecovery) - { - slru_errcause = SLRU_OPEN_FAILED; - slru_errno = errno; - return false; - } - else - { - ereport(LOG, - (errmsg("file \"%s\" doesn't exist, reading as zeroes", - path))); - MemSet(shared->page_buffer[slotno], 0, BLCKSZ); - return true; - } - } + + ereport(LOG, + (errmsg("file \"%s\" doesn't exist, reading as zeroes", + path))); + MemSet(shared->page_buffer[slotno], 0, BLCKSZ); + return true; } errno = 0; diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 7123866e971..53169043b6c 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -49,6 +49,13 @@ static const f_smgr smgr_md = { .smgr_immedsync = mdimmedsync, }; +/* + * SLRU download isn't really part of the smgr API, as SLRUs are not + * relations. But we define this here anyway, to keep it close to the smgr + * hooks in Neon. + */ +read_slru_segment_hook_type read_slru_segment_hook; + /* * Each backend has a hashtable that stores all extant SMgrRelation objects. * In addition, "unowned" SMgrRelation objects are chained together in a list. @@ -776,22 +783,24 @@ smgr_end_unlogged_build(SMgrRelation reln) } /* - * NEON: we do not want to include large pg_xact/multixact files in basebackup and prefer - * to download them on demand to reduce startup time. - * If SLRU segment is not found, we try to download it from page server + * NEON: Attempt to download an SLRU file from remote storage. + * + * To reduce startup time, we don't want to include large pg_xact/multixact + * files in the basebackup. Instead, we have this hook to download them on + * demand. If an SLRU segment is not found, the code in slru.c calls this to + * check if it can be downloaded from the pageserver. * - * This function returns number of blocks in segment. Usually it should be SLRU_PAGES_PER_SEGMENT but in case - * of partial segment, it can be smaller. Zero value means that segment doesn't exist. - * From Postgres point of view empty segment is the same as absent segment. + * If the segment is found in remote storage, the hook writes it to the local + * file and returns 'true'. If the file is not found, returns 'false'. */ -int -smgr_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buffer) +bool +smgr_read_slru_segment(const char *path, int segno) { - return (*reln->smgr).smgr_read_slru_segment ? (*reln->smgr).smgr_read_slru_segment(reln, path, segno, buffer) : 0; + if (read_slru_segment_hook) + return read_slru_segment_hook(path, segno); + return false; } - - /* * AtEOXact_SMgr * diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index 95f3cb19f38..011ec18015a 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -113,7 +113,6 @@ bool IsPostmasterEnvironment = false; bool IsUnderPostmaster = false; bool IsBinaryUpgrade = false; bool IsBackgroundWorker = false; -bool neon_use_communicator_worker = false; bool ExitOnAnyError = false; diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 5d7d58b2d07..f5b954a2858 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -170,12 +170,6 @@ extern PGDLLIMPORT bool IsPostmasterEnvironment; extern PGDLLIMPORT bool IsUnderPostmaster; extern PGDLLIMPORT bool IsBackgroundWorker; extern PGDLLIMPORT bool IsBinaryUpgrade; -/* Whether the communicator worker is used or not. Defined here so that - * it is also accessible from the main postgres code easily without - * having to look up the value using strings and chain of other - * functions. - */ -extern PGDLLIMPORT bool neon_use_communicator_worker; extern PGDLLIMPORT bool ExitOnAnyError; diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 704ba0c0cae..5b4af0e4ac5 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -130,8 +130,6 @@ typedef struct f_smgr void (*smgr_start_unlogged_build) (SMgrRelation reln); void (*smgr_finish_unlogged_build_phase_1) (SMgrRelation reln); void (*smgr_end_unlogged_build) (SMgrRelation reln); - - int (*smgr_read_slru_segment) (SMgrRelation reln, const char *path, int segno, void* buffer); } f_smgr; typedef void (*smgr_init_hook_type) (void); @@ -141,6 +139,10 @@ extern PGDLLIMPORT smgr_shutdown_hook_type smgr_shutdown_hook; extern void smgr_init_standard(void); extern void smgr_shutdown_standard(void); +/* NEON: Hook for reading an SLRU segment from e.g. remote storage */ +typedef bool (*read_slru_segment_hook_type) (const char *path, int segno); +extern read_slru_segment_hook_type read_slru_segment_hook; + // Alternative implementation of calculate_database_size() typedef int64 (*dbsize_hook_type) (Oid dbOid); extern PGDLLIMPORT dbsize_hook_type dbsize_hook; @@ -192,6 +194,6 @@ extern void smgr_start_unlogged_build(SMgrRelation reln); extern void smgr_finish_unlogged_build_phase_1(SMgrRelation reln); extern void smgr_end_unlogged_build(SMgrRelation reln); -extern int smgr_read_slru_segment(SMgrRelation reln, const char *path, int segno, void* buffer); +extern bool smgr_read_slru_segment(const char *path, int segno); #endif /* SMGR_H */