Skip to content

Commit 3628a0b

Browse files
author
Heikki Linnakangas
committed
Refactor SLRU download interface between Postgres and the neon extension
Move the responsibility of writing the SLRU segment contents to the neon extension. The neon extension hook now downloads the file and places it to the correct path, and returns a true/false to the caller to indicate if the file existed. On successful download, the caller retries opening the file. While we're at it, refactor the read_slru_segment function so that it is not part of the SMGR api anymore, but an independent hook. We were previously pretending it's part of the SMGR api and because of that, passed a dummy SMgrRelation, but it was a bit silly.
1 parent f31add2 commit 3628a0b

File tree

5 files changed

+80
-113
lines changed

5 files changed

+80
-113
lines changed

src/backend/access/transam/slru.c

Lines changed: 49 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -619,70 +619,28 @@ SimpleLruWritePage(SlruCtl ctl, int slotno)
619619

620620

621621
/*
622-
* NEON: we do not want to include large pg_xact/multixact files in basebackup and prefer
623-
* to download them on demand to reduce startup time.
624-
* If SLRU segment is not found, we try to download it from page server
622+
* NEON: we do not want to include large pg_xact/multixact files in the
623+
* basebackup and prefer to download them on demand to reduce startup time.
624+
*
625+
* If SLRU segment is not found, we try to download it from the pageserver.
626+
*
627+
* Returns:
628+
* true if the file was successfully downloaded
629+
* false if the file was not found in pageserver
630+
* ereports if some other error happened
625631
*/
626-
static int
627-
SimpleLruDownloadSegment(SlruCtl ctl, int pageno, char const* path)
632+
static bool
633+
SimpleLruDownloadSegment(SlruCtl ctl, int pageno, char const *path)
628634
{
629-
int segno;
630-
int fd = -1;
631-
int n_blocks;
632-
char* buffer;
633-
634-
static SMgrRelationData dummy_smgr_rel = {0};
635+
int segno;
635636

636637
/* If page is greater than latest written page, then do not try to download segment from server */
637638
if (ctl->PagePrecedes(ctl->shared->latest_page_number, pageno))
638-
return -1;
639+
return false;
639640

640-
if (!dummy_smgr_rel.smgr)
641-
{
642-
RelFileNode rnode = {0};
643-
dummy_smgr_rel.smgr = smgr(InvalidBackendId, rnode);
644-
}
645641
segno = pageno / SLRU_PAGES_PER_SEGMENT;
646642

647-
if (neon_use_communicator_worker) {
648-
buffer = NULL;
649-
} else {
650-
buffer = palloc(BLCKSZ * SLRU_PAGES_PER_SEGMENT);
651-
}
652-
653-
n_blocks = smgr_read_slru_segment(&dummy_smgr_rel, path, segno, buffer);
654-
if (n_blocks > 0)
655-
{
656-
fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY);
657-
if (fd < 0)
658-
{
659-
slru_errcause = SLRU_OPEN_FAILED;
660-
slru_errno = errno;
661-
pfree(buffer);
662-
return -1;
663-
}
664-
665-
if (!neon_use_communicator_worker) {
666-
errno = 0;
667-
pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
668-
if (pg_pwrite(fd, buffer, n_blocks*BLCKSZ, 0) != n_blocks*BLCKSZ)
669-
{
670-
pgstat_report_wait_end();
671-
/* if write didn't set errno, assume problem is no disk space */
672-
if (errno == 0)
673-
errno = ENOSPC;
674-
slru_errcause = SLRU_WRITE_FAILED;
675-
slru_errno = errno;
676-
677-
CloseTransientFile(fd);
678-
pfree(buffer);
679-
return -1;
680-
}
681-
pgstat_report_wait_end();
682-
}
683-
}
684-
pfree(buffer);
685-
return fd;
643+
return smgr_read_slru_segment(path, segno);
686644
}
687645

688646
/*
@@ -701,29 +659,34 @@ SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno)
701659
int fd;
702660
bool result;
703661
off_t endpos;
662+
bool attempted_download = false;
704663

705664
/* update the stats counter of checked pages */
706665
pgstat_count_slru_page_exists(ctl->shared->slru_stats_idx);
707666

708667
SlruFileName(ctl, path, segno);
709668

669+
retry_after_download:
710670
fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
711671
if (fd < 0)
712672
{
713-
/* expected: file doesn't exist */
714-
if (errno == ENOENT)
715-
{
716-
fd = SimpleLruDownloadSegment(ctl, pageno, path);
717-
if (fd < 0)
718-
return false;
719-
}
720-
else
673+
if (errno == ENOENT && !attempted_download)
721674
{
722-
/* report error normally */
723-
slru_errcause = SLRU_OPEN_FAILED;
724-
slru_errno = errno;
725-
SlruReportIOError(ctl, pageno, 0);
675+
/* Try to download the file from the pageserver */
676+
attempted_download = true;
677+
if (SimpleLruDownloadSegment(ctl, pageno, path))
678+
goto retry_after_download;
679+
errno = ENOENT;
726680
}
681+
682+
/* expected: file doesn't exist */
683+
if (errno == ENOENT)
684+
return false;
685+
686+
/* report error normally */
687+
slru_errcause = SLRU_OPEN_FAILED;
688+
slru_errno = errno;
689+
SlruReportIOError(ctl, pageno, 0);
727690
}
728691

729692
if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
@@ -764,6 +727,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
764727
off_t offset = rpageno * BLCKSZ;
765728
char path[MAXPGPATH];
766729
int fd;
730+
bool attempted_download = false;
767731

768732
SlruFileName(ctl, path, segno);
769733

@@ -774,33 +738,31 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
774738
* SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case
775739
* where the file doesn't exist, and return zeroes instead.
776740
*/
741+
retry_after_download:
777742
fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
778743
if (fd < 0)
779744
{
780-
if (errno != ENOENT)
745+
if (errno == ENOENT && !attempted_download)
746+
{
747+
/* Try to download the file from the pageserver */
748+
attempted_download = true;
749+
if (SimpleLruDownloadSegment(ctl, pageno, path))
750+
goto retry_after_download;
751+
errno = ENOENT;
752+
}
753+
754+
if (errno != ENOENT || !InRecovery)
781755
{
782756
slru_errcause = SLRU_OPEN_FAILED;
783757
slru_errno = errno;
784758
return false;
785759
}
786-
fd = SimpleLruDownloadSegment(ctl, pageno, path);
787-
if (fd < 0)
788-
{
789-
if (!InRecovery)
790-
{
791-
slru_errcause = SLRU_OPEN_FAILED;
792-
slru_errno = errno;
793-
return false;
794-
}
795-
else
796-
{
797-
ereport(LOG,
798-
(errmsg("file \"%s\" doesn't exist, reading as zeroes",
799-
path)));
800-
MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
801-
return true;
802-
}
803-
}
760+
761+
ereport(LOG,
762+
(errmsg("file \"%s\" doesn't exist, reading as zeroes",
763+
path)));
764+
MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
765+
return true;
804766
}
805767

806768
errno = 0;

src/backend/storage/smgr/smgr.c

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,13 @@ static const f_smgr smgr_md = {
4646
.smgr_immedsync = mdimmedsync,
4747
};
4848

49+
/*
50+
* SLRU download isn't really part of the smgr API, as SLRUs are not
51+
* relations. But we define this here anyway, to keep it close to the smgr
52+
* hooks in Neon.
53+
*/
54+
read_slru_segment_hook_type read_slru_segment_hook;
55+
4956
/*
5057
* Each backend has a hashtable that stores all extant SMgrRelation objects.
5158
* In addition, "unowned" SMgrRelation objects are chained together in a list.
@@ -538,22 +545,6 @@ smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
538545
buffer, skipFsync);
539546
}
540547

541-
/*
542-
* NEON: we do not want to include large pg_xact/multixact files in basebackup and prefer
543-
* to download them on demand to reduce startup time.
544-
* If SLRU segment is not found, we try to download it from page server
545-
*
546-
* This function returns number of blocks in segment. Usually it should be SLRU_PAGES_PER_SEGMENT but in case
547-
* of partial segment, it can be smaller. Zero value means that segment doesn't exist.
548-
* From Postgres point of view empty segment is the same as absent segment.
549-
*/
550-
int
551-
smgr_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buffer)
552-
{
553-
return (*reln->smgr).smgr_read_slru_segment ? (*reln->smgr).smgr_read_slru_segment(reln, path, segno, buffer) : 0;
554-
}
555-
556-
557548

558549
/*
559550
* smgrwriteback() -- Trigger kernel writeback for the supplied range of
@@ -740,6 +731,25 @@ smgr_end_unlogged_build(SMgrRelation reln)
740731
(*reln->smgr).smgr_end_unlogged_build(reln);
741732
}
742733

734+
/*
735+
* NEON: Attempt to download an SLRU file from remote storage.
736+
*
737+
* To reduce startup time, we don't want to include large pg_xact/multixact
738+
* files in the basebackup. Instead, we have this hook to download them on
739+
* demand. If an SLRU segment is not found, the code in slru.c calls this to
740+
* check if it can be downloaded from the pageserver.
741+
*
742+
* If the segment is found in remote storage, the hook writes it to the local
743+
* file and returns 'true'. If the file is not found, returns 'false'.
744+
*/
745+
bool
746+
smgr_read_slru_segment(const char *path, int segno)
747+
{
748+
if (read_slru_segment_hook)
749+
return read_slru_segment_hook(path, segno);
750+
return false;
751+
}
752+
743753
/*
744754
* AtEOXact_SMgr
745755
*

src/backend/utils/init/globals.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,6 @@ bool IsPostmasterEnvironment = false;
112112
bool IsUnderPostmaster = false;
113113
bool IsBinaryUpgrade = false;
114114
bool IsBackgroundWorker = false;
115-
bool neon_use_communicator_worker = false;
116115

117116
bool ExitOnAnyError = false;
118117

src/include/miscadmin.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -169,12 +169,6 @@ extern PGDLLIMPORT bool IsPostmasterEnvironment;
169169
extern PGDLLIMPORT bool IsUnderPostmaster;
170170
extern PGDLLIMPORT bool IsBackgroundWorker;
171171
extern PGDLLIMPORT bool IsBinaryUpgrade;
172-
/* Whether the communicator worker is used or not. Defined here so that
173-
* it is also accessible from the main postgres code easily without
174-
* having to look up the value using strings and chain of other
175-
* functions.
176-
*/
177-
extern PGDLLIMPORT bool neon_use_communicator_worker;
178172

179173
extern PGDLLIMPORT bool ExitOnAnyError;
180174

src/include/storage/smgr.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,6 @@ typedef struct f_smgr
128128
void (*smgr_start_unlogged_build) (SMgrRelation reln);
129129
void (*smgr_finish_unlogged_build_phase_1) (SMgrRelation reln);
130130
void (*smgr_end_unlogged_build) (SMgrRelation reln);
131-
132-
int (*smgr_read_slru_segment) (SMgrRelation reln, const char *path, int segno, void* buffer);
133131
} f_smgr;
134132

135133
typedef void (*smgr_init_hook_type) (void);
@@ -139,6 +137,10 @@ extern PGDLLIMPORT smgr_shutdown_hook_type smgr_shutdown_hook;
139137
extern void smgr_init_standard(void);
140138
extern void smgr_shutdown_standard(void);
141139

140+
/* NEON: Hook for reading an SLRU segment from e.g. remote storage */
141+
typedef bool (*read_slru_segment_hook_type) (const char *path, int segno);
142+
extern read_slru_segment_hook_type read_slru_segment_hook;
143+
142144
// Alternative implementation of calculate_database_size()
143145
typedef int64 (*dbsize_hook_type) (Oid dbOid);
144146
extern PGDLLIMPORT dbsize_hook_type dbsize_hook;
@@ -184,6 +186,6 @@ extern void smgr_start_unlogged_build(SMgrRelation reln);
184186
extern void smgr_finish_unlogged_build_phase_1(SMgrRelation reln);
185187
extern void smgr_end_unlogged_build(SMgrRelation reln);
186188

187-
extern int smgr_read_slru_segment(SMgrRelation reln, const char *path, int segno, void* buffer);
189+
extern bool smgr_read_slru_segment(const char *path, int segno);
188190

189191
#endif /* SMGR_H */

0 commit comments

Comments
 (0)