Skip to content

Commit c93daf0

Browse files
author
Heikki Linnakangas
committed
Refactor SLRU download interface between Postgres and the neon extension
Move the responsibility of writing the SLRU segment contents to the neon extension. The neon extension hook now downloads the file and places it to the correct path, and returns a true/false to the caller to indicate if the file existed. On successful download, the caller retries opening the file. While we're at it, refactor the read_slru_segment function so that it is not part of the SMGR api anymore, but an independent hook. We were previously pretending it's part of the SMGR api and because of that, passed a dummy SMgrRelation, but it was a bit silly.
1 parent db68e55 commit c93daf0

File tree

5 files changed

+74
-108
lines changed

5 files changed

+74
-108
lines changed

src/backend/access/transam/slru.c

Lines changed: 49 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -736,71 +736,29 @@ SimpleLruWritePage(SlruCtl ctl, int slotno)
736736

737737

738738
/*
739-
* NEON: we do not want to include large pg_xact/multixact files in basebackup and prefer
740-
* to download them on demand to reduce startup time.
741-
* If SLRU segment is not found, we try to download it from page server
739+
* NEON: we do not want to include large pg_xact/multixact files in the
740+
* basebackup and prefer to download them on demand to reduce startup time.
741+
*
742+
* If SLRU segment is not found, we try to download it from the pageserver.
743+
*
744+
* Returns:
745+
* true if the file was successfully downloaded
746+
* false if the file was not found in pageserver
747+
* ereports if some other error happened
742748
*/
743-
static int
744-
SimpleLruDownloadSegment(SlruCtl ctl, int pageno, char const* path)
749+
static bool
750+
SimpleLruDownloadSegment(SlruCtl ctl, int pageno, char const *path)
745751
{
746752
SlruShared shared = ctl->shared;
747-
int segno;
748-
int fd = -1;
749-
int n_blocks;
750-
char* buffer;
751-
752-
static SMgrRelationData dummy_smgr_rel = {0};
753+
int segno;
753754

754755
/* If page is greater than latest written page, then do not try to download segment from server */
755756
if (ctl->PagePrecedes(pg_atomic_read_u64(&shared->latest_page_number), pageno))
756-
return -1;
757+
return false;
757758

758-
if (!dummy_smgr_rel.smgr)
759-
{
760-
RelFileLocator rlocator = {0};
761-
dummy_smgr_rel.smgr = smgr(INVALID_PROC_NUMBER, rlocator);
762-
}
763759
segno = pageno / SLRU_PAGES_PER_SEGMENT;
764760

765-
if (neon_use_communicator_worker) {
766-
buffer = NULL;
767-
} else {
768-
buffer = palloc(BLCKSZ * SLRU_PAGES_PER_SEGMENT);
769-
}
770-
771-
n_blocks = smgr_read_slru_segment(&dummy_smgr_rel, path, segno, buffer);
772-
if (n_blocks > 0)
773-
{
774-
fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY);
775-
if (fd < 0)
776-
{
777-
slru_errcause = SLRU_OPEN_FAILED;
778-
slru_errno = errno;
779-
pfree(buffer);
780-
return -1;
781-
}
782-
783-
if (!neon_use_communicator_worker) {
784-
errno = 0;
785-
pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
786-
if (pg_pwrite(fd, buffer, n_blocks*BLCKSZ, 0) != n_blocks*BLCKSZ)
787-
{
788-
pgstat_report_wait_end();
789-
/* if write didn't set errno, assume problem is no disk space */
790-
if (errno == 0)
791-
errno = ENOSPC;
792-
slru_errcause = SLRU_WRITE_FAILED;
793-
slru_errno = errno;
794-
795-
CloseTransientFile(fd);
796-
pfree(buffer);
797-
return -1;
798-
}
799-
pgstat_report_wait_end();
800-
}
801-
}
802-
pfree(buffer);
803-
return fd;
761+
return smgr_read_slru_segment(path, segno);
804762
}
805763

806764
/*
@@ -819,29 +777,34 @@ SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int64 pageno)
819777
int fd;
820778
bool result;
821779
off_t endpos;
780+
bool attempted_download = false;
822781

823782
/* update the stats counter of checked pages */
824783
pgstat_count_slru_page_exists(ctl->shared->slru_stats_idx);
825784

826785
SlruFileName(ctl, path, segno);
827786

787+
retry_after_download:
828788
fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
829789
if (fd < 0)
830790
{
831-
/* expected: file doesn't exist */
832-
if (errno == ENOENT)
791+
if (errno == ENOENT && !attempted_download)
833792
{
834-
fd = SimpleLruDownloadSegment(ctl, pageno, path);
835-
if (fd < 0)
836-
return false;
837-
}
838-
else
839-
{
840-
/* report error normally */
841-
slru_errcause = SLRU_OPEN_FAILED;
842-
slru_errno = errno;
843-
SlruReportIOError(ctl, pageno, 0);
793+
/* Try to download the file from the pageserver */
794+
attempted_download = true;
795+
if (SimpleLruDownloadSegment(ctl, pageno, path))
796+
goto retry_after_download;
797+
errno = ENOENT;
844798
}
799+
800+
/* expected: file doesn't exist */
801+
if (errno == ENOENT)
802+
return false;
803+
804+
/* report error normally */
805+
slru_errcause = SLRU_OPEN_FAILED;
806+
slru_errno = errno;
807+
SlruReportIOError(ctl, pageno, 0);
845808
}
846809

847810
if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
@@ -882,6 +845,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int64 pageno, int slotno)
882845
off_t offset = rpageno * BLCKSZ;
883846
char path[MAXPGPATH];
884847
int fd;
848+
bool attempted_download = false;
885849

886850
SlruFileName(ctl, path, segno);
887851

@@ -892,33 +856,31 @@ SlruPhysicalReadPage(SlruCtl ctl, int64 pageno, int slotno)
892856
* SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case
893857
* where the file doesn't exist, and return zeroes instead.
894858
*/
859+
retry_after_download:
895860
fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
896861
if (fd < 0)
897862
{
898-
if (errno != ENOENT)
863+
if (errno == ENOENT && !attempted_download)
864+
{
865+
/* Try to download the file from the pageserver */
866+
attempted_download = true;
867+
if (SimpleLruDownloadSegment(ctl, pageno, path))
868+
goto retry_after_download;
869+
errno = ENOENT;
870+
}
871+
872+
if (errno != ENOENT || !InRecovery)
899873
{
900874
slru_errcause = SLRU_OPEN_FAILED;
901875
slru_errno = errno;
902876
return false;
903877
}
904-
fd = SimpleLruDownloadSegment(ctl, pageno, path);
905-
if (fd < 0)
906-
{
907-
if (!InRecovery)
908-
{
909-
slru_errcause = SLRU_OPEN_FAILED;
910-
slru_errno = errno;
911-
return false;
912-
}
913-
else
914-
{
915-
ereport(LOG,
916-
(errmsg("file \"%s\" doesn't exist, reading as zeroes",
917-
path)));
918-
MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
919-
return true;
920-
}
921-
}
878+
879+
ereport(LOG,
880+
(errmsg("file \"%s\" doesn't exist, reading as zeroes",
881+
path)));
882+
MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
883+
return true;
922884
}
923885

924886
errno = 0;

src/backend/storage/smgr/smgr.c

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,13 @@ static const f_smgr smgr_md = {
8383
.smgr_registersync = mdregistersync,
8484
};
8585

86+
/*
87+
* SLRU download isn't really part of the smgr API, as SLRUs are not
88+
* relations. But we define this here anyway, to keep it close to the smgr
89+
* hooks in Neon.
90+
*/
91+
read_slru_segment_hook_type read_slru_segment_hook;
92+
8693
/*
8794
* Each backend has a hashtable that stores all extant SMgrRelation objects.
8895
* In addition, "unpinned" SMgrRelation objects are chained together in a list.
@@ -821,22 +828,24 @@ smgr_end_unlogged_build(SMgrRelation reln)
821828
}
822829

823830
/*
824-
* NEON: we do not want to include large pg_xact/multixact files in basebackup and prefer
825-
* to download them on demand to reduce startup time.
826-
* If SLRU segment is not found, we try to download it from page server
831+
* NEON: Attempt to download an SLRU file from remote storage.
832+
*
833+
* To reduce startup time, we don't want to include large pg_xact/multixact
834+
* files in the basebackup. Instead, we have this hook to download them on
835+
* demand. If an SLRU segment is not found, the code in slru.c calls this to
836+
* check if it can be downloaded from the pageserver.
827837
*
828-
* This function returns number of blocks in segment. Usually it should be SLRU_PAGES_PER_SEGMENT but in case
829-
* of partial segment, it can be smaller. Zero value means that segment doesn't exist.
830-
* From Postgres point of view empty segment is the same as absent segment.
838+
* If the segment is found in remote storage, the hook writes it to the local
839+
* file and returns 'true'. If the file is not found, returns 'false'.
831840
*/
832-
int
833-
smgr_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buffer)
841+
bool
842+
smgr_read_slru_segment(const char *path, int segno)
834843
{
835-
return (*reln->smgr).smgr_read_slru_segment ? (*reln->smgr).smgr_read_slru_segment(reln, path, segno, buffer) : 0;
844+
if (read_slru_segment_hook)
845+
return read_slru_segment_hook(path, segno);
846+
return false;
836847
}
837848

838-
839-
840849
/*
841850
* AtEOXact_SMgr
842851
*

src/backend/utils/init/globals.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,6 @@ pid_t PostmasterPid = 0;
116116
bool IsPostmasterEnvironment = false;
117117
bool IsUnderPostmaster = false;
118118
bool IsBinaryUpgrade = false;
119-
bool neon_use_communicator_worker = false;
120119

121120
bool ExitOnAnyError = false;
122121

src/include/miscadmin.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -170,12 +170,6 @@ extern PGDLLIMPORT pid_t PostmasterPid;
170170
extern PGDLLIMPORT bool IsPostmasterEnvironment;
171171
extern PGDLLIMPORT bool IsUnderPostmaster;
172172
extern PGDLLIMPORT bool IsBinaryUpgrade;
173-
/* Whether the communicator worker is used or not. Defined here so that
174-
* it is also accessible from the main postgres code easily without
175-
* having to look up the value using strings and chain of other
176-
* functions.
177-
*/
178-
extern PGDLLIMPORT bool neon_use_communicator_worker;
179173

180174
extern PGDLLIMPORT bool ExitOnAnyError;
181175

src/include/storage/smgr.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,6 @@ typedef struct f_smgr
130130
void (*smgr_start_unlogged_build) (SMgrRelation reln);
131131
void (*smgr_finish_unlogged_build_phase_1) (SMgrRelation reln);
132132
void (*smgr_end_unlogged_build) (SMgrRelation reln);
133-
134-
int (*smgr_read_slru_segment) (SMgrRelation reln, const char *path, int segno, void* buffer);
135133
} f_smgr;
136134

137135
typedef void (*smgr_init_hook_type) (void);
@@ -141,6 +139,10 @@ extern PGDLLIMPORT smgr_shutdown_hook_type smgr_shutdown_hook;
141139
extern void smgr_init_standard(void);
142140
extern void smgr_shutdown_standard(void);
143141

142+
/* NEON: Hook for reading an SLRU segment from e.g. remote storage */
143+
typedef bool (*read_slru_segment_hook_type) (const char *path, int segno);
144+
extern read_slru_segment_hook_type read_slru_segment_hook;
145+
144146
// Alternative implementation of calculate_database_size()
145147
typedef int64 (*dbsize_hook_type) (Oid dbOid);
146148
extern PGDLLIMPORT dbsize_hook_type dbsize_hook;
@@ -210,6 +212,6 @@ extern void smgr_start_unlogged_build(SMgrRelation reln);
210212
extern void smgr_finish_unlogged_build_phase_1(SMgrRelation reln);
211213
extern void smgr_end_unlogged_build(SMgrRelation reln);
212214

213-
extern int smgr_read_slru_segment(SMgrRelation reln, const char *path, int segno, void* buffer);
215+
extern bool smgr_read_slru_segment(const char *path, int segno);
214216

215217
#endif /* SMGR_H */

0 commit comments

Comments
 (0)