diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index af57fe9e53a..0b88bd03ebc 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -59,6 +59,7 @@
 #include "pgstat.h"
 #include "storage/fd.h"
 #include "storage/shmem.h"
+#include "storage/smgr.h"
 
 #define SlruFileName(ctl, path, seg) \
 	snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)
@@ -617,6 +618,77 @@ SimpleLruWritePage(SlruCtl ctl, int slotno)
 	SlruInternalWritePage(ctl, slotno, NULL);
 }
 
+
+/*
+ * NEON: we do not want to include large pg_xact/multixact files in basebackup and prefer
+ * to download them on demand to reduce startup time.
+ * If SLRU segment is not found, we try to download it from page server
+ *
+ * Returns an open transient file descriptor for the downloaded segment, or -1
+ * if there is nothing to open.  Whenever -1 is returned, slru_errcause and
+ * slru_errno are set: slru_errno is ENOENT when the segment is simply absent,
+ * and the errno of the failed open/write otherwise.
+ */
+static int
+SimpleLruDownloadSegment(SlruCtl ctl, int pageno, char const* path)
+{
+	int segno;
+	int fd = -1;
+	int n_blocks;
+	char* buffer;
+
+	static SMgrRelationData dummy_smgr_rel = {0};
+
+	/* Until an I/O call fails, the only possible failure is "no such segment" */
+	slru_errcause = SLRU_OPEN_FAILED;
+	slru_errno = ENOENT;
+
+	/* If page is greater than latest written page, then do not try to download segment from server */
+	if (ctl->PagePrecedes(ctl->shared->latest_page_number, pageno))
+		return -1;
+
+	if (!dummy_smgr_rel.smgr)
+	{
+		RelFileNode rnode = {0};
+		dummy_smgr_rel.smgr = smgr(InvalidBackendId, rnode);
+	}
+	segno = pageno / SLRU_PAGES_PER_SEGMENT;
+
+	buffer = palloc(BLCKSZ * SLRU_PAGES_PER_SEGMENT);
+	n_blocks = smgr_read_slru_segment(&dummy_smgr_rel, path, segno, buffer);
+	if (n_blocks > 0)
+	{
+		fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY);
+		if (fd < 0)
+		{
+			slru_errcause = SLRU_OPEN_FAILED;
+			slru_errno = errno;
+			pfree(buffer);
+			return -1;
+		}
+		errno = 0;
+		pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
+		if (pg_pwrite(fd, buffer, n_blocks*BLCKSZ, 0) != n_blocks*BLCKSZ)
+		{
+			pgstat_report_wait_end();
+			/* if write didn't set errno, assume problem is no disk space */
+			if (errno == 0)
+				errno = ENOSPC;
+			slru_errcause = SLRU_WRITE_FAILED;
+			slru_errno = errno;
+
+			CloseTransientFile(fd);
+			/* a truncated file would be opened next time instead of being downloaded again */
+			unlink(path);
+			pfree(buffer);
+			return -1;
+		}
+		pgstat_report_wait_end();
+	}
+	pfree(buffer);
+	return fd;
+}
+
 /*
  * Return whether the given page exists on disk.
  *
@@ -644,12 +716,24 @@ SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno)
 	{
 		/* expected: file doesn't exist */
 		if (errno == ENOENT)
-			return false;
-
-		/* report error normally */
-		slru_errcause = SLRU_OPEN_FAILED;
-		slru_errno = errno;
-		SlruReportIOError(ctl, pageno, 0);
+		{
+			fd = SimpleLruDownloadSegment(ctl, pageno, path);
+			if (fd < 0)
+			{
+				/* expected: segment could not be downloaded either */
+				if (slru_errno == ENOENT)
+					return false;
+				/* creating the local copy failed: report what the helper recorded */
+				SlruReportIOError(ctl, pageno, 0);
+			}
+		}
+		else
+		{
+			/* report error normally */
+			slru_errcause = SLRU_OPEN_FAILED;
+			slru_errno = errno;
+			SlruReportIOError(ctl, pageno, 0);
+		}
 	}
 
 	if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
@@ -703,18 +787,29 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
 	fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
 	if (fd < 0)
 	{
-		if (errno != ENOENT || !InRecovery)
+		if (errno != ENOENT)
 		{
 			slru_errcause = SLRU_OPEN_FAILED;
 			slru_errno = errno;
 			return false;
 		}
-
-		ereport(LOG,
-				(errmsg("file \"%s\" doesn't exist, reading as zeroes",
-						path)));
-		MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
-		return true;
+		fd = SimpleLruDownloadSegment(ctl, pageno, path);
+		if (fd < 0)
+		{
+			/*
+			 * SimpleLruDownloadSegment has already set slru_errcause and
+			 * slru_errno; errno itself may have been overwritten since the
+			 * open above.  Only an absent segment may be read as zeroes.
+			 */
+			if (slru_errno != ENOENT || !InRecovery)
+				return false;
+
+			ereport(LOG,
+					(errmsg("file \"%s\" doesn't exist, reading as zeroes",
+							path)));
+			MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
+			return true;
+		}
 	}
 
 	errno = 0;
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index b8679b73700..8ba58a4d9a7 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -570,6 +570,22 @@ smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 										buffer, skipFsync);
 }
 
+/*
+ * NEON: we do not want to include large pg_xact/multixact files in basebackup and prefer
+ * to download them on demand to reduce startup time.
+ * If SLRU segment is not found, we try to download it from page server
+ *
+ * This function returns number of blocks in segment. Usually it should be SLRU_PAGES_PER_SEGMENT but in case
+ * of partial segment, it can be smaller. Zero value means that segment doesn't exist.
+ * From Postgres point of view empty segment is the same as absent segment.
+ */
+int
+smgr_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buffer)
+{
+	return (*reln->smgr).smgr_read_slru_segment ? (*reln->smgr).smgr_read_slru_segment(reln, path, segno, buffer) : 0;
+}
+
+
 
 /*
  *	smgrwriteback() -- Trigger kernel writeback for the supplied range of
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 2a29dcd194b..ab6a961d471 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -88,7 +88,6 @@ typedef SMgrRelationData *SMgrRelation;
 #define SmgrIsTemp(smgr) \
 	RelFileNodeBackendIsTemp((smgr)->smgr_rnode)
 
-
 /*
  * This struct of function pointers defines the API between smgr.c and
  * any individual storage manager module.  Note that smgr subfunctions are
@@ -129,6 +128,8 @@ typedef struct f_smgr
 	void		(*smgr_start_unlogged_build) (SMgrRelation reln);
 	void		(*smgr_finish_unlogged_build_phase_1) (SMgrRelation reln);
 	void		(*smgr_end_unlogged_build) (SMgrRelation reln);
+
+	int			(*smgr_read_slru_segment) (SMgrRelation reln, const char *path, int segno, void* buffer);
 } f_smgr;
 
 typedef void (*smgr_init_hook_type) (void);
@@ -183,4 +184,6 @@ extern void smgr_start_unlogged_build(SMgrRelation reln);
 extern void smgr_finish_unlogged_build_phase_1(SMgrRelation reln);
 extern void smgr_end_unlogged_build(SMgrRelation reln);
 
+extern int smgr_read_slru_segment(SMgrRelation reln, const char *path, int segno, void* buffer);
+
 #endif							/* SMGR_H */