Skip to content

Resolve issues #5 and #1: reduce number of collisions in the ptrack map #6

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 16, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
.deps
*.so
*.o
ptrack--2.0.sql
Dockerfile

18 changes: 3 additions & 15 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@

MODULE_big = ptrack
OBJS = ptrack.o datapagemap.o engine.o $(WIN32RES)
EXTENSION = ptrack
EXTVERSION = 2.1
DATA = ptrack.sql ptrack--2.0--2.1.sql
DATA_built = $(EXTENSION)--$(EXTVERSION).sql
PGFILEDESC = "ptrack - block-level incremental backup engine"

EXTRA_CLEAN = $(EXTENSION)--$(EXTVERSION).sql
EXTENSION = ptrack
EXTVERSION = 2.2
DATA = ptrack--2.1.sql ptrack--2.0--2.1.sql ptrack--2.1--2.2.sql

TAP_TESTS = 1

Expand All @@ -22,13 +20,3 @@ top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

$(EXTENSION)--$(EXTVERSION).sql: ptrack.sql
cat $^ > $@

# temp-install: EXTRA_INSTALL=contrib/ptrack

# check-tap: temp-install
# $(prove_check)

# check: check-tap
95 changes: 57 additions & 38 deletions engine.c
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ ptrackMapInit(void)
sprintf(ptrack_path, "%s/%s", DataDir, PTRACK_PATH);
sprintf(ptrack_mmap_path, "%s/%s", DataDir, PTRACK_MMAP_PATH);

ptrack_map_reinit:

/* Remove old PTRACK_MMAP_PATH file, if exists */
if (ptrack_file_exists(ptrack_mmap_path))
durable_unlink(ptrack_mmap_path, LOG);
Expand All @@ -175,18 +177,15 @@ ptrackMapInit(void)
if (stat(ptrack_path, &stat_buf) == 0)
{
copy_file(ptrack_path, ptrack_mmap_path);
is_new_map = false; /* flag to check checksum */
is_new_map = false; /* flag to check map file format and checksum */
ptrack_fd = BasicOpenFile(ptrack_mmap_path, O_RDWR | PG_BINARY);
if (ptrack_fd < 0)
elog(ERROR, "ptrack init: failed to open map file \"%s\": %m", ptrack_mmap_path);
}
else
{
/* Create new file for PTRACK_MMAP_PATH */
ptrack_fd = BasicOpenFile(ptrack_mmap_path, O_RDWR | O_CREAT | PG_BINARY);
if (ptrack_fd < 0)
elog(ERROR, "ptrack init: failed to open map file \"%s\": %m", ptrack_mmap_path);
}

if (ptrack_fd < 0)
elog(ERROR, "ptrack init: failed to open map file \"%s\": %m", ptrack_mmap_path);

#ifdef WIN32
{
Expand Down Expand Up @@ -227,7 +226,19 @@ ptrackMapInit(void)
elog(ERROR, "ptrack init: wrong map format of file \"%s\"", ptrack_path);

/* Check ptrack version inside old ptrack map */
/* No-op for now, but may be used for future compatibility checks */
if (ptrack_map->version_num != PTRACK_VERSION_NUM)
{
ereport(WARNING,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg("ptrack init: map format version %d in the file \"%s\" is incompatible with loaded version %d",
ptrack_map->version_num, ptrack_path, PTRACK_VERSION_NUM),
errdetail("Deleting file \"%s\" and reinitializing ptrack map.", ptrack_path)));

/* Delete and try again */
durable_unlink(ptrack_path, LOG);
is_new_map = true;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Не могу найти, где делается unmap в этом случае?
При этом сразу после метки ptrack_map_reinit делается durable_unlink(ptrack_mmap_path).
В итоге, этот файл повисает невидимкой в файловой системе, и в адрессном пространстве процесса повисает его mmap.

Наверное есть смысл позвать здесь ptrackCleanFilesAndMap ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Да, похоже на то. Я сомневался в этом месте, но потом забыл и не разобрался до конца

goto ptrack_map_reinit;
}

/* Check CRC */
INIT_CRC32C(crc);
Expand Down Expand Up @@ -641,48 +652,56 @@ void
ptrack_mark_block(RelFileNodeBackend smgr_rnode,
ForkNumber forknum, BlockNumber blocknum)
{
PtBlockId bid;
size_t hash;
size_t slot1;
size_t slot2;
XLogRecPtr new_lsn;
PtBlockId bid;
/*
* We use pg_atomic_uint64 here only for alignment purposes, because
* pg_atomic_uint64 is forcely aligned on 8 bytes during the MSVC build.
* pg_atomic_uint64 is forcedly aligned on 8 bytes during the MSVC build.
*/
pg_atomic_uint64 old_lsn;
pg_atomic_uint64 old_init_lsn;

if (ptrack_map_size != 0 && (ptrack_map != NULL) &&
smgr_rnode.backend == InvalidBackendId) /* do not track temporary
* relations */
{
bid.relnode = smgr_rnode.node;
bid.forknum = forknum;
bid.blocknum = blocknum;
hash = BID_HASH_FUNC(bid);

if (RecoveryInProgress())
new_lsn = GetXLogReplayRecPtr(NULL);
else
new_lsn = GetXLogInsertRecPtr();
if (ptrack_map_size == 0
|| ptrack_map == NULL
|| smgr_rnode.backend != InvalidBackendId) /* do not track temporary
* relations */
return;

old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[hash]);
bid.relnode = smgr_rnode.node;
bid.forknum = forknum;
bid.blocknum = blocknum;

/* Atomically assign new init LSN value */
old_init_lsn.value = pg_atomic_read_u64(&ptrack_map->init_lsn);
hash = BID_HASH_FUNC(bid);
slot1 = hash % PtrackContentNblocks;
slot2 = ((hash << 32) | (hash >> 32)) % PtrackContentNblocks;

if (old_init_lsn.value == InvalidXLogRecPtr)
{
elog(DEBUG1, "ptrack_mark_block: init_lsn " UINT64_FORMAT " <- " UINT64_FORMAT, old_init_lsn.value, new_lsn);

while (old_init_lsn.value < new_lsn &&
!pg_atomic_compare_exchange_u64(&ptrack_map->init_lsn, (uint64 *) &old_init_lsn.value, new_lsn));
}
if (RecoveryInProgress())
new_lsn = GetXLogReplayRecPtr(NULL);
else
new_lsn = GetXLogInsertRecPtr();

elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT " <- " UINT64_FORMAT, hash, old_lsn.value, new_lsn);
/* Atomically assign new init LSN value */
old_init_lsn.value = pg_atomic_read_u64(&ptrack_map->init_lsn);
if (old_init_lsn.value == InvalidXLogRecPtr)
{
elog(DEBUG1, "ptrack_mark_block: init_lsn " UINT64_FORMAT " <- " UINT64_FORMAT, old_init_lsn.value, new_lsn);

/* Atomically assign new LSN value */
while (old_lsn.value < new_lsn &&
!pg_atomic_compare_exchange_u64(&ptrack_map->entries[hash], (uint64 *) &old_lsn.value, new_lsn));
elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT, hash, pg_atomic_read_u64(&ptrack_map->entries[hash]));
while (old_init_lsn.value < new_lsn &&
!pg_atomic_compare_exchange_u64(&ptrack_map->init_lsn, (uint64 *) &old_init_lsn.value, new_lsn));
}

/* Atomically assign new LSN value to the first slot */
old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[slot1]);
elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT " <- " UINT64_FORMAT, slot1, old_lsn.value, new_lsn);
while (old_lsn.value < new_lsn &&
!pg_atomic_compare_exchange_u64(&ptrack_map->entries[slot1], (uint64 *) &old_lsn.value, new_lsn));
elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT, hash, pg_atomic_read_u64(&ptrack_map->entries[slot1]));

/* And to the second */
old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[slot2]);
while (old_lsn.value < new_lsn &&
!pg_atomic_compare_exchange_u64(&ptrack_map->entries[slot2], (uint64 *) &old_lsn.value, new_lsn));
}
8 changes: 4 additions & 4 deletions engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ typedef struct PtrackMapHdr
{
/*
* Three magic bytes (+ \0) to be sure, that we are reading ptrack.map
* with a right PtrackMapHdr strucutre.
* with a right PtrackMapHdr structure.
*/
char magic[PTRACK_MAGIC_SIZE];

Expand All @@ -72,7 +72,6 @@ typedef struct PtrackMapHdr

typedef PtrackMapHdr * PtrackMap;

/* TODO: check MAXALIGN usage below */
/* Number of elements in ptrack map (LSN array) */
#define PtrackContentNblocks \
((ptrack_map_size - offsetof(PtrackMapHdr, entries) - sizeof(pg_crc32c)) / sizeof(pg_atomic_uint64))
Expand All @@ -84,9 +83,10 @@ typedef PtrackMapHdr * PtrackMap;
/* CRC32 value offset in order to directly access it in the mmap'ed memory chunk */
#define PtrackCrcOffset (PtrackActualSize - sizeof(pg_crc32c))

/* Map block address 'bid' to map slot */
/* Block address 'bid' to hash. To get slot position in map should be divided
* with '% PtrackContentNblocks' */
#define BID_HASH_FUNC(bid) \
(size_t)(DatumGetUInt64(hash_any_extended((unsigned char *)&bid, sizeof(bid), 0)) % PtrackContentNblocks)
(size_t)(DatumGetUInt64(hash_any_extended((unsigned char *)&bid, sizeof(bid), 0)))

/*
* Per process pointer to shared ptrack_map
Expand Down
29 changes: 29 additions & 0 deletions ptrack--2.1--2.2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/* ptrack/ptrack--2.1--2.2.sql */

-- Complain if script is sourced in psql, rather than via ALTER EXTENSION
\echo Use "ALTER EXTENSION ptrack UPDATE;" to load this file.\ quit

CREATE FUNCTION ptrack_get_change_stat(start_lsn pg_lsn)
RETURNS TABLE (
files bigint,
pages bigint,
"size, MB" numeric
) AS
$func$
DECLARE
block_size bigint;
BEGIN
block_size := (SELECT setting FROM pg_settings WHERE name = 'block_size');

RETURN QUERY
SELECT changed_files,
changed_pages,
block_size*changed_pages/(1024.0*1024)
FROM
(SELECT count(path) AS changed_files,
sum(
length(replace(right((pagemap)::text, -1)::varbit::text, '0', ''))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Если таблицы 8TB, то вот эта строчка потребует выделение 1GB памяти для преобразования ::varbit::text.
Соответственно, таблица 16TB потребует уже 2GB памяти, и постгресс просто сам не позволит этого сделать.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Это очень грустно, что varbit не имеет функции countbits.

Copy link
Contributor

@funny-falcon funny-falcon May 13, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

В любом случае, для ptrack_get_change_stat и ptrack_get_change_file_stat кажется нужно создать ptrack_get_pagecount (ну или другое название).
Или даже просто реализовать ptrack_get_change_file_stat полностью в сишке.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Таблицы же разбиты на сегменты по 1 ГБ дефолтно, а ptrack_get_pagemapset() выдаёт изначально битмапы per file/segment, то есть потребуется максимум в 1000 раз меньше памяти на каждое преобразование. Разве нет?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

А ок. Я ещё не посмотрел ptrack_get_pagemapset() .

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Слушай, но я бы всё равно поменял бы ptrack_get_pagemapset, добавив поле count в вывод.
pg_probackup при этом не поломается, т.к. он указывает поля, которые хочет.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Сделал

) AS changed_pages
FROM ptrack_get_pagemapset(start_lsn)) s;
END
$func$ LANGUAGE plpgsql;
2 changes: 2 additions & 0 deletions ptrack.sql → ptrack--2.1.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
/* ptrack/ptrack--2.1.sql */

-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION ptrack" to load this file. \quit

Expand Down
31 changes: 23 additions & 8 deletions ptrack.c
Original file line number Diff line number Diff line change
Expand Up @@ -420,10 +420,9 @@ PG_FUNCTION_INFO_V1(ptrack_get_pagemapset);
Datum
ptrack_get_pagemapset(PG_FUNCTION_ARGS)
{
PtScanCtx *ctx;
FuncCallContext *funcctx;
PtScanCtx *ctx;
MemoryContext oldcontext;
XLogRecPtr update_lsn;
datapagemap_t pagemap;
char gather_path[MAXPGPATH];

Expand Down Expand Up @@ -486,6 +485,12 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS)

while (true)
{
size_t hash;
size_t slot1;
size_t slot2;
XLogRecPtr update_lsn1;
XLogRecPtr update_lsn2;

/* Stop traversal if there are no more segments */
if (ctx->bid.blocknum > ctx->relsize)
{
Expand Down Expand Up @@ -525,15 +530,25 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS)
}
}

update_lsn = pg_atomic_read_u64(&ptrack_map->entries[BID_HASH_FUNC(ctx->bid)]);
hash = BID_HASH_FUNC(ctx->bid);
slot1 = hash % PtrackContentNblocks;
slot2 = ((hash << 32) | (hash >> 32)) % PtrackContentNblocks;

update_lsn1 = pg_atomic_read_u64(&ptrack_map->entries[slot1]);
update_lsn2 = pg_atomic_read_u64(&ptrack_map->entries[slot2]);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is better to fetch and check slot1 first, and only if check passed then fetch and check slot2.
This way you will save TLB and cache misses for slot2 for most of page items.
Note that compiler could not optimize/reorder atomic instructions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I hope that I did it


if (update_lsn1 != InvalidXLogRecPtr)
elog(DEBUG3, "ptrack: update_lsn1 %X/%X of blckno %u of file %s",
(uint32) (update_lsn1 >> 32), (uint32) update_lsn1,
ctx->bid.blocknum, ctx->relpath);

if (update_lsn != InvalidXLogRecPtr)
elog(DEBUG3, "ptrack: update_lsn %X/%X of blckno %u of file %s",
(uint32) (update_lsn >> 32), (uint32) update_lsn,
if (update_lsn2 != InvalidXLogRecPtr)
elog(DEBUG3, "ptrack: update_lsn2 %X/%X of blckno %u of file %s",
(uint32) (update_lsn1 >> 32), (uint32) update_lsn2,
ctx->bid.blocknum, ctx->relpath);

/* Block has been changed since specified LSN. Mark it in the bitmap */
if (update_lsn >= ctx->lsn)
/* Block has been changed since specified LSN. Mark it in the bitmap */
if (update_lsn1 >= ctx->lsn && update_lsn2 >= ctx->lsn)
datapagemap_add(&pagemap, ctx->bid.blocknum % ((BlockNumber) RELSEG_SIZE));

ctx->bid.blocknum += 1;
Expand Down
2 changes: 1 addition & 1 deletion ptrack.control
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# ptrack extension
comment = 'block-level incremental backup engine'
default_version = '2.1'
default_version = '2.2'
module_pathname = '$libdir/ptrack'
relocatable = true
4 changes: 2 additions & 2 deletions ptrack.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
#include "utils/relcache.h"

/* Ptrack version as a string */
#define PTRACK_VERSION "2.1"
#define PTRACK_VERSION "2.2"
/* Ptrack version as a number */
#define PTRACK_VERSION_NUM 210
#define PTRACK_VERSION_NUM 220

/*
* Structure identifying block on the disk.
Expand Down
6 changes: 5 additions & 1 deletion t/001_basic.pl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
use TestLib;
use Test::More;

plan tests => 23;
plan tests => 24;

my $node;
my $res;
Expand Down Expand Up @@ -115,6 +115,10 @@
qr/$rel_oid/,
'ptrack pagemapset should contain new relation oid');

# Check change stats
$res_stdout = $node->safe_psql("postgres", "SELECT pages FROM ptrack_get_change_stat('$flush_lsn')");
is($res_stdout > 0, 1, 'should be able to get aggregated stats of changes');

# We should be able to change ptrack map size (but loose all changes)
$node->append_conf(
'postgresql.conf', q{
Expand Down