From bc5294528c46b969fe22bb7f239a55bb385bed84 Mon Sep 17 00:00:00 2001 From: Zsolt Parragi Date: Sun, 25 Aug 2024 20:42:56 +0100 Subject: [PATCH] Performance / memory usage fix: do not allocate memory in pg_tde_slot Until now, for some reason we allocated memory for each decrypted tuple in pg_tde_slot, and only freed all that memory when the transaction ended. This caused two issues: * palloc takes time, and the many small pallocs resulted in a significant performance drop in scans * sequential scans could potentially allocate memory to the entire table, requiring way too much memory for large tables This allocation is most likely a leftover from before we even used slots, and handled selects differently. With slots, we don't need them at all, as slots are not expected to handle multiple tuples at the same time, only the last one. This commit removes the palloc completely, and instead adds a single BLCKSZ array to the slot structure, which can hold any size of decrypted tuple. To be safe, it also disables the get tuple function, forcing the core code to use copy instead when needed. This change results in: 1. no "memory spike" during sequential scans 2. ~1.55x overhead instead of ~2.2x (The TODO in slot_copytuple will be addressed in a separate commit) --- src/access/pg_tde_slot.c | 41 ++++++++++++++++++-------------- src/include/access/pg_tde_slot.h | 3 ++- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/access/pg_tde_slot.c b/src/access/pg_tde_slot.c index 854ad0e1..cb6729c5 100644 --- a/src/access/pg_tde_slot.c +++ b/src/access/pg_tde_slot.c @@ -54,6 +54,8 @@ tdeheap_tts_buffer_heap_init(TupleTableSlot *slot) static void tdeheap_tts_buffer_heap_release(TupleTableSlot *slot) { + TDEBufferHeapTupleTableSlot *bslot = (TDEBufferHeapTupleTableSlot *) slot; + bslot->decrypted_tuple = NULL; } static void @@ -61,8 +63,6 @@ tdeheap_tts_buffer_heap_clear(TupleTableSlot *slot) { TDEBufferHeapTupleTableSlot *bslot = (TDEBufferHeapTupleTableSlot *) slot; - if (bslot->decrypted_tuple) - tdeheap_freetuple(bslot->decrypted_tuple); bslot->decrypted_tuple = NULL; /* @@ -244,19 +244,6 @@ tdeheap_tts_buffer_heap_copyslot(TupleTableSlot *dstslot, TupleTableSlot *srcslo } } -static HeapTuple -tdeheap_tts_buffer_heap_get_heap_tuple(TupleTableSlot *slot) -{ - BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot; - - Assert(!TTS_EMPTY(slot)); - - if (!bslot->base.tuple) - tdeheap_tts_buffer_heap_materialize(slot); - - return bslot->base.tuple; -} - static HeapTuple tdeheap_tts_buffer_heap_copy_heap_tuple(TupleTableSlot *slot) { @@ -448,6 +435,24 @@ tdeheap_slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *of slot->tts_flags &= ~TTS_FLAG_SLOW; } +static HeapTuple +slot_copytuple(void* buffer, HeapTuple tuple) +{ + HeapTuple newTuple; + + if (!HeapTupleIsValid(tuple) || tuple->t_data == NULL) + return NULL; + + newTuple = (HeapTuple) buffer; + newTuple->t_len = tuple->t_len; + newTuple->t_self = tuple->t_self; + newTuple->t_tableOid = tuple->t_tableOid; + newTuple->t_data = (HeapTupleHeader) ((char *) newTuple + HEAPTUPLESIZE); + // TODO: no need for this memcpy, don't decrypt in place instead! + memcpy((char *) newTuple->t_data, (char *) tuple->t_data, tuple->t_len); + return newTuple; +} + const TupleTableSlotOps TTSOpsTDEBufferHeapTuple = { .base_slot_size = sizeof(TDEBufferHeapTupleTableSlot), .init = tdeheap_tts_buffer_heap_init, @@ -460,7 +465,7 @@ const TupleTableSlotOps TTSOpsTDEBufferHeapTuple = { .is_current_xact_tuple = tdeheap_buffer_is_current_xact_tuple, #endif .copyslot = tdeheap_tts_buffer_heap_copyslot, - .get_heap_tuple = tdeheap_tts_buffer_heap_get_heap_tuple, + .get_heap_tuple = NULL, /* A buffer heap tuple table slot can not "own" a minimal tuple. */ .get_minimal_tuple = NULL, @@ -509,7 +514,7 @@ PGTdeExecStoreBufferHeapTuple(Relation rel, if (rel->rd_rel->relkind != RELKIND_TOASTVALUE) { RelKeyData *key = GetRelationKey(rel->rd_locator); - bslot->decrypted_tuple = tdeheap_copytuple(tuple); + bslot->decrypted_tuple = slot_copytuple(bslot->decrypted_buffer, tuple); PG_TDE_DECRYPT_TUPLE_EX(tuple, bslot->decrypted_tuple, key, "ExecStoreBuffer"); /* TODO: revisit this */ tuple->t_data = bslot->decrypted_tuple->t_data; @@ -551,7 +556,7 @@ PGTdeExecStorePinnedBufferHeapTuple(Relation rel, { RelKeyData *key = GetRelationKey(rel->rd_locator); - bslot->decrypted_tuple = tdeheap_copytuple(tuple); + bslot->decrypted_tuple = slot_copytuple(bslot->decrypted_buffer, tuple); PG_TDE_DECRYPT_TUPLE_EX(tuple, bslot->decrypted_tuple, key, "ExecStorePinnedBuffer"); /* TODO: revisit this */ tuple->t_data = bslot->decrypted_tuple->t_data; diff --git a/src/include/access/pg_tde_slot.h b/src/include/access/pg_tde_slot.h index b2c21a83..5f5c58b9 100644 --- a/src/include/access/pg_tde_slot.h +++ b/src/include/access/pg_tde_slot.h @@ -29,7 +29,8 @@ typedef struct TDEBufferHeapTupleTableSlot * such a case, since presumably base.tuple is pointing into the buffer.) */ Buffer buffer; /* tuple's buffer, or InvalidBuffer */ - HeapTuple decrypted_tuple; /* decrypted tuple */ + char decrypted_buffer[BLCKSZ]; + HeapTuple decrypted_tuple; /* decrypted tuple */ } TDEBufferHeapTupleTableSlot; extern PGDLLIMPORT const TupleTableSlotOps TTSOpsTDEBufferHeapTuple;