Skip to content

Commit

Permalink
Document journal v2 index file format. (netdata#19701)
Browse files Browse the repository at this point in the history
  • Loading branch information
vkalintiris authored Feb 24, 2025
1 parent 6402d03 commit 81171c9
Showing 1 changed file with 163 additions and 49 deletions.
212 changes: 163 additions & 49 deletions src/database/engine/journalfile.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,67 +75,181 @@ struct journal_v2_block_trailer {
};
};

// Journal V2
// 28 bytes
/*
* Journal File V2 Format
*
* File Layout:
* +------------------------------------------------+
* | HEADER SECTION |
* | +--------------------------------------------+
* | | Header (72 bytes) |
* | | - magic, times, counts, offsets |
* | +--------------------------------------------+
* | | Padding (to 4096 bytes) |
* +------------------------------------------------+
* | EXTENT SECTION |
* | +--------------------------------------------+
* | | Extent Items |
* | | - Array of Extent Entries (16 bytes each)|
* | +--------------------------------------------+
* | | Extent Section Trailer (4 bytes CRC) |
* +------------------------------------------------+
* | METRICS SECTION |
* | +--------------------------------------------+
* | | Metric List |
* | | - Array of Metric Entries (36 bytes each)|
* | +--------------------------------------------+
* | | Metric List Trailer (4 bytes CRC) |
* +------------------------------------------------+
* | PAGE DATA SECTION |
* | +--------------------------------------------+
* | | For each metric: |
* | | - Page Header (28 bytes) |
* | | - Page Entries (20 bytes each) |
* | | - Page Trailer (4 bytes CRC) |
* +------------------------------------------------+
* | FILE TRAILER |
* | - File CRC (4 bytes) |
* +------------------------------------------------+
*
* Data Integrity:
* - All sections have CRC32 checksums
* - The file header CRC is stored in the file trailer
* - Each section (extent list, metric list) has its own trailer with CRC
* - Each page header includes a CRC of its content
*
* Time Representation:
* - File-level times are stored in microseconds (start_time_ut, end_time_ut)
* - Page-level times are stored as deltas in seconds relative to the journal start time
*/

// Journal v2 header (72 bytes)
struct journal_v2_header {
// File type identifier
// - 0x01230317: Normal journal file
// - 0x00230317: File needs rebuild
// - 0x02230317: File should be skipped
uint32_t magic;

// --- implicit padding of 4-bytes because the struct is not packed ---

// Mininimum start time in microseconds
usec_t start_time_ut;
// Maximum end time in microseconds
usec_t end_time_ut;

// Number of extents
uint32_t extent_count;
// Offset to extents section
uint32_t extent_offset;

// Number of metrics
uint32_t metric_count;
// Offset to metrics section
uint32_t metric_offset;

// Total count of pages
uint32_t page_count;
// Offset to page data section
uint32_t page_offset;

// Offset to extents section CRC
uint32_t extent_trailer_offset;
// Offset to metrics section CRC
uint32_t metric_trailer_offset;

// Size of original journal file
uint32_t journal_v1_file_size;
// Total file size
uint32_t journal_v2_file_size;

// Pointer used only when writing to build up the memory-mapped file.
void *data;
};

// Reserve the first 4 KiB for the journal v2 header
#define JOURNAL_V2_HEADER_PADDING_SZ (RRDENG_BLOCK_SIZE - (sizeof(struct journal_v2_header)))

// Extent section item (16 bytes)
struct journal_extent_list {
// Extent offset in datafile
uint64_t datafile_offset;

// Size of the extent
uint32_t datafile_size;

// Index of the data file
uint16_t file_index;

// Number of pages in the extent (not all are necesssarily valid)
uint8_t pages;

// --- implicit padding of 1-byte because the struct is not packed ---
};

// Metric section item (36 bytes)
struct journal_metric_list {
// Unique identifier of the metric
nd_uuid_t uuid;

// Number of pages for this metric
uint32_t entries;

// Offset to the page data section
// Points to: journal_page_header + (entries * journal_page_list)
uint32_t page_offset;

// Start time relative to journal start
uint32_t delta_start_s;

// End time relative to journal start
uint32_t delta_end_s;

// Last update every for this metric in this journal (last page collected)
uint32_t update_every_s;
};

// Page section item header (28 bytes)
struct journal_page_header {
// CRC32 of the header
union {
uint8_t checksum[CHECKSUM_SZ]; // CRC check
uint8_t checksum[CHECKSUM_SZ];
uint32_t crc;
};
uint32_t uuid_offset; // Points back to the UUID list which should point here (UUIDs should much)
uint32_t entries; // Entries
nd_uuid_t uuid; // Which UUID this is

// Offset to corresponding metric in metric list
uint32_t uuid_offset;

// Number of page items that follow
uint32_t entries;

// UUID of the metric
nd_uuid_t uuid;
};

// 20 bytes
// Page section item (20 bytes)
struct journal_page_list {
uint32_t delta_start_s; // relative to the start time of journal
uint32_t delta_end_s; // relative to delta_start
uint32_t extent_index; // Index to the extent (extent list) (bytes from BASE)
// Start time relative to journal start
uint32_t delta_start_s;

// End time relative to journal start
uint32_t delta_end_s;

// Offset into extent section
uint32_t extent_index;

// Update frequency
uint32_t update_every_s;
uint16_t page_length;
uint8_t type;
};

// UUID_LIST
// 36 bytes
struct journal_metric_list {
nd_uuid_t uuid;
uint32_t entries; // Number of entries
uint32_t page_offset; // OFFSET that contains entries * struct( journal_page_list )
uint32_t delta_start_s; // Min time of metric
uint32_t delta_end_s; // Max time of metric (to be used to populate page_index)
uint32_t update_every_s; // Last update every for this metric in this journal (last page collected)
};
// Length of the page
uint16_t page_length;

// 16 bytes
struct journal_extent_list {
uint64_t datafile_offset; // Datafile offset to find the extent
uint32_t datafile_size; // Size of the extent
uint16_t file_index; // which file index is this datafile[index]
uint8_t pages; // number of pages (not all are necesssarily valid)
};
// Page type identifier
uint8_t type;

// 72 bytes
struct journal_v2_header {
uint32_t magic;
usec_t start_time_ut; // Min start time of journal
usec_t end_time_ut; // Maximum end time of journal
uint32_t extent_count; // Count of extents
uint32_t extent_offset;
uint32_t metric_count; // Count of metrics (unique UUIDS)
uint32_t metric_offset;
uint32_t page_count; // Total count of pages (descriptors @ time)
uint32_t page_offset;
uint32_t extent_trailer_offset; // CRC for entent list
uint32_t metric_trailer_offset; // CRC for metric list
uint32_t journal_v1_file_size; // This is the original journal file
uint32_t journal_v2_file_size; // This is the total file size
void *data; // Used when building the index
// --- implicit padding of 1-byte because the struct is not packed ---
};

#define JOURNAL_V2_HEADER_PADDING_SZ (RRDENG_BLOCK_SIZE - (sizeof(struct journal_v2_header)))

struct wal;

void journalfile_v1_generate_path(struct rrdengine_datafile *datafile, char *str, size_t maxlen);
Expand Down Expand Up @@ -173,4 +287,4 @@ typedef struct {

struct rrdengine_datafile *njfv2idx_find_and_acquire_j2_header(NJFV2IDX_FIND_STATE *s);

#endif /* NETDATA_JOURNALFILE_H */
#endif /* NETDATA_JOURNALFILE_H */

0 comments on commit 81171c9

Please sign in to comment.