Integrate shared chunk cache (H5SC) package with upper layers of library
Also includes some updates to the shared chunk cache interface and other general fixes. Currently non-functional, since the shared chunk cache itself hasn't been implemented yet.
fortnern authored Feb 24, 2025
2 parents d47e168 + d808f03 commit 520fcb6
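For orientation, a minimal sketch (not part of this commit's diff) of the calling pattern the dataset layer adopts in the H5Dchunk.c changes below, assuming only the H5SC_flush_dset() call and H5F_SHARED_CACHE() accessor that actually appear in the changed files; the wrapper function name is made up for this sketch.

/* Illustrative only: the query routines in H5Dchunk.c below flush a dataset's
 * cached chunks through the file's shared chunk cache before consulting the
 * chunk index. `example_prologue` is a hypothetical name. */
static herr_t
example_prologue(H5D_t *dset)
{
    herr_t ret_value = SUCCEED;

    FUNC_ENTER_PACKAGE

    /* Get the file's shared chunk cache and flush this dataset's chunks */
    if (H5SC_flush_dset(H5F_SHARED_CACHE(dset->oloc.file), dset, false) < 0)
        HGOTO_ERROR(H5E_DATASET, H5E_CANTFLUSH, FAIL, "cannot flush shared chunk cache for dataset");

done:
    FUNC_LEAVE_NOAPI(ret_value)
} /* end example_prologue() */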
Showing 20 changed files with 738 additions and 398 deletions.
2 changes: 2 additions & 0 deletions src/CMakeLists.txt
@@ -829,6 +829,7 @@ set (common_SRCS
${H5UC_SOURCES}
${H5RS_SOURCES}
${H5S_SOURCES}
${H5SC_SOURCES}
${H5SL_SOURCES}
${H5SM_SOURCES}
${H5T_SOURCES}
@@ -869,6 +870,7 @@ set (H5_PUBLIC_HEADERS
${H5PL_HDRS}
${H5R_HDRS}
${H5S_HDRS}
${H5SC_HDRS}
${H5SM_HDRS}
${H5T_HDRS}
${H5TS_HDRS}
69 changes: 68 additions & 1 deletion src/H5CX.c
@@ -2900,6 +2900,40 @@ H5CX_set_mpio_actual_chunk_opt(H5D_mpio_actual_chunk_opt_mode_t mpio_actual_chun
FUNC_LEAVE_NOAPI_VOID
} /* end H5CX_set_mpio_actual_chunk_opt() */

/*-------------------------------------------------------------------------
* Function: H5CX_or_mpio_actual_chunk_opt
*
* Purpose: Performs a bitwise "or" operation on the actual chunk
* optimization mode used for parallel I/O for the current API
* call context.
*
* Return: <none>
*
*-------------------------------------------------------------------------
*/
void
H5CX_or_mpio_actual_chunk_opt(H5D_mpio_actual_chunk_opt_mode_t mpio_actual_chunk_opt)
{
H5CX_node_t **head = NULL; /* Pointer to head of API context list */

FUNC_ENTER_NOAPI_NOINIT_NOERR

/* Sanity checks */
head = H5CX_get_my_context(); /* Get the pointer to the head of the API context, for this thread */
assert(head && *head);
assert(!((*head)->ctx.dxpl_id == H5P_DEFAULT || (*head)->ctx.dxpl_id == H5P_DATASET_XFER_DEFAULT));

/* If the value is already set, "or" it with the requested value, otherwise simply set it */
if ((*head)->ctx.mpio_actual_chunk_opt_set)
(*head)->ctx.mpio_actual_chunk_opt |= mpio_actual_chunk_opt;
else {
(*head)->ctx.mpio_actual_chunk_opt = mpio_actual_chunk_opt;
(*head)->ctx.mpio_actual_chunk_opt_set = true;
}

FUNC_LEAVE_NOAPI_VOID
} /* end H5CX_or_mpio_actual_chunk_opt() */
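One plausible use of the "or" variant (not stated in the commit): because H5D_MPIO_NO_CHUNK_OPTIMIZATION is 0 in H5Dpublic.h, OR-ing it in leaves a more specific value recorded earlier in the same API call intact, whereas the plain setter would overwrite it. A rough illustration with hypothetical call sites:

/* Illustrative only -- call sites are hypothetical */
H5CX_or_mpio_actual_chunk_opt(H5D_MPIO_LINK_CHUNK);            /* first path: value not yet set, so it is recorded         */
H5CX_or_mpio_actual_chunk_opt(H5D_MPIO_NO_CHUNK_OPTIMIZATION); /* later path: ORs in 0, leaving H5D_MPIO_LINK_CHUNK intact */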

/*-------------------------------------------------------------------------
* Function: H5CX_set_mpio_actual_io_mode
*
@@ -2926,7 +2960,40 @@ H5CX_set_mpio_actual_io_mode(H5D_mpio_actual_io_mode_t mpio_actual_io_mode)
(*head)->ctx.mpio_actual_io_mode_set = true;

FUNC_LEAVE_NOAPI_VOID
} /* end H5CX_set_mpio_actual_chunk_opt() */
} /* end H5CX_set_mpio_actual_io_mode() */

/*-------------------------------------------------------------------------
* Function: H5CX_or_mpio_actual_io_mode
*
* Purpose: Performs a bitwise "or" operation on the actual I/O mode
* used for parallel I/O for the current API call context.
*
* Return: <none>
*
*-------------------------------------------------------------------------
*/
void
H5CX_or_mpio_actual_io_mode(H5D_mpio_actual_io_mode_t mpio_actual_io_mode)
{
H5CX_node_t **head = NULL; /* Pointer to head of API context list */

FUNC_ENTER_NOAPI_NOINIT_NOERR

/* Sanity checks */
head = H5CX_get_my_context(); /* Get the pointer to the head of the API context, for this thread */
assert(head && *head);
assert(!((*head)->ctx.dxpl_id == H5P_DEFAULT || (*head)->ctx.dxpl_id == H5P_DATASET_XFER_DEFAULT));

/* If the value is already set, "or" it with the requested value, otherwise simply set it */
if ((*head)->ctx.mpio_actual_io_mode_set)
(*head)->ctx.mpio_actual_io_mode |= mpio_actual_io_mode;
else {
(*head)->ctx.mpio_actual_io_mode = mpio_actual_io_mode;
(*head)->ctx.mpio_actual_io_mode_set = true;
}

FUNC_LEAVE_NOAPI_VOID
} /* end H5CX_or_mpio_actual_io_mode() */
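The "or" semantics pay off here because H5D_mpio_actual_io_mode_t in H5Dpublic.h is a bit-flag enum, with H5D_MPIO_CHUNK_MIXED defined as the OR of the independent and collective chunk flags. A short illustration with hypothetical call sites:

/* Illustrative only -- call sites are hypothetical */
H5CX_or_mpio_actual_io_mode(H5D_MPIO_CHUNK_INDEPENDENT); /* not yet set: value is recorded            */
H5CX_or_mpio_actual_io_mode(H5D_MPIO_CHUNK_COLLECTIVE);  /* already set: OR'd into the recorded value */
/* The application would then see H5D_MPIO_CHUNK_MIXED from H5Pget_mpio_actual_io_mode(). */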

/*-------------------------------------------------------------------------
* Function: H5CX_set_mpio_local_no_coll_cause
2 changes: 2 additions & 0 deletions src/H5CXprivate.h
@@ -395,7 +395,9 @@ H5_DLL void H5CX_set_actual_selection_io_mode(uint32_t actual_selection_io_mode)

#ifdef H5_HAVE_PARALLEL
H5_DLL void H5CX_set_mpio_actual_chunk_opt(H5D_mpio_actual_chunk_opt_mode_t chunk_opt);
H5_DLL void H5CX_or_mpio_actual_chunk_opt(H5D_mpio_actual_chunk_opt_mode_t chunk_opt);
H5_DLL void H5CX_set_mpio_actual_io_mode(H5D_mpio_actual_io_mode_t actual_io_mode);
H5_DLL void H5CX_or_mpio_actual_io_mode(H5D_mpio_actual_io_mode_t actual_io_mode);
H5_DLL void H5CX_set_mpio_local_no_coll_cause(uint32_t mpio_local_no_coll_cause);
H5_DLL void H5CX_set_mpio_global_no_coll_cause(uint32_t mpio_global_no_coll_cause);
#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
137 changes: 51 additions & 86 deletions src/H5Dchunk.c
@@ -56,6 +56,7 @@
#include "H5MMprivate.h" /* Memory management */
#include "H5MFprivate.h" /* File memory management */
#include "H5PBprivate.h" /* Page Buffer */
#include "H5SCprivate.h" /* Shared chunk cache */
#include "H5SLprivate.h" /* Skip Lists */
#include "H5VMprivate.h" /* Vector and array functions */

@@ -317,6 +318,7 @@ static herr_t H5D__chunk_collective_fill(const H5D_t *dset, H5D_chunk_coll_fill_
const void *fill_buf, const void *partial_chunk_fill_buf);
static int H5D__chunk_cmp_coll_fill_info(const void *_entry1, const void *_entry2);
#endif /* H5_HAVE_PARALLEL */
static herr_t H5D__chunk_verify_offset(const H5D_t *dset, const hsize_t *offset);

/* Debugging helper routine callback */
static int H5D__chunk_dump_index_cb(const H5D_chunk_rec_t *chunk_rec, void *_udata);
@@ -378,7 +380,8 @@ H5FL_EXTERN(H5S_sel_iter_t);
*-------------------------------------------------------------------------
*/
herr_t
H5D__chunk_direct_write(H5D_t *dset, uint32_t filters, hsize_t *offset, uint32_t data_size, const void *buf)
H5D__chunk_direct_write(H5D_t *dset, uint32_t filters, const hsize_t *offset, uint32_t data_size,
const void *buf)
{
const H5O_layout_t *layout = &(dset->shared->layout); /* Dataset layout */
H5D_chunk_ud_t udata; /* User data for querying chunk info */
@@ -393,6 +396,11 @@ H5D__chunk_direct_write(H5D_t *dset, uint32_t filters, hsize_t *offset, uint32_t
/* Sanity checks */
assert(layout->type == H5D_CHUNKED);

/* Verify that the user's offset array is within the dataset's bounds and
 * falls on chunk boundaries (we don't modify the user's buffer). */
if (H5D__chunk_verify_offset(dset, offset) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "invalid chunk offset");

/* Allocate dataspace and initialize it if it hasn't been. */
if (!H5D__chunk_is_space_alloc(&layout->storage))
if (H5D__alloc_storage(dset, H5D_ALLOC_WRITE, false, NULL) < 0)
@@ -483,7 +491,7 @@ H5D__chunk_direct_write(H5D_t *dset, uint32_t filters, hsize_t *offset, uint32_t
*-------------------------------------------------------------------------
*/
herr_t
H5D__chunk_direct_read(const H5D_t *dset, hsize_t *offset, uint32_t *filters, void *buf)
H5D__chunk_direct_read(const H5D_t *dset, const hsize_t *offset, uint32_t *filters, void *buf)
{
const H5O_layout_t *layout = &(dset->shared->layout); /* Dataset layout */
const H5D_rdcc_t *rdcc = &(dset->shared->cache.chunk); /* raw data chunk cache */
@@ -501,6 +509,11 @@ H5D__chunk_direct_read(const H5D_t *dset, hsize_t *offset, uint32_t *filters, vo

*filters = 0;

/* Verify that the user's offset array is within the dataset's bounds and
 * falls on chunk boundaries (we don't modify the user's buffer). */
if (H5D__chunk_verify_offset(dset, offset) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "invalid chunk offset");

/* Allocate dataspace and initialize it if it hasn't been. */
if (!H5D__chunk_is_space_alloc(&layout->storage) && !H5D__chunk_is_data_cached(dset->shared))
HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "storage is not initialized");
@@ -8222,20 +8235,16 @@ H5D__chunk_iter(H5D_t *dset, H5D_chunk_iter_op_t op, void *op_data)
} /* end H5D__chunk_iter() */

/*-------------------------------------------------------------------------
* Function: H5D__chunk_get_offset_copy
* Function: H5D__chunk_verify_offset
*
* Purpose: Copies an offset buffer and performs bounds checks on the
* values.
*
* This helper function ensures that the offset buffer given
* by the user is suitable for use with the rest of the library.
* Purpose: Performs bounds checks on the provided chunk offset values.
*
* Return: SUCCEED/FAIL
*
*-------------------------------------------------------------------------
*/
herr_t
H5D__chunk_get_offset_copy(const H5D_t *dset, const hsize_t *offset, hsize_t *offset_copy)
static herr_t
H5D__chunk_verify_offset(const H5D_t *dset, const hsize_t *offset)
{
unsigned u;
herr_t ret_value = SUCCEED; /* Return value */
@@ -8244,13 +8253,6 @@ H5D__chunk_get_offset_copy(const H5D_t *dset, const hsize_t *offset, hsize_t *of

assert(dset);
assert(offset);
assert(offset_copy);

/* The library's chunking code requires the offset to terminate with a zero.
* So transfer the offset array to an internal offset array that we
* can properly terminate (handled via the memset call).
*/
memset(offset_copy, 0, H5O_LAYOUT_NDIMS * sizeof(hsize_t));

for (u = 0; u < dset->shared->ndims; u++) {
/* Make sure the offset doesn't exceed the dataset's dimensions */
@@ -8260,70 +8262,11 @@ H5D__chunk_get_offset_copy(const H5D_t *dset, const hsize_t *offset, hsize_t *of
/* Make sure the offset falls right on a chunk's boundary */
if (offset[u] % dset->shared->layout.u.chunk.dim[u])
HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "offset doesn't fall on chunk's boundary");

offset_copy[u] = offset[u];
}

done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5D__chunk_get_offset_copy() */

/*-------------------------------------------------------------------------
* Function: H5D__write_struct_chunk_direct
*
* Purpose: Internal routine to write a structured chunk directly into the file.
*
* Return: Non-negative on success/Negative on failure
*
*-------------------------------------------------------------------------
*/
herr_t
H5D__write_struct_chunk_direct(H5D_t H5_ATTR_UNUSED *dset, hsize_t H5_ATTR_UNUSED *offset,
H5D_struct_chunk_info_t H5_ATTR_UNUSED *chunk_info, void H5_ATTR_UNUSED *buf[])
{
herr_t ret_value = SUCCEED; /* Return value */

FUNC_ENTER_PACKAGE_NOERR

/* Sanity checks */
/* TBD: check for H5D_SPARSE_CHUNK */
/* assert(layout->type == H5D_SPARSE_CHUNK); */

/* TBD: set up and call routine to write the structured chunk */
/* FOR NOW: just return success */

FUNC_LEAVE_NOAPI(ret_value)
} /* end H5D__write_struct_chunk_direct() */

/*-------------------------------------------------------------------------
* Function: H5D__read_struct_chunk_direct
*
* Purpose: Internal routine to read a structured chunk directly from the file.
*
* Return: Non-negative on success/Negative on failure
*
*-------------------------------------------------------------------------
*/
herr_t
H5D__read_struct_chunk_direct(const H5D_t H5_ATTR_UNUSED *dset, hsize_t H5_ATTR_UNUSED *offset,
H5D_struct_chunk_info_t H5_ATTR_UNUSED *chunk_info, void H5_ATTR_UNUSED *buf[])
{
herr_t ret_value = SUCCEED; /* Return value */

FUNC_ENTER_PACKAGE_NOERR

/* Check args */
/* TBD: check for H5D_SPARSE_CHUNK */
/* assert(dset && H5D_SPARSE_CHUNK == layout->type); */
assert(offset);
assert(chunk_info);
assert(buf);

/* TBD: set up and call routine to read the structured chunk */
/* FOR NOW: just return success */

FUNC_LEAVE_NOAPI(ret_value)
} /* end H5D__read_struct_chunk_direct() */
} /* end H5D__chunk_verify_offset() */
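A quick illustration of what the two checks in H5D__chunk_verify_offset() accept and reject; the dataset and chunk dimensions below are made-up values, and the bounds check itself is collapsed out of this hunk, so its exact form is assumed.

/* Illustrative only: assume a 2-D dataset with current dims {100, 200} and chunk dims {10, 20} */
hsize_t on_boundary[2]  = {30, 40};  /* within bounds and multiples of the chunk dims -> SUCCEED        */
hsize_t off_boundary[2] = {30, 45};  /* 45 % 20 != 0 -> fails the chunk-boundary check                  */
hsize_t too_large[2]    = {120, 40}; /* 120 exceeds the dataset's first dimension -> fails bounds check */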

/*-------------------------------------------------------------------------
* Function: H5D__get_struct_chunk_info
@@ -8339,22 +8282,30 @@ H5D__read_struct_chunk_direct(const H5D_t H5_ATTR_UNUSED *dset, hsize_t H5_ATTR_
*-------------------------------------------------------------------------
*/
herr_t
H5D__get_struct_chunk_info(const H5D_t H5_ATTR_UNUSED *dset, const H5S_t H5_ATTR_UNUSED *space,
hsize_t H5_ATTR_UNUSED chunk_idx, hsize_t H5_ATTR_UNUSED *offset,
H5D_struct_chunk_info_t H5_ATTR_UNUSED *chunk_info, haddr_t H5_ATTR_UNUSED *addr,
hsize_t H5_ATTR_UNUSED *chunk_size)
H5D__get_struct_chunk_info(H5D_t *dset, const H5S_t H5_ATTR_UNUSED *space, hsize_t H5_ATTR_UNUSED chunk_idx,
hsize_t H5_ATTR_UNUSED *offset, H5D_struct_chunk_info_t H5_ATTR_UNUSED *chunk_info,
haddr_t H5_ATTR_UNUSED *addr, hsize_t H5_ATTR_UNUSED *chunk_size)
{
herr_t ret_value = SUCCEED; /* Return value */

FUNC_ENTER_PACKAGE_NOERR
FUNC_ENTER_PACKAGE

assert(dset);
assert(dset->shared);
assert(space);

/* Flush the dataset's cached chunks out to disk, to make certain the size is correct later */
/* It should be possible to optimize this in the future by only flushing the target chunk, later
 * looking up the target chunk directly instead of iterating, and potentially skipping the flush
 * and/or index query entirely if the shared chunk cache already has all the needed information.
 * For now, just mirror the previous algorithm for legacy chunks. */
if (H5SC_flush_dset(H5F_SHARED_CACHE(dset->oloc.file), dset, false) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTFLUSH, FAIL, "cannot flush shared chunk cache for dataset");

/* TBD: go get structured chunk information using chunk index */
/* FOR NOW: just return success */

done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5D__get_struct_chunk_info() */

@@ -8370,22 +8321,31 @@ H5D__get_struct_chunk_info(const H5D_t H5_ATTR_UNUSED *dset, const H5S_t H5_ATTR
*-------------------------------------------------------------------------
*/
herr_t
H5D__get_struct_chunk_info_by_coord(const H5D_t H5_ATTR_UNUSED *dset, const hsize_t H5_ATTR_UNUSED *offset,
H5D__get_struct_chunk_info_by_coord(H5D_t *dset, const hsize_t H5_ATTR_UNUSED *offset,
H5D_struct_chunk_info_t H5_ATTR_UNUSED *chunk_info,
haddr_t H5_ATTR_UNUSED *addr, hsize_t H5_ATTR_UNUSED *chunk_size)
{
herr_t ret_value = SUCCEED; /* Return value */

FUNC_ENTER_PACKAGE_NOERR
FUNC_ENTER_PACKAGE

/* Check args */
assert(dset);
assert(dset->shared);
assert(offset);

/* Flush the dataset's cached chunks out to disk, to make certain the size is correct later */
/* It should be possible to optimize this in the future by only flushing the target chunk, later
 * looking up the target chunk directly instead of iterating, and potentially skipping the flush
 * and/or index query entirely if the shared chunk cache already has all the needed information.
 * For now, just mirror the previous algorithm for legacy chunks. */
if (H5SC_flush_dset(H5F_SHARED_CACHE(dset->oloc.file), dset, false) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTFLUSH, FAIL, "cannot flush shared chunk cache for dataset");

/* TBD: go get structured chunk information using chunk coordinates */
/* FOR NOW: just return success */

done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5D__get_struct_chunk_info_by_coord() */

@@ -8399,19 +8359,24 @@ H5D__get_struct_chunk_info_by_coord(const H5D_t H5_ATTR_UNUSED *dset, const hsiz
*-------------------------------------------------------------------------
*/
herr_t
H5D__struct_chunk_iter(H5D_t H5_ATTR_UNUSED *dset, H5D_struct_chunk_iter_op_t H5_ATTR_UNUSED op,
H5D__struct_chunk_iter(H5D_t *dset, H5D_struct_chunk_iter_op_t H5_ATTR_UNUSED op,
void H5_ATTR_UNUSED *op_data)
{
herr_t ret_value = SUCCEED; /* Return value */

FUNC_ENTER_PACKAGE_NOERR
FUNC_ENTER_PACKAGE

/* Check args */
assert(dset);
assert(dset->shared);

/* Flush the dataset's cached chunks out to disk, to make certain the size is correct later */
if (H5SC_flush_dset(H5F_SHARED_CACHE(dset->oloc.file), dset, false) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTFLUSH, FAIL, "cannot flush shared chunk cache for dataset");

/* TBD: iterate over all the structured chunks in the dataset */
/* FOR NOW: just return success */

done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5D__chunk_iter() */
