Skip to content

Commit

Permalink
Fix evicting data from GPU when main ram is full of cached tiles
Browse files Browse the repository at this point in the history
If we have cached data of the right size, we can evict to it.
This does not fix the case where tiles have different sizes; for that we would
need to maintain the size of the cached allocations.
  • Loading branch information
sthibaul committed Mar 29, 2024
1 parent d9dc85d commit 6dbcd4e
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 5 deletions.
42 changes: 39 additions & 3 deletions src/datawizard/memalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,37 @@ static int _starpu_data_interface_compare(void *data_interface_a, struct starpu_
return ret;
}

#ifdef STARPU_USE_ALLOCATION_CACHE
#ifndef STARPU_USE_ALLOCATION_CACHE
/* Variant used when STARPU_USE_ALLOCATION_CACHE is not defined: there is no
 * allocation cache to search, so every query is reported as a miss (0). */
static int _starpu_memchunk_cache_test_locked(unsigned node, starpu_data_handle_t handle, uint32_t footprint)
{
	/* Parameters are only kept so that callers need no #ifdef of their own */
	(void) node;
	(void) handle;
	(void) footprint;
	return 0;
}
#else
/* Tell whether the allocation cache of memory node `node` contains a chunk
 * matching `handle`.
 *
 * `footprint` is the key looked up in the node's mc_cache hash table. Every
 * chunk stored under that key is then compared against the handle's interface
 * on `node`, since two different layouts may share the same footprint.
 *
 * Returns 1 if a matching cached chunk exists, 0 otherwise. Nothing is removed
 * from the cache; this is only a test.
 *
 * NOTE(review): the _locked suffix suggests the caller is expected to hold the
 * node's mc_lock, as for _starpu_memchunk_cache_lookup_locked -- confirm at
 * the call sites.
 */
static int _starpu_memchunk_cache_test_locked(unsigned node, starpu_data_handle_t handle, uint32_t footprint)
{
	struct mc_cache_entry *entry;
	struct _starpu_node *node_struct = _starpu_get_node_struct(node);
	struct _starpu_mem_chunk *mc;

	/* Locate the bucket of cached chunks sharing this footprint */
	HASH_FIND(hh, node_struct->mc_cache, &footprint, sizeof(footprint), entry);
	if (entry == NULL)
	{
		/* Nothing cached under that footprint */
		return 0;
	}

	/* Walk the bucket until a chunk really matches the handle */
	mc = _starpu_mem_chunk_list_begin(&entry->list);
	while (mc != _starpu_mem_chunk_list_end(&entry->list))
	{
		/* A footprint collision with a different layout is possible
		 * (though very unlikely), hence the full interface comparison */
		if (_starpu_data_interface_compare(handle->per_node[node].data_interface, handle->ops, mc->chunk_interface, mc->ops) == 1)
		{
			/* Cache hit */
			return 1;
		}

		mc = _starpu_mem_chunk_list_next(mc);
	}

	/* Every candidate was a false hit: cache miss */
	return 0;
}

/* This function must be called with node->mc_lock taken */
static struct _starpu_mem_chunk *_starpu_memchunk_cache_lookup_locked(unsigned node, starpu_data_handle_t handle, uint32_t footprint)
{
Expand Down Expand Up @@ -1979,10 +2009,13 @@ choose_target(starpu_data_handle_t handle, unsigned node)
{
unsigned i;
unsigned nb_numa_nodes = starpu_memory_nodes_get_numa_count();
uint32_t footprint = _starpu_compute_data_alloc_footprint(handle);
for (i=0; i<nb_numa_nodes; i++)
{
if (handle->per_node[i].allocated ||
_starpu_memory_manager_test_allocate_size(i, size_handle) == 1)
_starpu_memory_manager_test_allocate_size(i, size_handle) == 1 ||
/* FIXME: should rather maintain how many bytes we have in cache, for the different-tiles case */
_starpu_memchunk_cache_test_locked(i, handle, footprint))
{
target = i;
break;
Expand Down Expand Up @@ -2011,10 +2044,13 @@ choose_target(starpu_data_handle_t handle, unsigned node)
/* try to push data to RAM if we can, before pushing to disk */
unsigned i;
unsigned nb_numa_nodes = starpu_memory_nodes_get_numa_count();
uint32_t footprint = _starpu_compute_data_alloc_footprint(handle);
for (i=0; i<nb_numa_nodes; i++)
{
if (handle->per_node[i].allocated ||
_starpu_memory_manager_test_allocate_size(i, size_handle) == 1)
_starpu_memory_manager_test_allocate_size(i, size_handle) == 1 ||
/* FIXME: should rather maintain how many bytes we have in cache, for the different-tiles case */
_starpu_memchunk_cache_test_locked(i, handle, footprint))
{
target = i;
break;
Expand Down
25 changes: 23 additions & 2 deletions tests/datawizard/allocate.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* StarPU --- Runtime system for heterogeneous multicore architectures.
*
* Copyright (C) 2013-2023 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
* Copyright (C) 2013-2024 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
*
* StarPU is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
Expand Down Expand Up @@ -38,7 +38,7 @@ int test_prefetch(unsigned memnodes)
{
float *buffers[4];
starpu_data_handle_t handles[4];
unsigned i;
unsigned i, j;
starpu_ssize_t available_size;

if (starpu_getenv_number_default("STARPU_DIDUSE_BARRIER", 0))
Expand Down Expand Up @@ -120,6 +120,27 @@ int test_prefetch(unsigned memnodes)
free(buffers[i]);
}

i = 0;
available_size = starpu_memory_get_available(i);
FPRINTF(stderr, "Available memory size on node %u: %zd\n", i, available_size);
STARPU_CHECK_RETURN_VALUE_IS((int)available_size, SIZE_ALLOC*1024*1024, "starpu_memory_get_available (node %u)", i);

/* Write data directly to the GPU without any allocation in main memory; StarPU should be able to evict to main memory progressively */
for(i=1 ; i<memnodes ; i++)
{
for(j=0 ; j<4 ; j++)
{
starpu_variable_data_register(&handles[j], -1, 0, SIZE_ALLOC*1024*400);
starpu_data_acquire_on_node(handles[j], i, STARPU_W);
starpu_data_release_on_node(handles[j], i);
}
for(j=0 ; j<4 ; j++)
{
starpu_data_unregister(handles[j]);
}
}


for(i=1 ; i<memnodes ; i++)
{
available_size = starpu_memory_get_available(i);
Expand Down

0 comments on commit 6dbcd4e

Please sign in to comment.