@@ -336,11 +336,7 @@ function alloc(device::ROCDevice, bytesize::Integer; coherent=false, slow_fallba
336
336
337
337
bytesize == 0 && return Buffer (C_NULL , C_NULL , C_NULL , 0 , device, coherent, false )
338
338
339
- region_kind = if coherent
340
- :finegrained
341
- else
342
- :coarsegrained
343
- end
339
+ region_kind = coherent ? :finegrained : :coarsegrained
344
340
345
341
buf = nothing
346
342
region = nothing
@@ -403,52 +399,45 @@ function alloc_or_retry!(f)
403
399
end
404
400
405
401
# Atomic byte counter that the allocation functions in this file increase by the
# requested size after each successful allocation.
# NOTE(review): presumably decreased again when a buffer is freed — confirm in `free`.
const ALL_ALLOCS = Threads.Atomic{Int64}(0)
406
# Raw HSA allocation call, selected by the kind of memory space. Each method
# passes `ptr_ref` to the HSA call (the caller reads the address back from it)
# and evaluates to whatever that call returns.
_alloc(p::ROCMemoryPool, bytesize::Integer, ptr_ref) =
    HSA.amd_memory_pool_allocate(p.pool, bytesize, 0, ptr_ref)
_alloc(p::ROCMemoryRegion, bytesize::Integer, ptr_ref) =
    HSA.memory_allocate(p.region, bytesize, ptr_ref)

# Accessibility query for a memory space, forwarded to the matching `Runtime`
# helper; the result is stored in the `Buffer` built by `alloc`.
_accessible(p::ROCMemoryRegion)::Bool = Runtime.region_host_accessible(p)
_accessible(p::ROCMemoryPool)::Bool = Runtime.pool_accessible_by_all(p)

"""
    alloc(device::ROCDevice, space, bytesize::Integer)

Allocate `bytesize` bytes from `space` (a `ROCMemoryPool` or a `ROCMemoryRegion`)
and return the `Buffer` describing the allocation.

The allocation call is run through `alloc_or_retry!`. Once that returns,
`AMDGPU.hsaref!()` is called and `ALL_ALLOCS` is increased by `bytesize`.
The last `Buffer` argument is `true` only when `space` is a `ROCMemoryPool`.
"""
function alloc(
    device::ROCDevice, space::S, bytesize::Integer,
) where S <: Union{ROCMemoryPool, ROCMemoryRegion}
    # NOTE(review): this method does not consult `MEMORY_ALLOC_LIMIT` before
    # allocating — confirm the limit is enforced by the caller or by
    # `alloc_or_retry!`.
    ptr_ref = Ref{Ptr{Cvoid}}()
    alloc_or_retry!() do
        _alloc(space, bytesize, ptr_ref)
    end
    ptr = ptr_ref[]

    AMDGPU.hsaref!()
    nbytes = Int64(bytesize)
    Threads.atomic_add!(ALL_ALLOCS, nbytes)

    from_pool = S <: ROCMemoryPool
    return Buffer(ptr, C_NULL, ptr, nbytes, device, _accessible(space), from_pool)
end
419
+
432
420
# Convenience method: allocate on the device returned by `AMDGPU.device()`,
# forwarding `bytesize` and all keyword arguments unchanged.
function alloc(bytesize; kwargs...)
    return alloc(AMDGPU.device(), bytesize; kwargs...)
end
433
421
434
422
@static if AMDGPU.hip_configured
    """
        alloc_hip(bytesize::Integer)

    Allocate `bytesize` bytes with `HIP.hipMalloc` and return a `Buffer` for the
    resulting pointer, bound to `AMDGPU.device()`.

    The HIP call is run through `alloc_or_retry!`. Any exception raised by the
    checked call is reported to it as `HSA.STATUS_ERROR_OUT_OF_RESOURCES`,
    otherwise `HSA.STATUS_SUCCESS` is reported. After that, `AMDGPU.hsaref!()` is
    called and `ALL_ALLOCS` is increased by `bytesize`.
    """
    function alloc_hip(bytesize::Integer)
        ptr_ref = Ref{Ptr{Cvoid}}()
        # FIXME: Set HIP device
        alloc_or_retry!() do
            status = try
                HIP.@check HIP.hipMalloc(ptr_ref, Csize_t(bytesize))
                HSA.STATUS_SUCCESS
            catch
                # FIXME: Actually check error code
                HSA.STATUS_ERROR_OUT_OF_RESOURCES
            end
            return status
        end

        AMDGPU.hsaref!()
        ptr = ptr_ref[]
        nbytes = Int64(bytesize)
        Threads.atomic_add!(ALL_ALLOCS, nbytes)
        return Buffer(ptr, C_NULL, ptr, nbytes, AMDGPU.device(), false, true)
    end
end # if AMDGPU.hip_configured
452
441
453
442
function free (buf:: Buffer )
454
443
buf. ptr == C_NULL && return
0 commit comments