Skip to content

Commit 0fece1f

Browse files
authored
Use GPUArrays allocations cache (#717)
1 parent c8f8afd commit 0fece1f

6 files changed: +14 -32 lines changed

Project.toml

+1 -1
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ Atomix = "0.1, 1"
4646
CEnum = "0.4, 0.5"
4747
ChainRulesCore = "1"
4848
ExprTools = "0.1"
49-
GPUArrays = "11.1"
49+
GPUArrays = "11.2"
5050
GPUCompiler = "0.27, 1.0"
5151
KernelAbstractions = "0.9.2"
5252
LLD_jll = "15, 16, 17"

src/AMDGPU.jl

-2
Original file line number | Diff line number | Diff line change
@@ -140,8 +140,6 @@ include("ROCKernels.jl")
140140
import .ROCKernels: ROCBackend
141141
export ROCBackend
142142

143-
# include("cache_allocator.jl")
144-
145143
function __init__()
146144
# Used to shutdown hostcalls if any is running.
147145
atexit(() -> begin Runtime.RT_EXITING[] = true end)

src/array.jl

+6 -20
Original file line number | Diff line number | Diff line change
@@ -3,31 +3,17 @@ mutable struct ROCArray{T, N, B} <: AbstractGPUArray{T, N}
33
dims::Dims{N}
44
offset::Int # Offset is in number of elements (not bytes).
55

6-
function ROCArray{T, N, B}(
7-
::UndefInitializer, dims::Dims{N},
8-
) where {T, N, B <: Mem.AbstractAMDBuffer}
6+
function ROCArray{T, N, B}(::UndefInitializer, dims::Dims{N}) where {T, N, B <: Mem.AbstractAMDBuffer}
97
@assert isbitstype(T) "ROCArray only supports bits types"
10-
function _alloc_f()
11-
sz::Int64 = prod(dims) * sizeof(T)
8+
sz::Int64 = prod(dims) * sizeof(T)
9+
return GPUArrays.cached_alloc((ROCArray, AMDGPU.device(), T, B, sz)) do
1210
@debug "Allocate `T=$T`, `dims=$dims`: $(Base.format_bytes(sz))"
1311
data = DataRef(pool_free, pool_alloc(B, sz))
14-
finalizer(unsafe_free!, new{T, N, B}(data, dims, 0))
15-
end
16-
return _alloc_f()
17-
18-
# name = GPUArrays.CacheAllocatorName[]
19-
# # Do not use caching allocator if it is not set or
20-
# # the buffer is not a device memory.
21-
# return if !(B <: Mem.HIPBuffer) || name == :none
22-
# _alloc_f()
23-
# else
24-
# GPUArrays.alloc!(_alloc_f, ROCBackend(), name, T, dims)::ROCArray{T, N, B}
25-
# end
12+
return finalizer(unsafe_free!, new{T, N, B}(data, dims, 0))
13+
end::ROCArray{T, N, B}
2614
end
2715

28-
function ROCArray{T, N}(
29-
buf::DataRef{Managed{B}}, dims::Dims{N}; offset::Integer = 0,
30-
) where {T, N, B <: Mem.AbstractAMDBuffer}
16+
function ROCArray{T, N}(buf::DataRef{Managed{B}}, dims::Dims{N}; offset::Integer = 0) where {T, N, B <: Mem.AbstractAMDBuffer}
3117
@assert isbitstype(T) "ROCArray only supports bits types"
3218
xs = new{T, N, B}(buf, dims, offset)
3319
return finalizer(unsafe_free!, xs)

src/cache_allocator.jl

-5
This file was deleted.

src/exception_handler.jl

+4 -4
Original file line number | Diff line number | Diff line change
@@ -40,9 +40,9 @@ struct ExceptionHolder
4040
n_str_buffers = 100
4141

4242
exception_flag = Mem.HostBuffer(sizeof(Int32), HIP.hipHostAllocDefault)
43-
gate, buffers_counter, str_buffers_counter = (#GPUArrays.@no_cache_scope begin
43+
gate, buffers_counter, str_buffers_counter = GPUArrays.@uncached begin
4444
ROCArray(UInt64[0]), ROCArray(Int32[0]), ROCArray(Int32[0])
45-
)
45+
end
4646

4747
errprintf_buffers = [
4848
Mem.HostBuffer(buf_len, HIP.hipHostAllocDefault)
@@ -51,9 +51,9 @@ struct ExceptionHolder
5151
Mem.HostBuffer(str_len, HIP.hipHostAllocDefault)
5252
for _ in 1:n_str_buffers]
5353

54-
errprintf_buffers_dev, str_buffers_dev = (#GPUArrays.@no_cache_scope begin
54+
errprintf_buffers_dev, str_buffers_dev = GPUArrays.@uncached begin
5555
ROCArray(Mem.device_ptr.(errprintf_buffers)), ROCArray(Mem.device_ptr.(str_buffers))
56-
)
56+
end
5757

5858
new(
5959
exception_flag, gate, buffers_counter, str_buffers_counter,

test/gpuarrays_tests.jl

+3
Original file line number | Diff line number | Diff line change
@@ -84,3 +84,6 @@ end
8484
@testitem "gpuarrays - uniformscaling" setup=[TSGPUArrays] begin
8585
gpuarrays_test("uniformscaling")
8686
end
87+
@testitem "gpuarrays - alloc cache" setup=[TSGPUArrays] begin
88+
gpuarrays_test("alloc cache")
89+
end

0 commit comments

Comments
 (0)