Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Some performance optimization #40

Merged
merged 1 commit into from
Feb 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/Furnace.Backends.Reference/Reference.RawTensor.fs
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@ module internal RawTensorCPU =
let inline LUDecomposition (m: ^T[,]) =
let rows = m.GetLength(0)
let res = Array2D.copy m
let perm = Array.init rows (fun i -> i)
let perm = Array.init rows id
let mutable toggle = LanguagePrimitives.GenericOne<'T>
for j = 0 to rows - 2 do
let mutable colmax:'T = abs res[j, j]
Expand Down
8 changes: 5 additions & 3 deletions src/Furnace.Core/Distributions.fs
Original file line number Diff line number Diff line change
Expand Up @@ -249,10 +249,12 @@ type Empirical<'T when 'T:equality>(values:seq<'T>, ?weights:Tensor, ?logWeights
let uniques = Dictionary<'T, Tensor>()
for i = 0 to _values.Length-1 do
let v, lw = _values[i], _categorical.logits[i]
if uniques.ContainsKey(v) then
let lw2 = uniques[v]

match uniques.TryGetValue v with
| true, lw2 ->
uniques[v] <- FurnaceImage.stack([lw; lw2]).logsumexp(dim=0)
else uniques[v] <- lw
| false, _ ->
uniques[v] <- lw
Dictionary.copyKeys uniques, FurnaceImage.stack(Dictionary.copyValues uniques).view(-1)
else
let vals, counts = _values |> Array.getUniqueCounts false
Expand Down
39 changes: 21 additions & 18 deletions src/Furnace.Core/Dtype.fs
Original file line number Diff line number Diff line change
Expand Up @@ -85,35 +85,38 @@ module DtypeAutoOpens =
module Dtype =

/// Matches all floating point tensor element types
let (|FloatingPoint|_|) (x: Dtype) = if x.IsFloatingPoint then Some() else None
[<return: Struct>]
let (|FloatingPoint|_|) (x: Dtype) = if x.IsFloatingPoint then ValueSome() else ValueNone

/// Matches all integral tensor element types
let (|Integral|_|) (x: Dtype) = if x.IsIntegral then Some() else None
[<return: Struct>]
let (|Integral|_|) (x: Dtype) = if x.IsIntegral then ValueSome() else ValueNone

/// Matches all integral or boolean tensor element types
[<return: Struct>]
let (|IntegralOrBool|_|) x =
match x with
| Integral | Bool -> Some()
| _ -> None
| Integral | Bool -> ValueSome()
| _ -> ValueNone

/// Find the Dtype into which dtype1 and dtype2 can be widened
let widen (dtype1: Dtype) (dtype2: Dtype) =
if dtype1 = dtype2 then Some dtype1
if dtype1 = dtype2 then ValueSome dtype1
else
match dtype1, dtype2 with
| Float64, _ | _, Float64 -> Some Float64
| Float32, _ | _, Float32 -> Some Float32
| BFloat16, _ | _, BFloat16 -> Some BFloat16
| Float16, _ | _, Float16 -> Some Float16
| Int64, _ | _, Int64 -> Some Int64
| Int32, _ | _, Int32 -> Some Int32
| Int16, _ | _, Int16 -> Some Int16
| Int8, Bool | Bool, Int8 -> Some Int8
| Byte, Bool | Bool, Byte -> Some Byte
| Int8, Int8 -> Some Int8
| Byte, Byte -> Some Byte
| Bool, Bool -> Some Bool
| Int8, Byte | Byte, Int8 -> None
| Float64, _ | _, Float64 -> ValueSome Float64
| Float32, _ | _, Float32 -> ValueSome Float32
| BFloat16, _ | _, BFloat16 -> ValueSome BFloat16
| Float16, _ | _, Float16 -> ValueSome Float16
| Int64, _ | _, Int64 -> ValueSome Int64
| Int32, _ | _, Int32 -> ValueSome Int32
| Int16, _ | _, Int16 -> ValueSome Int16
| Int8, Bool | Bool, Int8 -> ValueSome Int8
| Byte, Bool | Bool, Byte -> ValueSome Byte
| Int8, Int8 -> ValueSome Int8
| Byte, Byte -> ValueSome Byte
| Bool, Bool -> ValueSome Bool
| Int8, Byte | Byte, Int8 -> ValueNone

/// Get or set the default element type used when creating tensors. Only floating point types are supported as the default type. Note, use <c>FurnaceImage.config(...)</c> instead.
let mutable Default = Dtype.Float32
Expand Down
4 changes: 3 additions & 1 deletion src/Furnace.Core/Extensions.fs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ module Array =
let getUniqueCounts (sorted:bool) (values:'T[]) =
let counts = Dictionary<'T, int>()
for v in values do
if counts.ContainsKey(v) then counts[v] <- counts[v] + 1 else counts[v] <- 1
match counts.TryGetValue v with
| true, cv -> counts[v] <- cv + 1
| false, _ -> counts[v] <- 1
if sorted then
counts |> Array.ofSeq |> Array.sortByDescending (fun (KeyValue(_, v)) -> v) |> Array.map (fun (KeyValue(k, v)) -> k, v) |> Array.unzip
else
Expand Down
6 changes: 3 additions & 3 deletions src/Furnace.Core/Model.fs
Original file line number Diff line number Diff line change
Expand Up @@ -352,19 +352,19 @@ type ModelBase() =
/// <summary>TBD</summary>
member m.hasOwnParameters
with get () =
let childrenParams = m.children |> List.map (fun c -> c.nparameters) |> List.sum
let childrenParams = m.children |> List.sumBy (fun c -> c.nparameters)
m.nparameters <> childrenParams

/// <summary>TBD</summary>
member m.hasOwnBuffers
with get () =
let childrenBuffers = m.children |> List.map (fun c -> c.nbuffers) |> List.sum
let childrenBuffers = m.children |> List.sumBy (fun c -> c.nbuffers)
m.nbuffers <> childrenBuffers

/// <summary>TBD</summary>
member m.hasOwnState
with get () =
let childrenState = m.children |> List.map (fun c -> c.nstate) |> List.sum
let childrenState = m.children |> List.sumBy (fun c -> c.nstate)
m.nstate <> childrenState

/// <summary>TBD</summary>
Expand Down
28 changes: 14 additions & 14 deletions src/Furnace.Core/Tensor.fs
Original file line number Diff line number Diff line change
Expand Up @@ -759,8 +759,8 @@ type Tensor =
member a.max(b:Tensor) =
if a.dtype <> b.dtype then
match Dtype.widen a.dtype b.dtype with
| None -> opNotSupported "max" a.dtype b.dtype
| Some tnew ->
| ValueNone -> opNotSupported "max" a.dtype b.dtype
| ValueSome tnew ->
let aCast = a.cast(tnew)
let bCast = b.cast(tnew)
aCast.max(bCast)
Expand All @@ -775,8 +775,8 @@ type Tensor =
member a.min(b:Tensor) =
if a.dtype <> b.dtype then
match Dtype.widen a.dtype b.dtype with
| None -> opNotSupported "min" a.dtype b.dtype
| Some tnew ->
| ValueNone -> opNotSupported "min" a.dtype b.dtype
| ValueSome tnew ->
let aCast = a.cast(tnew)
let bCast = b.cast(tnew)
aCast.min(bCast)
Expand Down Expand Up @@ -1105,8 +1105,8 @@ type Tensor =
static member (+) (a:Tensor, b:Tensor) : Tensor =
if a.dtype <> b.dtype then
match Dtype.widen a.dtype b.dtype with
| None -> opNotSupported "+" a.dtype b.dtype
| Some tnew ->
| ValueNone -> opNotSupported "+" a.dtype b.dtype
| ValueSome tnew ->
let aCast = a.cast(tnew)
let bCast = b.cast(tnew)
aCast + bCast
Expand Down Expand Up @@ -1155,8 +1155,8 @@ type Tensor =
static member (-) (a:Tensor, b:Tensor) =
if a.dtype <> b.dtype then
match Dtype.widen a.dtype b.dtype with
| None -> opNotSupported "-" a.dtype b.dtype
| Some tnew ->
| ValueNone -> opNotSupported "-" a.dtype b.dtype
| ValueSome tnew ->
let aCast = a.cast(tnew)
let bCast = b.cast(tnew)
aCast - bCast
Expand Down Expand Up @@ -1216,8 +1216,8 @@ type Tensor =
static member (*) (a:Tensor, b:Tensor) =
if a.dtype <> b.dtype then
match Dtype.widen a.dtype b.dtype with
| None -> opNotSupported "*" a.dtype b.dtype
| Some tnew ->
| ValueNone -> opNotSupported "*" a.dtype b.dtype
| ValueSome tnew ->
let aCast = a.cast(tnew)
let bCast = b.cast(tnew)
aCast * bCast
Expand Down Expand Up @@ -1267,8 +1267,8 @@ type Tensor =
static member (/) (a:Tensor, b:Tensor) =
if a.dtype <> b.dtype then
match Dtype.widen a.dtype b.dtype with
| None -> opNotSupported "/" a.dtype b.dtype
| Some tnew ->
| ValueNone -> opNotSupported "/" a.dtype b.dtype
| ValueSome tnew ->
let aCast = a.cast(tnew)
let bCast = b.cast(tnew)
aCast / bCast
Expand Down Expand Up @@ -1327,8 +1327,8 @@ type Tensor =
static member internal powImpl (a:Tensor, b:Tensor) =
if a.dtype <> b.dtype then
match Dtype.widen a.dtype b.dtype with
| None -> opNotSupported "Pow" a.dtype b.dtype
| Some tnew ->
| ValueNone -> opNotSupported "Pow" a.dtype b.dtype
| ValueSome tnew ->
let aCast = a.cast(tnew)
let bCast = b.cast(tnew)
Tensor.Pow (aCast, bCast)
Expand Down
20 changes: 11 additions & 9 deletions src/Furnace.Core/Util.fs
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,10 @@ module DataConverter =
| ArrayTy (_,ety) -> sprintf "%s[]" (formatType ety)
| SeqTy ety -> sprintf "seq<%s>" (formatType ety)
| TupleTy etys -> String.concat "*" (Array.map formatType etys)
| ty when ty = typeof<int64> -> "int64"
| ty when ty = typeof<int> -> "int"
| ty when ty = typeof<double> -> "double"
| ty when ty = typeof<float32> -> "float32"
| ty when Type.(=)(ty, typeof<int64>) -> "int64"
| ty when Type.(=)(ty, typeof<int>) -> "int"
| ty when Type.(=)(ty, typeof<double>) -> "double"
| ty when Type.(=)(ty, typeof<float32>) -> "float32"
| _ -> ty.ToString()

let private (|SeqTupleTy|_|) (ty: Type) =
Expand All @@ -193,15 +193,17 @@ module DataConverter =
Some (etys[0])
| _ -> None

[<return: Struct>]
let private (|TupleLeafTy|_|) (tgt: Type) (ty: Type) =
match ty with
| TupleTy etys when etys |> Array.forall (fun ety -> ety = tgt) -> Some ()
| _ -> None
| TupleTy etys when etys |> Array.forall (fun ety -> ety = tgt) -> ValueSome ()
| _ -> ValueNone

[<return: Struct>]
let private (|SeqTupleLeafTy|_|) (tgt: Type) (ty: Type) =
match ty with
| SeqTy (TupleLeafTy tgt) -> Some ()
| _ -> None
| SeqTy (TupleLeafTy tgt) -> ValueSome ()
| _ -> ValueNone

let private flatArrayAndShape1D<'T> (v: 'T[]) =
v, [|Array.length v|]
Expand Down Expand Up @@ -293,7 +295,7 @@ module DataConverter =
// An exact type-match test is needed because of https://github.com/Furnace/Furnace/issues/203 and https://github.com/dotnet/fsharp/issues/10202
// That is, in .NET and F#, a boxed "byte[]" can be unboxed to "int8[]" and vice versa.
// This also affects pattern matches of the element types of sequences.
let typesMatch<'T> (array: System.Array) = (array.GetType().GetElementType() = typeof<'T>)
let typesMatch<'T> (array: System.Array) = Type.(=)(array.GetType().GetElementType(), typeof<'T>)

let rec tryFlatArrayAndShape<'T> (value:obj) : ('T[] * int[]) option =
match value with
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ module PythonHelpers =
// your mileage may differ
if Environment.GetEnvironmentVariable("COMPUTERNAME") = "MSRC-3617253" then
Environment.SetEnvironmentVariable("PYTHONHOME", @"C:\ProgramData\Anaconda3\", EnvironmentVariableTarget.User)
if Environment.GetEnvironmentVariable("PYTHONHOME") = null then failwith "expect PYTHONHOME to be set"
if isNull (Environment.GetEnvironmentVariable "PYTHONHOME") then failwith "expect PYTHONHOME to be set"
let _prepPython = scope.Exec("import torch")

let execPython(code) =
Expand Down
6 changes: 3 additions & 3 deletions tests/Furnace.Benchmarks.Python/Program.fs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ let main args =
for case in summary.BenchmarksCases do
let v =
try
if case.Descriptor <> null &&
case.Descriptor.Categories <> null &&
if not (isNull case.Descriptor) &&
(not(isNull case.Descriptor.Categories)) &&
case.Descriptor.Categories.Length > 0 then
if summary <> null && (try (summary[case] |> ignore); true with _ -> false) then
if (not (isNull summary)) && (try (summary[case] |> ignore); true with _ -> false) then
let report = summary[case]
let tensorSize = case.Parameters["tensorSize"] :?> int
let dtypeName = case.Parameters["dtypeName"] :?> string
Expand Down
6 changes: 3 additions & 3 deletions tests/Furnace.Benchmarks/BasicTensorOpsPerf.fs
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,10 @@ type BasicTensorOps() =
member perf.simulatePythonResult(nm) =
// Note: this string allocation and dictionary lookup can affect the result
let key = nm + string perf.tensorSize + perf.dtypeName + perf.deviceName
if PythonResults.pythonResults.ContainsKey(key) then
let time = PythonResults.pythonResults[key]
match PythonResults.pythonResults.TryGetValue key with
| true, time ->
Thread.Sleep(time)
else
| false, _ ->
failwithf "key '%s' not found in python results, have you run Furnace.Benchmarks.Python?" key

member perf.configure(backend, factor) =
Expand Down
2 changes: 1 addition & 1 deletion tests/Furnace.Tests/TestModel.fs
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,7 @@ type TestModel () =
let m3 = Sequential([l1; l2; l3])

let childrenParams (m:Model) =
m.children |> List.map (fun c -> c.nparameters) |> List.sum
m.children |> List.sumBy (fun c -> c.nparameters)

let m1Params = m1.nparameters
let m2Params = m2.nparameters
Expand Down
12 changes: 6 additions & 6 deletions tests/Furnace.Tests/TestTensor.fs
Original file line number Diff line number Diff line change
Expand Up @@ -1738,8 +1738,8 @@ type TestTensor () =
for combo in Combos.IntegralAndFloatingPoint do
for dtype2 in Dtypes.IntegralAndFloatingPoint do
match Dtype.widen combo.dtype dtype2 with
| None -> ()
| Some dtypeRes ->
| ValueNone -> ()
| ValueSome dtypeRes ->
let t1 = combo.tensor([1.; 2.]) + combo.tensor([3.; 4.], dtype=dtype2)
let t1Correct = combo.tensor([4.; 6.], dtype=dtypeRes)

Expand Down Expand Up @@ -2086,8 +2086,8 @@ type TestTensor () =
for combo in Combos.IntegralAndFloatingPoint do
for dtype2 in Dtypes.IntegralAndFloatingPoint do
match Dtype.widen combo.dtype dtype2 with
| None -> ()
| Some dtypeRes ->
| ValueNone -> ()
| ValueSome dtypeRes ->

let t1 = combo.tensor([1.; 2.]) - combo.tensor([3.; 4.], dtype=dtype2)
let t1Correct = combo.tensor([-2.; -2.], dtype=dtypeRes)
Expand Down Expand Up @@ -2138,8 +2138,8 @@ type TestTensor () =
for combo in Combos.IntegralAndFloatingPoint do
for dtype2 in Dtypes.IntegralAndFloatingPoint do
match Dtype.widen combo.dtype dtype2 with
| None -> ()
| Some dtypeRes ->
| ValueNone -> ()
| ValueSome dtypeRes ->
let t1 = combo.tensor([1.; 2.]) * combo.tensor([3.; 4.], dtype=dtype2)
let t1Correct = combo.tensor([3.; 8.], dtype=dtypeRes)

Expand Down
Loading