Some performance optimisation

fsprojects · Feb 17, 2025 · 52d7987
1 parent 31dd412
commit 52d7987
Show file tree

Hide file tree

Showing 12 changed files with 72 additions and 63 deletions.
diff --git a/src/Furnace.Backends.Reference/Reference.RawTensor.fs b/src/Furnace.Backends.Reference/Reference.RawTensor.fs
@@ -639,7 +639,7 @@ module internal RawTensorCPU =
     let inline LUDecomposition (m: ^T[,]) =
         let rows = m.GetLength(0)
         let res = Array2D.copy m
-        let perm = Array.init rows (fun i -> i)
+        let perm = Array.init rows id
         let mutable toggle = LanguagePrimitives.GenericOne<'T>
         for j = 0 to rows - 2 do
             let mutable colmax:'T = abs res[j, j]

diff --git a/src/Furnace.Core/Distributions.fs b/src/Furnace.Core/Distributions.fs
@@ -249,10 +249,12 @@ type Empirical<'T when 'T:equality>(values:seq<'T>, ?weights:Tensor, ?logWeights
                     let uniques = Dictionary<'T, Tensor>()
                     for i = 0 to _values.Length-1 do
                         let v, lw = _values[i], _categorical.logits[i]
-                        if uniques.ContainsKey(v) then
-                            let lw2 = uniques[v]
+
+                        match uniques.TryGetValue v with
+                        | true, lw2 ->
                             uniques[v] <- FurnaceImage.stack([lw; lw2]).logsumexp(dim=0)
-                        else uniques[v] <- lw
+                        | false, _ ->
+                            uniques[v] <- lw
                     Dictionary.copyKeys uniques, FurnaceImage.stack(Dictionary.copyValues uniques).view(-1)
                 else
                     let vals, counts = _values |> Array.getUniqueCounts false

diff --git a/src/Furnace.Core/Dtype.fs b/src/Furnace.Core/Dtype.fs
@@ -85,35 +85,38 @@ module DtypeAutoOpens =
 module Dtype =
 
     /// Matches all floating point tensor element types
-    let (|FloatingPoint|_|) (x: Dtype) = if x.IsFloatingPoint then Some() else None
+    [<return: Struct>]
+    let (|FloatingPoint|_|) (x: Dtype) = if x.IsFloatingPoint then ValueSome() else ValueNone
 
     /// Matches all integral tensor element types
-    let (|Integral|_|) (x: Dtype) = if x.IsIntegral then Some() else None
+    [<return: Struct>]
+    let (|Integral|_|) (x: Dtype) = if x.IsIntegral then ValueSome() else ValueNone
 
     /// Matches all integral or boolean tensor element types
+    [<return: Struct>]
     let (|IntegralOrBool|_|) x =
         match x with
-        | Integral | Bool -> Some()
-        | _ -> None
+        | Integral | Bool -> ValueSome()
+        | _ -> ValueNone
 
     /// Find the Dtype into which dtype1 and dtype2 can be widened
     let widen (dtype1: Dtype) (dtype2: Dtype) =
-        if dtype1 = dtype2 then Some dtype1
+        if dtype1 = dtype2 then ValueSome dtype1
         else
             match dtype1, dtype2 with 
-            | Float64, _ | _, Float64 -> Some Float64
-            | Float32, _ | _, Float32 -> Some Float32
-            | BFloat16, _ | _, BFloat16 -> Some BFloat16
-            | Float16, _ | _, Float16 -> Some Float16
-            | Int64, _ | _, Int64 -> Some Int64
-            | Int32, _ | _, Int32 -> Some Int32
-            | Int16, _ | _, Int16 -> Some Int16
-            | Int8, Bool | Bool, Int8 -> Some Int8
-            | Byte, Bool | Bool, Byte -> Some Byte
-            | Int8, Int8 -> Some Int8
-            | Byte, Byte -> Some Byte
-            | Bool, Bool -> Some Bool
-            | Int8, Byte | Byte, Int8  -> None
+            | Float64, _ | _, Float64 -> ValueSome Float64
+            | Float32, _ | _, Float32 -> ValueSome Float32
+            | BFloat16, _ | _, BFloat16 -> ValueSome BFloat16
+            | Float16, _ | _, Float16 -> ValueSome Float16
+            | Int64, _ | _, Int64 -> ValueSome Int64
+            | Int32, _ | _, Int32 -> ValueSome Int32
+            | Int16, _ | _, Int16 -> ValueSome Int16
+            | Int8, Bool | Bool, Int8 -> ValueSome Int8
+            | Byte, Bool | Bool, Byte -> ValueSome Byte
+            | Int8, Int8 -> ValueSome Int8
+            | Byte, Byte -> ValueSome Byte
+            | Bool, Bool -> ValueSome Bool
+            | Int8, Byte | Byte, Int8  -> ValueNone
 
     /// Get or set the default element type used when creating tensors. Only floating point types are supported as the default type. Note, use <c>FurnaceImage.config(...)</c> instead.
     let mutable Default = Dtype.Float32

diff --git a/src/Furnace.Core/Extensions.fs b/src/Furnace.Core/Extensions.fs
@@ -36,7 +36,9 @@ module Array =
     let getUniqueCounts (sorted:bool) (values:'T[]) =
         let counts = Dictionary<'T, int>()
         for v in values do
-            if counts.ContainsKey(v) then counts[v] <- counts[v] + 1 else counts[v] <- 1
+            match counts.TryGetValue v with
+            | true, cv -> counts[v] <- cv + 1 
+            | false, _ -> counts[v] <- 1
         if sorted then
             counts |> Array.ofSeq |> Array.sortByDescending (fun (KeyValue(_, v)) -> v) |> Array.map (fun (KeyValue(k, v)) -> k, v) |> Array.unzip
         else

diff --git a/src/Furnace.Core/Model.fs b/src/Furnace.Core/Model.fs
@@ -352,19 +352,19 @@ type ModelBase() =
     /// <summary>TBD</summary>
     member m.hasOwnParameters
         with get () =
-            let childrenParams = m.children |> List.map (fun c -> c.nparameters) |> List.sum
+            let childrenParams = m.children |> List.sumBy (fun c -> c.nparameters)
             m.nparameters <> childrenParams
 
     /// <summary>TBD</summary>
     member m.hasOwnBuffers
         with get () =
-            let childrenBuffers = m.children |> List.map (fun c -> c.nbuffers) |> List.sum
+            let childrenBuffers = m.children |> List.sumBy (fun c -> c.nbuffers)
             m.nbuffers <> childrenBuffers
 
     /// <summary>TBD</summary>
     member m.hasOwnState
         with get () =
-            let childrenState = m.children |> List.map (fun c -> c.nstate) |> List.sum
+            let childrenState = m.children |> List.sumBy (fun c -> c.nstate)
             m.nstate <> childrenState
 
     /// <summary>TBD</summary>

diff --git a/src/Furnace.Core/Tensor.fs b/src/Furnace.Core/Tensor.fs
@@ -759,8 +759,8 @@ type Tensor =
     member a.max(b:Tensor) = 
         if a.dtype <> b.dtype then
             match Dtype.widen a.dtype b.dtype with
-            | None -> opNotSupported "max" a.dtype b.dtype 
-            | Some tnew ->
+            | ValueNone -> opNotSupported "max" a.dtype b.dtype 
+            | ValueSome tnew ->
                 let aCast = a.cast(tnew)
                 let bCast = b.cast(tnew)
                 aCast.max(bCast)
@@ -775,8 +775,8 @@ type Tensor =
     member a.min(b:Tensor) = 
         if a.dtype <> b.dtype then
             match Dtype.widen a.dtype b.dtype with
-            | None -> opNotSupported "min" a.dtype b.dtype 
-            | Some tnew ->
+            | ValueNone -> opNotSupported "min" a.dtype b.dtype 
+            | ValueSome tnew ->
                 let aCast = a.cast(tnew)
                 let bCast = b.cast(tnew)
                 aCast.min(bCast)
@@ -1105,8 +1105,8 @@ type Tensor =
     static member (+) (a:Tensor, b:Tensor) : Tensor =
         if a.dtype <> b.dtype then
             match Dtype.widen a.dtype b.dtype with
-            | None -> opNotSupported "+" a.dtype b.dtype 
-            | Some tnew ->
+            | ValueNone -> opNotSupported "+" a.dtype b.dtype 
+            | ValueSome tnew ->
                 let aCast = a.cast(tnew)
                 let bCast = b.cast(tnew)
                 aCast + bCast
@@ -1155,8 +1155,8 @@ type Tensor =
     static member (-) (a:Tensor, b:Tensor) =
         if a.dtype <> b.dtype then
             match Dtype.widen a.dtype b.dtype with
-            | None -> opNotSupported "-" a.dtype b.dtype 
-            | Some tnew ->
+            | ValueNone -> opNotSupported "-" a.dtype b.dtype 
+            | ValueSome tnew ->
                 let aCast = a.cast(tnew)
                 let bCast = b.cast(tnew)
                 aCast - bCast
@@ -1216,8 +1216,8 @@ type Tensor =
     static member (*) (a:Tensor, b:Tensor) =
         if a.dtype <> b.dtype then
             match Dtype.widen a.dtype b.dtype with
-            | None -> opNotSupported "*" a.dtype b.dtype 
-            | Some tnew ->
+            | ValueNone -> opNotSupported "*" a.dtype b.dtype 
+            | ValueSome tnew ->
                 let aCast = a.cast(tnew)
                 let bCast = b.cast(tnew)
                 aCast * bCast
@@ -1267,8 +1267,8 @@ type Tensor =
     static member (/) (a:Tensor, b:Tensor) =
         if a.dtype <> b.dtype then
             match Dtype.widen a.dtype b.dtype with
-            | None -> opNotSupported "/" a.dtype b.dtype 
-            | Some tnew ->
+            | ValueNone -> opNotSupported "/" a.dtype b.dtype 
+            | ValueSome tnew ->
                 let aCast = a.cast(tnew)
                 let bCast = b.cast(tnew)
                 aCast / bCast
@@ -1327,8 +1327,8 @@ type Tensor =
     static member internal powImpl (a:Tensor, b:Tensor) =
         if a.dtype <> b.dtype then
             match Dtype.widen a.dtype b.dtype with
-            | None -> opNotSupported "Pow" a.dtype b.dtype 
-            | Some tnew ->
+            | ValueNone -> opNotSupported "Pow" a.dtype b.dtype 
+            | ValueSome tnew ->
                 let aCast = a.cast(tnew)
                 let bCast = b.cast(tnew)
                 Tensor.Pow (aCast, bCast)

diff --git a/src/Furnace.Core/Util.fs b/src/Furnace.Core/Util.fs
@@ -178,10 +178,10 @@ module DataConverter =
         | ArrayTy (_,ety) -> sprintf "%s[]" (formatType ety)
         | SeqTy ety -> sprintf "seq<%s>" (formatType ety)
         | TupleTy etys -> String.concat "*" (Array.map formatType etys)
-        | ty when ty = typeof<int64> -> "int64"
-        | ty when ty = typeof<int> -> "int"
-        | ty when ty = typeof<double> -> "double"
-        | ty when ty = typeof<float32> -> "float32"
+        | ty when Type.(=)(ty, typeof<int64>) -> "int64"
+        | ty when Type.(=)(ty, typeof<int>) -> "int"
+        | ty when Type.(=)(ty, typeof<double>) -> "double"
+        | ty when Type.(=)(ty, typeof<float32>) -> "float32"
         | _ -> ty.ToString()
 
     let private (|SeqTupleTy|_|) (ty: Type) = 
@@ -193,15 +193,17 @@ module DataConverter =
             Some (etys[0])
         | _ -> None
 
+    [<return: Struct>]
     let private (|TupleLeafTy|_|) (tgt: Type) (ty: Type) = 
         match ty with 
-        | TupleTy etys when etys |> Array.forall (fun ety -> ety = tgt) -> Some ()
-        | _ -> None
+        | TupleTy etys when etys |> Array.forall (fun ety -> Type.(=)(ety, tgt)) -> ValueSome ()
+        | _ -> ValueNone
 
+    [<return: Struct>]
     let private (|SeqTupleLeafTy|_|) (tgt: Type) (ty: Type) = 
         match ty with 
-        | SeqTy (TupleLeafTy tgt) -> Some ()
-        | _ -> None
+        | SeqTy (TupleLeafTy tgt) -> ValueSome ()
+        | _ -> ValueNone
 
     let private flatArrayAndShape1D<'T> (v: 'T[]) =
         v, [|Array.length v|]
@@ -293,7 +295,7 @@ module DataConverter =
     // An exact type-match test is needed because of https://github.com/Furnace/Furnace/issues/203 and https://github.com/dotnet/fsharp/issues/10202
     // That is in .NET and F#, a boxed "byte[]" can be unboxed to "int8[]" and vice-versa.
     // This also affects pattern matches of the element types of sequences as well
-    let typesMatch<'T> (array: System.Array) = (array.GetType().GetElementType() = typeof<'T>)
+    let typesMatch<'T> (array: System.Array) = Type.(=)(array.GetType().GetElementType(), typeof<'T>)
 
     let rec tryFlatArrayAndShape<'T> (value:obj) : ('T[] * int[]) option =
         match value with

diff --git a/tests/Furnace.Benchmarks.Python/BasicTensorOpsPerfPython.fs b/tests/Furnace.Benchmarks.Python/BasicTensorOpsPerfPython.fs
@@ -20,7 +20,7 @@ module PythonHelpers =
     // your mileage may differ
     if Environment.GetEnvironmentVariable("COMPUTERNAME") = "MSRC-3617253" then
         Environment.SetEnvironmentVariable("PYTHONHOME", @"C:\ProgramData\Anaconda3\", EnvironmentVariableTarget.User)
-    if Environment.GetEnvironmentVariable("PYTHONHOME") = null then failwith "expect PYTHONHOME to be set"
+    if isNull (Environment.GetEnvironmentVariable "PYTHONHOME") then failwith "expect PYTHONHOME to be set"
     let _prepPython = scope.Exec("import torch")
 
     let execPython(code) = 

diff --git a/tests/Furnace.Benchmarks.Python/Program.fs b/tests/Furnace.Benchmarks.Python/Program.fs
@@ -17,10 +17,10 @@ let main args =
            for case in summary.BenchmarksCases do
             let v = 
              try
-              if case.Descriptor <> null && 
-               case.Descriptor.Categories <> null &&
+              if not (isNull case.Descriptor) && 
+               (not(isNull case.Descriptor.Categories)) &&
                case.Descriptor.Categories.Length > 0 then
-                if summary <> null && (try (summary[case] |> ignore); true with _ -> false) then 
+                if (not (isNull summary)) && (try (summary[case] |> ignore); true with _ -> false) then 
                     let report = summary[case]
                     let tensorSize = case.Parameters["tensorSize"] :?> int
                     let dtypeName = case.Parameters["dtypeName"] :?> string

diff --git a/tests/Furnace.Benchmarks/BasicTensorOpsPerf.fs b/tests/Furnace.Benchmarks/BasicTensorOpsPerf.fs
@@ -83,10 +83,10 @@ type BasicTensorOps() =
     member perf.simulatePythonResult(nm) =
         // Note, this string allocation and dictionary lookup can affect result
         let key = nm + string perf.tensorSize + perf.dtypeName + perf.deviceName
-        if PythonResults.pythonResults.ContainsKey(key) then
-            let time = PythonResults.pythonResults[key]
+        match PythonResults.pythonResults.TryGetValue key with
+        | true, time ->
             Thread.Sleep(time)
-        else  
+        | false, _ ->
             failwithf "key '%s' not found in python results, have you run Furnace.Benchmarks.Python?" key
 
     member perf.configure(backend, factor) = 

diff --git a/tests/Furnace.Tests/TestModel.fs b/tests/Furnace.Tests/TestModel.fs
@@ -726,7 +726,7 @@ type TestModel () =
         let m3 = Sequential([l1; l2; l3])
 
         let childrenParams (m:Model) = 
-            m.children |> List.map (fun c -> c.nparameters) |> List.sum
+            m.children |> List.sumBy (fun c -> c.nparameters)
 
         let m1Params = m1.nparameters
         let m2Params = m2.nparameters

diff --git a/tests/Furnace.Tests/TestTensor.fs b/tests/Furnace.Tests/TestTensor.fs
@@ -1738,8 +1738,8 @@ type TestTensor () =
         for combo in Combos.IntegralAndFloatingPoint do 
             for dtype2 in Dtypes.IntegralAndFloatingPoint do 
                 match Dtype.widen combo.dtype dtype2 with 
-                | None -> ()
-                | Some dtypeRes -> 
+                | ValueNone -> ()
+                | ValueSome dtypeRes -> 
                 let t1 = combo.tensor([1.; 2.]) + combo.tensor([3.; 4.], dtype=dtype2)
                 let t1Correct = combo.tensor([4.; 6.], dtype=dtypeRes)
 
@@ -2086,8 +2086,8 @@ type TestTensor () =
         for combo in Combos.IntegralAndFloatingPoint do 
             for dtype2 in Dtypes.IntegralAndFloatingPoint do 
                 match Dtype.widen combo.dtype dtype2 with 
-                | None -> ()
-                | Some dtypeRes -> 
+                | ValueNone -> ()
+                | ValueSome dtypeRes -> 
 
                 let t1 = combo.tensor([1.; 2.]) - combo.tensor([3.; 4.], dtype=dtype2)
                 let t1Correct = combo.tensor([-2.; -2.], dtype=dtypeRes)
@@ -2138,8 +2138,8 @@ type TestTensor () =
         for combo in Combos.IntegralAndFloatingPoint do 
             for dtype2 in Dtypes.IntegralAndFloatingPoint do 
                 match Dtype.widen combo.dtype dtype2 with 
-                | None -> ()
-                | Some dtypeRes -> 
+                | ValueNone -> ()
+                | ValueSome dtypeRes -> 
                 let t1 = combo.tensor([1.; 2.]) * combo.tensor([3.; 4.], dtype=dtype2)
                 let t1Correct = combo.tensor([3.; 8.], dtype=dtypeRes)