Feat/quant/per block #2849

Merged · 28 commits · Mar 3, 2025
4 changes: 4 additions & 0 deletions .github/workflows/test.yml
@@ -147,6 +147,10 @@ jobs:
          - rust: prev
            toolchain: ${{ needs.prepare-checks.outputs.rust-prev-version }}
    steps:
+      # disable incremental compilation (reduces artifact size)
+      - name: Set CI Profile
+        run: echo "CARGO_PROFILE_TEST_INCREMENTAL=false" >> $GITHUB_ENV
+      # --------------------------------------------------------------------------------
      - name: Setup Rust
        uses: tracel-ai/github-actions/setup-rust@v1
        with:
1 change: 1 addition & 0 deletions Cargo.lock (generated file; diff not rendered)
36 changes: 26 additions & 10 deletions burn-book/src/quantization.md
@@ -45,12 +45,12 @@ tensors and can collect their statistics, such as the min and max value when using

```rust , ignore
# use burn::module::Quantizer;
-# use burn::tensor::quantization::{MinMaxCalibration, QuantizationScheme, QuantizationType};
+# use burn::tensor::quantization::{Calibration, QuantizationMode, QuantizationScheme, QuantizationType};
#
// Quantization config
let mut quantizer = Quantizer {
-    calibration: MinMaxCalibration {},
-    scheme: QuantizationScheme::PerTensorSymmetric(QuantizationType::QInt8),
+    calibration: Calibration::MinMax,
+    scheme: QuantizationScheme::PerTensor(QuantizationMode::Symmetric, QuantizationType::QInt8),
};

// Quantize the weights
@@ -95,9 +95,9 @@ _quantization-time_ (weights are static), but activations require more attention

To compute the quantization parameters, Burn supports the following `Calibration` methods.

-| Method              | Description                                                                       |
-| :------------------ | :-------------------------------------------------------------------------------- |
-| `MinMaxCalibration` | Computes the quantization range mapping based on the running min and max values. |
+| Method   | Description                                                                       |
+| :------- | :-------------------------------------------------------------------------------- |
+| `MinMax` | Computes the quantization range mapping based on the running min and max values. |
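
> Editor's note: to make `MinMax` concrete, here is a minimal sketch of the range it produces, assuming a plain `f32` slice stands in for the tensor. The real implementation operates on backend tensors; `min_max_range` is a hypothetical helper, not Burn's API.

```rust
// Hypothetical helper illustrating MinMax calibration: the range is
// simply the running minimum and maximum of the observed values.
fn min_max_range(values: &[f32]) -> (f32, f32) {
    values
        .iter()
        .fold((f32::INFINITY, f32::NEG_INFINITY), |(min, max), &v| {
            (min.min(v), max.max(v))
        })
}

fn main() {
    let weights = [-1.8_f32, 0.2, 0.5, 3.4];
    let (min, max) = min_max_range(&weights);
    assert_eq!((min, max), (-1.8, 3.4));
    // A symmetric scheme would derive its scale from max(|min|, |max|),
    // e.g. 3.4 / 127.0 for 8-bit signed quantization.
}
```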

### Quantization Scheme

@@ -116,7 +116,23 @@ channel with per-channel quantization (commonly used with CNNs).

Burn currently supports the following `QuantizationScheme` variants.

-| Variant              | Description                                                                                                     |
-| :------------------- | :-------------------------------------------------------------------------------------------------------------- |
-| `PerTensorAffine`    | Computes the quantization parameters for the whole tensor and applies an affine range mapping with zero point.  |
-| `PerTensorSymmetric` | Computes the quantization parameters for the whole tensor and applies a scale range mapping centered around 0.  |
+| Variant                        | Description                                                                                                                                                               |
+| :----------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `PerTensor(mode, type)`        | Applies a single set of quantization parameters to the entire tensor. The `mode` defines how values are transformed, and `type` represents the target quantization type. |
+| `PerBlock(mode, type, layout)` | Applies quantization parameters to individual blocks within the tensor. The `layout` defines how the tensor is partitioned.                                              |

+#### Quantization Mode
+
+| Mode        | Description                                                           |
+| ----------- | --------------------------------------------------------------------- |
+| `Affine`    | Maps values using an affine transformation with a zero point offset.  |
+| `Symmetric` | Maps values using a scale factor for a range centered around zero.    |
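
> Editor's note: as a rough illustration of the two modes for 8-bit signed output (a sketch only; these helper names are hypothetical, not Burn's API):

```rust
// Affine: q = round(x / scale) + zero_point, clamped to the i8 range.
fn quantize_affine(x: f32, scale: f32, zero_point: i32) -> i8 {
    ((x / scale).round() as i32 + zero_point).clamp(-128, 127) as i8
}

// Symmetric: q = round(x / scale), with the range kept centered on zero
// (the -128 slot is left unused so positive and negative ranges match).
fn quantize_symmetric(x: f32, scale: f32) -> i8 {
    (x / scale).round().clamp(-127.0, 127.0) as i8
}
```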

+---
+
+#### Block Layout
+
+| Layout             | Description                                               |
+| ------------------ | --------------------------------------------------------- |
+| `Flat(block_size)` | Divides the tensor into linear 1D blocks of fixed size.   |
+| `Grid(m, n)`       | Divides the tensor into 2D blocks of `m` x `n` elements.  |
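
> Editor's note: putting the pieces together, the new variants compose as sketched below. This is based on the tables above; the exact import path and the `BlockLayout` type name are assumptions, so check the crate docs for the authoritative API.

```rust , ignore
use burn::tensor::quantization::{
    BlockLayout, QuantizationMode, QuantizationScheme, QuantizationType,
};

// One scale for the whole tensor, symmetric 8-bit.
let per_tensor =
    QuantizationScheme::PerTensor(QuantizationMode::Symmetric, QuantizationType::QInt8);

// One set of parameters per 1D block of 32 contiguous elements.
let per_block_flat = QuantizationScheme::PerBlock(
    QuantizationMode::Symmetric,
    QuantizationType::QInt8,
    BlockLayout::Flat(32),
);

// One set of parameters per 8 x 8 tile, with an affine mapping.
let per_block_grid = QuantizationScheme::PerBlock(
    QuantizationMode::Affine,
    QuantizationType::QInt8,
    BlockLayout::Grid(8, 8),
);
```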
2 changes: 1 addition & 1 deletion crates/burn-candle/src/backend.rs
@@ -2,7 +2,7 @@ use std::marker::PhantomData;

use burn_tensor::{
    backend::{Backend, DeviceId, DeviceOps},
-    quantization::{QTensorPrimitive, QuantizationStrategy},
+    quantization::QTensorPrimitive,
    Device,
};
use candle_core::{backend::BackendDevice, DeviceLocation};
2 changes: 1 addition & 1 deletion crates/burn-candle/src/ops/qtensor.rs
@@ -3,7 +3,7 @@ use std::ops::Range;
use burn_tensor::{
    backend::Backend,
    ops::{FloatTensor, IntTensor, QTensorOps, QuantizedTensor},
-    quantization::{QuantizationParametersPrimitive, QuantizationScheme, QuantizationStrategy},
+    quantization::{QuantizationParametersPrimitive, QuantizationScheme},
    DType, Device, Shape, TensorData,
};

2 changes: 1 addition & 1 deletion crates/burn-candle/src/tensor.rs
@@ -1,5 +1,5 @@
use burn_tensor::{
-    quantization::{QTensorPrimitive, QuantizationScheme, QuantizationStrategy},
+    quantization::{QTensorPrimitive, QuantizationScheme},
    DType, Element, Shape, TensorData, TensorMetadata,
};

4 changes: 2 additions & 2 deletions crates/burn-core/src/module/base.rs
@@ -5,7 +5,7 @@ use crate::{
};
use alloc::vec::Vec;
pub use burn_derive::Module;
-use burn_tensor::{ops::Device, quantization::Calibration, Bool, Int, Tensor};
+use burn_tensor::{ops::Device, Bool, Int, Tensor};

/// Type alias to `Vec<B::Device>` which supports `no_std` environments, but automatically using
/// the `alloc` crate.
@@ -204,7 +204,7 @@ pub trait Module<B: Backend>: Clone + Send + core::fmt::Debug {
    }

    /// Quantize the weights of the module.
-    fn quantize_weights<C: Calibration>(self, quantizer: &mut Quantizer<C>) -> Self {
+    fn quantize_weights(self, quantizer: &mut Quantizer) -> Self {
        self.map(quantizer)
    }
}
8 changes: 4 additions & 4 deletions crates/burn-core/src/module/quantize.rs
@@ -7,16 +7,16 @@ use burn_tensor::{
use crate::module::{ModuleMapper, ParamId};

/// Describes how to quantize a module.
-pub struct Quantizer<C: Calibration> {
+pub struct Quantizer {
    /// The calibration method used in quantization.
-    pub calibration: C,
+    pub calibration: Calibration,
    /// The quantization scheme.
    pub scheme: QuantizationScheme,
}

-impl<B: Backend, C: Calibration> ModuleMapper<B> for Quantizer<C> {
+impl<B: Backend> ModuleMapper<B> for Quantizer {
    fn map_float<const D: usize>(&mut self, _id: ParamId, tensor: Tensor<B, D>) -> Tensor<B, D> {
-        let range = self.calibration.compute_range(&tensor);
+        let range = self.scheme.compute_range(&tensor, &self.calibration);
        let qparams = self.scheme.compute_q_params(range);
        tensor.quantize(&self.scheme, qparams)
    }
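
> Editor's note: taken together with the `quantize_weights` change in `base.rs`, module quantization with the de-generified `Quantizer` now reads roughly as follows. This is a sketch assuming some `model: impl Module<B>` is in scope, mirroring the updated burn-book example.

```rust , ignore
use burn::module::{Module, Quantizer};
use burn::tensor::quantization::{
    Calibration, QuantizationMode, QuantizationScheme, QuantizationType,
};

// Calibration is now a plain enum value rather than a type parameter,
// so `Quantizer` no longer needs generics.
let mut quantizer = Quantizer {
    calibration: Calibration::MinMax,
    scheme: QuantizationScheme::PerTensor(QuantizationMode::Symmetric, QuantizationType::QInt8),
};

// For every float parameter, the mapper computes the calibration range,
// derives the quantization parameters, and quantizes the tensor.
let model = model.quantize_weights(&mut quantizer);
```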