# quantize.py

# -*- coding: utf-8 -*-
"""quantize.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1iWgVX-94wT6nA2SBVfFkVvuJtRqxqn_e
"""

import numpy as np
from sklearn.cluster import KMeans
import torch
import torch.nn as nn
from pruned_layers import PrunedConv, PruneLinear

# Preferred compute device, as a string: 'cuda' when torch reports an
# available CUDA device, otherwise 'cpu'.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

def _quantize_weight(weight, bits):
    """
    Cluster one weight tensor with K-Means and snap it to its centroids in place.
    :param weight: (torch.Tensor) weight parameter to quantize; overwritten in place.
    :param bits: (int) codebook index width; the codebook holds 2 ** bits centroids.
    :return: (np.ndarray) the fitted centroids, shape (2 ** bits, 1).
    """
    # KMeans expects a 2-D (n_samples, n_features) array, so every scalar
    # weight becomes one 1-feature sample.  reshape (unlike view) also works
    # when the parameter's storage is not contiguous.
    flat_weights = weight.detach().reshape(-1, 1).cpu().numpy()

    # NOTE(review): all weights are clustered, including entries that pruning
    # set to zero, so zeros may be mapped to a non-zero centroid. Confirm
    # whether sparsity has to be preserved by the caller.
    kmeans = KMeans(n_clusters=2 ** bits, random_state=0)
    kmeans.fit(flat_weights)
    centroids = kmeans.cluster_centers_

    # Replace every weight by the centroid of the cluster it was assigned to.
    quantized = centroids[kmeans.labels_].reshape(tuple(weight.shape))

    # copy_ converts to the parameter's own dtype and device, so the layer
    # stays wherever the rest of the model lives instead of being forced onto
    # a globally chosen device.
    with torch.no_grad():
        weight.copy_(torch.from_numpy(quantized))

    return centroids


def quantize_whole_model(net, bits=5, save_path="net_after_quantization.pt"):
    """
    Quantize the whole model.

    Every PrunedConv / PruneLinear layer found in ``net`` has its weight
    tensor clustered with K-Means (2 ** bits clusters, random_state=0) and
    each weight replaced, in place, by its cluster centroid.  The quantized
    state dict is then saved to ``save_path``.

    :param net: (object) network model; must be an nn.Module.
    :param bits: (int) codebook index width; each layer gets 2 ** bits centroids.
    :param save_path: (str or None) where to save the quantized state dict.
        Pass None to skip saving.
    :return: centroids of each weight layer, used in the quantization codebook,
        stacked into one NumPy array (one entry per quantized layer, in
        module-traversal order).
    :raises AssertionError: if ``net`` is not an nn.Module.
    :raises ValueError: propagated from scikit-learn's KMeans, e.g. when a
        layer has fewer weights than 2 ** bits clusters.
    """
    assert isinstance(net, nn.Module)

    cluster_centers = []
    layer_ind = 0

    for m in net.modules():
        # Pick the wrapped weight tensor for the two supported layer types;
        # every other module is left untouched.
        if isinstance(m, PrunedConv):
            weight, kind = m.conv.weight, "PrunedConv"
        elif isinstance(m, PruneLinear):
            weight, kind = m.linear.weight, "PruneLinear"
        else:
            continue

        cluster_centers.append(_quantize_weight(weight, bits))

        layer_ind += 1
        print(f"Complete {layer_ind} layers quantization for {kind}...")

    # Save the quantized model checkpoint
    if save_path is not None:
        torch.save(net.state_dict(), save_path)

    # All codebooks share the shape (2 ** bits, 1), so they stack cleanly.
    return np.array(cluster_centers)