
Commit 12dd802

set default block size
Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
1 parent fe55db5 commit 12dd802

File tree

1 file changed: +5 -1 lines changed


optimum/exporters/ipex/cache_utils.py (+5 -1)
@@ -5,6 +5,10 @@
 from transformers import Cache, PretrainedConfig
 
 
+# May need to tune based on sequence length and different models but default to 16 currently.
+BLOCK_SIZE = 16
+
+
 class IPEXPagedCache(Cache):
     """
     A PagedCache that grows dynamically as more tokens are generated. everytime it grows block-size memory, vendor could set the pageCache memory layout.
@@ -44,7 +48,7 @@ def __init__(
         self.batch_size = batch_size
         # Used in `generate` to keep tally of how many tokens the cache has seen
         self._seen_tokens = torch.zeros([batch_size], dtype=torch.int32, device=device)
-        self.block_size = 64
+        self.block_size = BLOCK_SIZE
         self.num_blocks = (max_cache_len // self.block_size + (max_cache_len % self.block_size != 0)) * batch_size
         self.block_tables = -1 * torch.ones([self.num_blocks], dtype=torch.int32, device=device).reshape(
            batch_size, -1
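
For context, here is a minimal sketch (not part of the commit) of the block-count arithmetic the constructor applies with the new default. The helper name num_cache_blocks is hypothetical; BLOCK_SIZE, max_cache_len, batch_size, and the ceiling-division formula come directly from the diff.

# Hypothetical helper illustrating the formula used for self.num_blocks above.
BLOCK_SIZE = 16  # new default set by this commit (previously hard-coded as 64)

def num_cache_blocks(max_cache_len: int, batch_size: int, block_size: int = BLOCK_SIZE) -> int:
    # Ceiling division: each sequence gets enough blocks to cover max_cache_len tokens.
    blocks_per_sequence = max_cache_len // block_size + (max_cache_len % block_size != 0)
    return blocks_per_sequence * batch_size

# Example: a 2048-token cache for a batch of 4 now allocates 512 blocks of 16 tokens
# each, instead of 128 blocks of 64 tokens, so memory grows in finer-grained steps.
assert num_cache_blocks(2048, 4) == 512
assert num_cache_blocks(2048, 4, block_size=64) == 128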
