-
Notifications
You must be signed in to change notification settings - Fork 58
Open
Description
While reproducing the Matmul simulation on the TPUv3 configuration described in the paper (compile mode "heuristic-TPU-new"), we encountered the following error:
(llmcompass) root@testpc115159:/*/LLMCompass# python test-gemm.py
Matmul :
input1_shape: [512, 12288]
input2_shape: [12288, 12288]
Traceback (most recent call last):
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 3812, in get_loc
return self._engine.get_loc(casted_key)
File "pandas/_libs/index.pyx", line 167, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 196, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 2606, in pandas._libs.hashtable.Int64HashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 2630, in pandas._libs.hashtable.Int64HashTable.get_item
KeyError: 4096
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "pandas/_libs/index.pyx", line 786, in pandas._libs.index.BaseMultiIndexCodesEngine.get_loc
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 3819, in get_loc
raise KeyError(key) from err
KeyError: 4096
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/indexes/multi.py", line 3239, in _get_loc_level
return (self._engine.get_loc(key), None)
File "pandas/_libs/index.pyx", line 789, in pandas._libs.index.BaseMultiIndexCodesEngine.get_loc
KeyError: (32, 4096, 128, 128, 128, 'os')
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/*/LLMCompass/software_model/matmul.py", line 1417, in simulate_systolic_array_cycle_count
cycle_count = look_up_table.loc[
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/indexing.py", line 1184, in __getitem__
return self._getitem_tuple(key)
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/indexing.py", line 1368, in _getitem_tuple
return self._getitem_lowerdim(tup)
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/indexing.py", line 1041, in _getitem_lowerdim
return self._getitem_nested_tuple(tup)
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/indexing.py", line 1153, in _getitem_nested_tuple
obj = getattr(obj, self.name)._getitem_axis(key, axis=axis)
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/indexing.py", line 1431, in _getitem_axis
return self._get_label(key, axis=axis)
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/indexing.py", line 1381, in _get_label
return self.obj.xs(label, axis=axis)
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/generic.py", line 4312, in xs
loc, new_index = index._get_loc_level(key, level=0)
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/indexes/multi.py", line 3241, in _get_loc_level
raise KeyError(key) from err
KeyError: (32, 4096, 128, 128, 128, 'os')
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 3812, in get_loc
return self._engine.get_loc(casted_key)
File "pandas/_libs/index.pyx", line 167, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 196, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 2606, in pandas._libs.hashtable.Int64HashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 2630, in pandas._libs.hashtable.Int64HashTable.get_item
KeyError: 4096
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "pandas/_libs/index.pyx", line 786, in pandas._libs.index.BaseMultiIndexCodesEngine.get_loc
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 3819, in get_loc
raise KeyError(key) from err
KeyError: 4096
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/indexes/multi.py", line 3239, in _get_loc_level
return (self._engine.get_loc(key), None)
File "pandas/_libs/index.pyx", line 789, in pandas._libs.index.BaseMultiIndexCodesEngine.get_loc
KeyError: (4096, 32, 128, 128, 128, 'os')
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/*/LLMCompass/software_model/matmul.py", line 1422, in simulate_systolic_array_cycle_count
cycle_count = look_up_table.loc[
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/indexing.py", line 1184, in __getitem__
return self._getitem_tuple(key)
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/indexing.py", line 1368, in _getitem_tuple
return self._getitem_lowerdim(tup)
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/indexing.py", line 1041, in _getitem_lowerdim
return self._getitem_nested_tuple(tup)
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/indexing.py", line 1153, in _getitem_nested_tuple
obj = getattr(obj, self.name)._getitem_axis(key, axis=axis)
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/indexing.py", line 1431, in _getitem_axis
return self._get_label(key, axis=axis)
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/indexing.py", line 1381, in _get_label
return self.obj.xs(label, axis=axis)
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/generic.py", line 4312, in xs
loc, new_index = index._get_loc_level(key, level=0)
File "/root/anaconda3/envs/llmcompass/lib/python3.9/site-packages/pandas/core/indexes/multi.py", line 3241, in _get_loc_level
raise KeyError(key) from err
KeyError: (4096, 32, 128, 128, 128, 'os')
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/*/LLMCompass/test-gemm.py", line 47, in <module>
latency = (model.compile_and_simulate(pcb, compile_mode="heuristic-TPU-new")
File "/***/LLMCompass/software_model/matmul.py", line 730, in compile_and_simulate
cycle_count = self.simulate(
File "/*/LLMCompass/software_model/matmul.py", line 828, in simulate
l2_tiles[:M_l2_t, :N_l2_t, :K_l2_t] = self.L2TileSimulator(
File "/*/LLMCompass/software_model/matmul.py", line 1017, in __init__
self.compute_cycle_count = self.simulate_l2_tile_compute_cycle_count(
File "/*/LLMCompass/software_model/matmul.py", line 1080, in simulate_l2_tile_compute_cycle_count
l1_tiles[:M_l1_t, -1, :K_l1_t] = Matmul.L1TileSimulator(
File "/*/LLMCompass/software_model/matmul.py", line 1320, in __init__
self.compute_cycle_count = self.simulate_l1_tile_compute_cycle_count(
File "/*/LLMCompass/software_model/matmul.py", line 1350, in simulate_l1_tile_compute_cycle_count
Matmul.simulate_systolic_array_cycle_count(
File "/*/LLMCompass/software_model/matmul.py", line 1428, in simulate_systolic_array_cycle_count
with open(config, "w") as f:
FileNotFoundError: [Errno 2] No such file or directory: './systolic_array_model/temp/systolic_array_2481373.cfg'
(llmcompass) root@testpc115159:/*/LLMCompass#
The test code is as follows:
from software_model.softmax import Softmax
from software_model.matmul import Matmul
from software_model.utils import data_type_dict, Tensor
from hardware_model.device import device_dict
import sys
pcb = device_dict["TPUv3_new"]
model = Matmul(data_type=data_type_dict["fp16"])
_ = model(
Tensor([512,12288]),
Tensor([12288,12288]),)
# latency = (model.compile_and_simulate(pcb, compile_mode="heuristic-GPU") + 2.1e-5)
latency = (model.compile_and_simulate(pcb, compile_mode="heuristic-TPU-new") + 11e-5)
print("latency+overhead")
print(latency)
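How can this problem be solved? From the traceback, the systolic-array lookup table has no entry for the keys (32, 4096, 128, 128, 128, 'os') or (4096, 32, 128, 128, 128, 'os'), and the fallback then fails with a FileNotFoundError when it tries to write './systolic_array_model/temp/systolic_array_2481373.cfg'.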
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels