From ab8bb9663377bd3cff58b1971a03500207b3be94 Mon Sep 17 00:00:00 2001
From: Liu Yiqun
Date: Mon, 2 Feb 2026 11:09:52 +0800
Subject: [PATCH 1/7] Integrate dtype_generalizer.

---
 graph_net/tools/generate_subgraph_dataset.sh | 63 ++++++++++++++------
 1 file changed, 46 insertions(+), 17 deletions(-)

diff --git a/graph_net/tools/generate_subgraph_dataset.sh b/graph_net/tools/generate_subgraph_dataset.sh
index a56ecaaf6..c550933db 100755
--- a/graph_net/tools/generate_subgraph_dataset.sh
+++ b/graph_net/tools/generate_subgraph_dataset.sh
@@ -1,29 +1,32 @@
 #!/bin/bash
 set -x
-MIN_SEQ_OPS=${1:-16}
+MIN_SEQ_OPS=${1:-4}
 MAX_SEQ_OPS=${2:-64}
-GPU_ID=${3:-0}
+GPU_ID=${3:-5}
 OP_RANGE=$MIN_SEQ_OPS-$MAX_SEQ_OPS
 export CUDA_VISIBLE_DEVICES="${GPU_ID}"
+export PYTHONPATH=/work/GraphNet:/work/abstract_pass/Athena:$PYTHONPATH
 GRAPH_NET_ROOT=$(python3 -c "import graph_net; import os; print(os.path.dirname(os.path.dirname(graph_net.__file__)))")
 RESUME="true"
-DECOMPOSE_WORKSPACE=/tmp/subgraph_dataset_workspace
-OP_NAMES_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/sample_op_names
-RANGE_DECOMPOSE_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/range_decompose
-GRAPH_VAR_RENAME_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/graph_var_renamed
-DEDUPLICATED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/deduplicated
-DEVICE_REWRITED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/device_rewrited
-CUMSUM_NUM_KERNELS_DIR=$DECOMPOSE_WORKSPACE/cumsum_num_kernels
-FUSIBLE_SUBGRAPH_RANGES_DIR=$DECOMPOSE_WORKSPACE/fusible_subgraph_ranges
-FUSIBLE_SUBGRAPH_SAMPLES_DIR=$DECOMPOSE_WORKSPACE/fusible_subgraph_samples
-RENAMED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/renamed_fusible_subgraphs
-DEDUPLICATED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/deduplicated_fusible_subgraphs
-UNITTESTS_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/unittests
+DECOMPOSE_WORKSPACE=/work/graphnet_test_workspace/subgraph_dataset_20260202
+OP_NAMES_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/01_sample_op_names
+SUBGRAPH_RANGES_JSON_ROOT=$DECOMPOSE_WORKSPACE/02_subgraph_ranges
+RANGE_DECOMPOSE_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/03_range_decompose_subgraphs
+GRAPH_VAR_RENAME_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/04_renamed_subgraphs
+DEDUPLICATED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/05_deduplicated_subgraphs
+DEVICE_REWRITED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/06_device_rewrited_subgraphs
+CUMSUM_NUM_KERNELS_DIR=$DECOMPOSE_WORKSPACE/07_cumsum_num_kernels
+FUSIBLE_SUBGRAPH_RANGES_DIR=$DECOMPOSE_WORKSPACE/08_fusible_subgraph_ranges
+FUSIBLE_SUBGRAPH_SAMPLES_DIR=$DECOMPOSE_WORKSPACE/09_fusible_subgraph_samples
+RENAMED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/10_renamed_fusible_subgraphs
+DEDUPLICATED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/11_deduplicated_fusible_subgraphs
+DTYPE_GENERALIZED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/12_dtype_generalized_fusible_subgraphs
+UNITTESTS_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/13_kernelbench_unittests

 mkdir -p "$DECOMPOSE_WORKSPACE"

@@ -77,7 +80,7 @@ function generate_split_point() {
     --sample-pass-config=$(base64 -w 0 <>> [10] Data type generalizer for samples under ${DEDUPLICATED_FUSIBLE_SUBGRAPH_DIR}."
+  echo ">>>"
+  python3 -m graph_net.apply_sample_pass \
+    --model-path-list $deduplicated_fusible_subgraphs_list \
+    --sample-pass-file-path "$GRAPH_NET_ROOT/graph_net/torch/sample_pass/dtype_generalizer.py" \
+    --sample-pass-class-name ApplyDataTypeGeneralizationPasses \
+    --sample-pass-config $(base64 -w 0 <>> [10] Generate unittests for subgraph samples under ${DEDUPLICATED_FUSIBLE_SUBGRAPH_DIR}."
+  echo ">>> [11] Generate unittests for subgraph samples under ${DEDUPLICATED_FUSIBLE_SUBGRAPH_DIR}."
echo ">>>" python3 -m graph_net.model_path_handler \ --model-path-list ${deduplicated_fusible_subgraphs_list} \ @@ -307,6 +333,9 @@ main() { remove_duplicate_fusible_graphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_remove_duplicate_fusible_graphs_${suffix}.txt generate_subgraph_list ${DEDUPLICATED_FUSIBLE_SUBGRAPH_DIR} ${deduplicated_fusible_subgraphs_list} + dtype_generalizer 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_dtype_generalizer_${suffix}.txt + exit + generate_unittests 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_unittests_${suffix}.txt } From 7f06581482b434fd16d49f8fc82a8c0d6cadfe70 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Mon, 2 Feb 2026 13:49:40 +0800 Subject: [PATCH 2/7] Copy graph_net.json in SubgraphGenerator. --- graph_net/torch/sample_pass/subgraph_generator.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/graph_net/torch/sample_pass/subgraph_generator.py b/graph_net/torch/sample_pass/subgraph_generator.py index a014a8169..682a7bf69 100644 --- a/graph_net/torch/sample_pass/subgraph_generator.py +++ b/graph_net/torch/sample_pass/subgraph_generator.py @@ -115,6 +115,8 @@ def fn(submodule, seq_no): return fn def _choose_device(self, device) -> str: + if device is None: + return None if device in ["cpu", "cuda"]: return device return "cuda" if torch.cuda.is_available() else "cpu" @@ -165,6 +167,7 @@ def forward(self, *args): if not self.extracted: self.builtin_extractor(self.submodule, args) self._reset_tensor_metas_by_parent() + self._copy_graph_net_json_from_parent() self.extracted = True return self.submodule(*args) @@ -181,6 +184,14 @@ def _reset_tensor_metas_by_parent(self): const_file_path=parent_model_path / "weight_meta.py", ) + def _copy_graph_net_json_from_parent(self): + parent_model_path = ( + Path(self.parent_graph_model_path_root) / self.parent_graph_rel_model_path + ) + src_path = parent_model_path / "graph_net.json" + dst_path = self._get_model_path() / "graph_net.json" + shutil.copyfile(src_path, dst_path) + def _save_subgraph_sources(self): sources_json_obj = self._get_sources_json_obj() model_path = self._get_model_path() From 04a18588098b08ebe8c60d868685b7e9e9cd2d25 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Mon, 2 Feb 2026 20:09:22 +0800 Subject: [PATCH 3/7] Minor fix. --- graph_net/tools/generate_subgraph_dataset.sh | 122 +++++++++---------- 1 file changed, 56 insertions(+), 66 deletions(-) diff --git a/graph_net/tools/generate_subgraph_dataset.sh b/graph_net/tools/generate_subgraph_dataset.sh index b06287e7d..3def2c614 100755 --- a/graph_net/tools/generate_subgraph_dataset.sh +++ b/graph_net/tools/generate_subgraph_dataset.sh @@ -39,25 +39,17 @@ deduplicated_subgraph_list=${DECOMPOSE_WORKSPACE}/deduplicated_subgraph_sample_l fusible_subgraph_list=${DECOMPOSE_WORKSPACE}/fusible_subgraph_sample_list.txt deduplicated_fusible_subgraphs_list=${DECOMPOSE_WORKSPACE}/deduplicated_fusible_subgraph_sample_list.txt group_subgraph_sources_list=${DECOMPOSE_WORKSPACE}/group_subgraph_sources_sample_list.txt -dimension_generalizer_samples_list=dimension_generalizer_sample_list.txt +dimension_generalized_subgraph_list=${DECOMPOSE_WORKSPACE}/dimension_generalized_subgraph_sample_list.txt -function generate_subgraph_list_by_index() { +function generate_subgraph_list_common() { local target_dir="$1" - local sample_list_name="$2" - local max_index="$3" - echo ">>> Generate subgraph_sample_list for samples under ${target_dir} with index 0-${max_index}." + local sample_list="$2" + echo ">>> Generate subgraph_sample_list for samples under ${target_dir}." 
echo ">>>" - for index in $(seq 0 $max_index); do - local sample_list="${target_dir}/${index}/${sample_list_name}" - echo ">>> Generating list for index ${index}" - - find ${target_dir}/${index} -name "model.py" \ - | xargs dirname \ - | xargs realpath --relative-to=${target_dir}/${index} \ - | tee $sample_list - - echo "Generated: $sample_list" - done + find ${target_dir} -name "model.py" \ + | xargs dirname \ + | xargs realpath --relative-to=${target_dir} \ + | tee $sample_list } function generate_subgraph_list() { @@ -102,7 +94,7 @@ function dimension_generalizer(){ --sample-pass-class-name "ApplyDimGenPasses" \ --sample-pass-config $(base64 -w 0 <>> [3] Generate op_names.txt for samples in ${device_rewrited_sample_list}." + echo ">>> [3] Generate op_names.txt for samples in ${model_list}." echo ">>>" python3 -m graph_net.model_path_handler \ - --model-path-list $device_rewrited_sample_list \ + --model-path-list $model_list \ --handler-config=$(base64 -w 0 <>> [4] Generate split points for samples in ${device_rewrited_sample_list}." + echo ">>> [4] Generate subgraph_ranges.json for samples in ${model_list}." echo ">>> MIN_SEQ_OPS: ${MIN_SEQ_OPS}, MAX_SEQ_OPS: ${MAX_SEQ_OPS}" echo ">>>" python3 -m graph_net.apply_sample_pass \ - --model-path-list $device_rewrited_sample_list \ + --model-path-list $model_list \ --sample-pass-file-path $GRAPH_NET_ROOT/graph_net/torch/sample_pass/typical_sequence_split_points.py \ --sample-pass-class-name TypicalSequenceSplitPointsGenerator \ --sample-pass-config=$(base64 -w 0 <>> [8] Generate fusible subgraphs for subgraph samples under ${DEVICE_REWRITED_OUTPUT_DIR}." + echo ">>> [8] Generate fusible subgraphs for subgraph samples under ${DEDUPLICATED_OUTPUT_DIR}." echo ">>>" python3 -m graph_net.model_path_handler \ --use-subprocess \ @@ -321,18 +314,20 @@ function remove_duplicate_fusible_graphs() { } function subgraph_dimension_generalizer(){ - echo ">>> [11] Generating dimension_subgraph samples under ${DIMENSION_GENERALIZER_OUTPUT_DIR}." + echo ">>> [11] Generate dimension generalized subgraph samples under ${DIMENSION_GENERALIZED_OUTPUT_DIR}." for index in {0..8}; do - echo ">>> Generating dimension_subgraph variant index: ${index}" + echo ">>> Generating dimension generalized subgraph variant index: ${index}" + dimension_generalized_sample_list="${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index}/dimension_generalized_sample_list.txt" + generate_subgraph_list ${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index} ${dimension_generalized_samples_list} python3 -m graph_net.model_path_handler \ - --model-path-list "${DIMENSION_GENERALIZER_OUTPUT_DIR}/${index}/${dimension_generalizer_samples_list}" \ - --handler-config $(base64 -w 0 <>> [13] Generate unittests for subgraph samples under ${DIMENSION_SUBGRAPH_SAMPLES_OUTPUT_DIR}. " + echo ">>> [13] Generate unittests for subgraph samples under ${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR}. 
" echo ">>>" - for index in {0..8}; do - echo ">>> Generating unittests variant index: ${index}" - python3 -m graph_net.model_path_handler \ - --model-path-list "${DIMENSION_SUBGRAPH_SAMPLES_OUTPUT_DIR}/${index}/dimension_subgraph_list.txt" \ - --handler-config=$(base64 -w 0 <&1 | tee ${DECOMPOSE_WORKSPACE}/log_rewrite_device_${suffix}.txt - generate_subgraph_list ${DEVICE_REWRITED_OUTPUT_DIR} ${device_rewrited_subgraph_list} - - dimension_generalizer 2>&1 | tee ${DIMENSION_GENERALIZER_OUTPUT_DIR}/log_dimension_generalizer_${suffix}.txt - generate_subgraph_list_by_index ${DIMENSION_GENERALIZER_OUTPUT_DIR} ${dimension_generalizer_samples_list} 8 - - generate_op_names 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_op_names_${suffix}.txt - generate_split_point 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_split_point_${suffix}.txt - range_decompose 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_range_decompose_${suffix}.txt - generate_subgraph_list ${RANGE_DECOMPOSE_OUTPUT_DIR} ${range_decomposed_subgraph_list} - - rename_decomposed_subgraph 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rename_decomposed_subgraph_${suffix}.txt - remove_duplicate_renamed_graphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_remove_duplicate_renamed_graphs_${suffix}.txt - generate_subgraph_list ${DEDUPLICATED_OUTPUT_DIR} ${deduplicated_subgraph_list} - - gen_fusible_subgraphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_fusible_subgraphs_${suffix}.txt - generate_subgraph_list ${FUSIBLE_SUBGRAPH_SAMPLES_DIR} ${fusible_subgraph_list} - - rename_fusible_subgraph 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rename_fusible_subgraph_${suffix}.txt - remove_duplicate_fusible_graphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_remove_duplicate_fusible_graphs_${suffix}.txt - generate_subgraph_list ${DEDUPLICATED_FUSIBLE_SUBGRAPH_DIR} ${deduplicated_fusible_subgraphs_list} - - get_dimension_subgraph 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_get_dimension_subgraph_${suffix}.txt - generate_subgraph_list_by_index ${DIMENSION_SUBGRAPH_SAMPLES_OUTPUT_DIR} dimension_subgraph_list.txt 8 + #rewrite_device 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rewrite_device_${suffix}.txt + #generate_subgraph_list ${DEVICE_REWRITED_OUTPUT_DIR} ${device_rewrited_sample_list} + + #dimension_generalizer 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_dimension_generalizer_${suffix}.txt + + #generate_op_names 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_op_names_${suffix}.txt + #generate_split_point 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_split_point_${suffix}.txt + #range_decompose 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_range_decompose_${suffix}.txt + #generate_subgraph_list ${RANGE_DECOMPOSE_OUTPUT_DIR} ${range_decomposed_subgraph_list} + + #rename_decomposed_subgraph 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rename_decomposed_subgraph_${suffix}.txt + #remove_duplicate_renamed_graphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_remove_duplicate_renamed_graphs_${suffix}.txt + #generate_subgraph_list ${DEDUPLICATED_OUTPUT_DIR} ${deduplicated_subgraph_list} + + #gen_fusible_subgraphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_fusible_subgraphs_${suffix}.txt + #generate_subgraph_list ${FUSIBLE_SUBGRAPH_SAMPLES_DIR} ${fusible_subgraph_list} + + #rename_fusible_subgraph 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rename_fusible_subgraph_${suffix}.txt + #remove_duplicate_fusible_graphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_remove_duplicate_fusible_graphs_${suffix}.txt + #generate_subgraph_list ${DEDUPLICATED_FUSIBLE_SUBGRAPH_DIR} ${deduplicated_fusible_subgraphs_list} + + #subgraph_dimension_generalizer 2>&1 | tee 
${DECOMPOSE_WORKSPACE}/log_subgraph_dimension_generalizer_${suffix}.txt + #generate_subgraph_list_common ${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR} ${dimension_generalized_subgraph_list} #dtype_generalizer 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_dtype_generalizer_${suffix}.txt From 049bdc99fa9fae990b5fa12bc35d1d33587c2c50 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Tue, 3 Feb 2026 10:12:04 +0800 Subject: [PATCH 4/7] Remove the subgraph generation for original fusible samples. --- graph_net/tools/generate_subgraph_dataset.sh | 96 ++++++++------------ 1 file changed, 38 insertions(+), 58 deletions(-) diff --git a/graph_net/tools/generate_subgraph_dataset.sh b/graph_net/tools/generate_subgraph_dataset.sh index 3def2c614..6776d1d22 100755 --- a/graph_net/tools/generate_subgraph_dataset.sh +++ b/graph_net/tools/generate_subgraph_dataset.sh @@ -23,10 +23,11 @@ GRAPH_VAR_RENAME_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/06_renamed_subgraphs DEDUPLICATED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/07_deduplicated_subgraphs CUMSUM_NUM_KERNELS_DIR=$DECOMPOSE_WORKSPACE/08_cumsum_num_kernels FUSIBLE_SUBGRAPH_RANGES_DIR=$DECOMPOSE_WORKSPACE/09_fusible_subgraph_ranges -FUSIBLE_SUBGRAPH_SAMPLES_DIR=$DECOMPOSE_WORKSPACE/10_fusible_subgraph_samples -RENAMED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/11_renamed_fusible_subgraphs -DEDUPLICATED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/12_deduplicated_fusible_subgraphs -SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/13_dimension_generalized_subgraphs +GROUPED_FUSIBLE_SUBGRAPH_RANGES_DIR=$DECOMPOSE_WORKSPACE/10_grouped_fusible_subgraph_ranges +#FUSIBLE_SUBGRAPH_SAMPLES_DIR=$DECOMPOSE_WORKSPACE/10_fusible_subgraph_samples +SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/11_dimension_generalized_fusible_subgraphs +RENAMED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/12_renamed_dimension_generalized_fusible_subgraphs +DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/13_deduplicated_dimension_generalized_fusible_subgraphs #DTYPE_GENERALIZED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/12_dtype_generalized_fusible_subgraphs UNITTESTS_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/14_kernelbench_unittests @@ -36,14 +37,12 @@ model_list="$GRAPH_NET_ROOT/graph_net/config/small100_torch_samples_list.txt" device_rewrited_sample_list=${DECOMPOSE_WORKSPACE}/device_rewrited_sample_list.txt range_decomposed_subgraph_list=${DECOMPOSE_WORKSPACE}/range_decomposed_subgraph_sample_list.txt deduplicated_subgraph_list=${DECOMPOSE_WORKSPACE}/deduplicated_subgraph_sample_list.txt -fusible_subgraph_list=${DECOMPOSE_WORKSPACE}/fusible_subgraph_sample_list.txt -deduplicated_fusible_subgraphs_list=${DECOMPOSE_WORKSPACE}/deduplicated_fusible_subgraph_sample_list.txt -group_subgraph_sources_list=${DECOMPOSE_WORKSPACE}/group_subgraph_sources_sample_list.txt dimension_generalized_subgraph_list=${DECOMPOSE_WORKSPACE}/dimension_generalized_subgraph_sample_list.txt +deduplicated_fusible_subgraphs_list=${DECOMPOSE_WORKSPACE}/deduplicated_dimension_generalized_subgraph_sample_list.txt -function generate_subgraph_list_common() { +function generate_generalized_subgraph_list() { local target_dir="$1" - local sample_list="$2" + local sample_list="$2" echo ">>> Generate subgraph_sample_list for samples under ${target_dir}." 
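  # Given e.g. ${target_dir}/resnet18/subgraph_0/model.py (sample layout assumed
  # for illustration), the find | xargs pipeline below prints each sample
  # directory relative to ${target_dir} (here: resnet18/subgraph_0), one per
  # line, and tees the result into ${sample_list}.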
echo ">>>" find ${target_dir} -name "model.py" \ @@ -260,16 +259,23 @@ EOF } EOF ) +} - python3 -m graph_net.model_path_handler \ - --model-path-list "$model_list" \ - --handler-config $(base64 -w 0 <>> [9] Generate dimension generalized subgraph samples under ${DIMENSION_GENERALIZED_OUTPUT_DIR}." + for index in {0..8}; do + echo ">>> Generating dimension generalized subgraph variant index: ${index}" + dimension_generalized_sample_list="${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index}/dimension_generalized_sample_list.txt" + generate_subgraph_list ${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index} ${dimension_generalized_samples_list} + python3 -m graph_net.model_path_handler \ + --model-path-list "${dimension_generalized_sample_list}" \ + --handler-config $(base64 -w 0 <>> [9] Rename subgraph samples under ${FUSIBLE_SUBGRAPH_SAMPLES_DIR}." +function rename_dimension_generalized_fusible_subgraph() { + echo ">>> [10] Rename subgraph samples under ${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR}." echo ">>>" python3 -m graph_net.model_path_handler \ - --model-path-list ${fusible_subgraph_list} \ + --model-path-list ${dimension_generalized_subgraph_list} \ --handler-config=$(base64 -w 0 <>> [10] Remove duplicated subgraph samples under ${RENAMED_FUSIBLE_SUBGRAPH_DIR}." +function remove_duplicate_dimension_generalized_fusible_graphs() { + echo ">>> [11] Remove duplicated subgraph samples under ${RENAMED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}." echo ">>>" - python3 -m graph_net.tools.deduplicated \ - --samples-dir ${RENAMED_FUSIBLE_SUBGRAPH_DIR} \ - --target-dir ${DEDUPLICATED_FUSIBLE_SUBGRAPH_DIR} -} - -function subgraph_dimension_generalizer(){ - echo ">>> [11] Generate dimension generalized subgraph samples under ${DIMENSION_GENERALIZED_OUTPUT_DIR}." for index in {0..8}; do - echo ">>> Generating dimension generalized subgraph variant index: ${index}" - dimension_generalized_sample_list="${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index}/dimension_generalized_sample_list.txt" - generate_subgraph_list ${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index} ${dimension_generalized_samples_list} - python3 -m graph_net.model_path_handler \ - --model-path-list "${dimension_generalized_sample_list}" \ - --handler-config $(base64 -w 0 <>> [13] Generate unittests for subgraph samples under ${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR}. " + echo ">>> [12] Generate unittests for subgraph samples under ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}. 
" echo ">>>" python3 -m graph_net.model_path_handler \ - --model-path-list ${dimension_generalized_subgraph_list} \ + --model-path-list ${deduplicated_fusible_subgraphs_list} \ --handler-config=$(base64 -w 0 <&1 | tee ${DECOMPOSE_WORKSPACE}/log_fusible_subgraphs_${suffix}.txt - #generate_subgraph_list ${FUSIBLE_SUBGRAPH_SAMPLES_DIR} ${fusible_subgraph_list} - - #rename_fusible_subgraph 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rename_fusible_subgraph_${suffix}.txt - #remove_duplicate_fusible_graphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_remove_duplicate_fusible_graphs_${suffix}.txt - #generate_subgraph_list ${DEDUPLICATED_FUSIBLE_SUBGRAPH_DIR} ${deduplicated_fusible_subgraphs_list} #subgraph_dimension_generalizer 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_subgraph_dimension_generalizer_${suffix}.txt - #generate_subgraph_list_common ${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR} ${dimension_generalized_subgraph_list} + #generate_generalized_subgraph_list ${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR} ${dimension_generalized_subgraph_list} - #dtype_generalizer 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_dtype_generalizer_${suffix}.txt + #rename_dimension_generalized_fusible_subgraph 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rename_dimension_generalized_subgraph_${suffix}.txt + #remove_duplicate_dimension_generalized_fusible_graphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_remove_duplicate_dimension_generalized_subgraphs_${suffix}.txt + #generate_generalized_subgraph_list ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} ${deduplicated_fusible_subgraphs_list} generate_unittests 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_unittests_${suffix}.txt } From bfd1d9a3bfca3948a564e2e62cebfdee2028489a Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Tue, 3 Feb 2026 14:46:13 +0800 Subject: [PATCH 5/7] Merge develop. 
--- graph_net/tools/generate_subgraph_dataset.sh | 42 ++- graph_net_visual/plot_ESt.py | 2 + graph_net_visual/plot_St.py | 2 + sqlite/GraphNet.db | Bin 0 -> 126976 bytes sqlite/Readme.md | 14 + sqlite/graphsample_insert.py | 352 ++++++++++++++++++ sqlite/graphsample_insert.sh | 89 +++++ sqlite/init_db.py | 65 ++++ .../create_main_tables_2026-02-02-031353.sql | 92 +++++ sqlite/orm_models.py | 235 ++++++++++++ 10 files changed, 873 insertions(+), 20 deletions(-) create mode 100644 sqlite/GraphNet.db create mode 100644 sqlite/Readme.md create mode 100644 sqlite/graphsample_insert.py create mode 100644 sqlite/graphsample_insert.sh create mode 100644 sqlite/init_db.py create mode 100644 sqlite/migrates/create_main_tables_2026-02-02-031353.sql create mode 100644 sqlite/orm_models.py diff --git a/graph_net/tools/generate_subgraph_dataset.sh b/graph_net/tools/generate_subgraph_dataset.sh index 6776d1d22..fc5bdf054 100755 --- a/graph_net/tools/generate_subgraph_dataset.sh +++ b/graph_net/tools/generate_subgraph_dataset.sh @@ -13,6 +13,7 @@ export PYTHONPATH=/work/GraphNet:/work/abstract_pass/Athena:$PYTHONPATH GRAPH_NET_ROOT=$(python3 -c "import graph_net; import os; print(os.path.dirname(os.path.dirname(graph_net.__file__)))") RESUME="true" +#DECOMPOSE_WORKSPACE=/tmp/subgraph_dataset_workspace DECOMPOSE_WORKSPACE=/work/graphnet_test_workspace/subgraph_dataset_20260202 DEVICE_REWRITED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/01_device_rewrited_samples DIMENSION_GENERALIZED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/02_dimension_generalized_samples @@ -24,11 +25,10 @@ DEDUPLICATED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/07_deduplicated_subgraphs CUMSUM_NUM_KERNELS_DIR=$DECOMPOSE_WORKSPACE/08_cumsum_num_kernels FUSIBLE_SUBGRAPH_RANGES_DIR=$DECOMPOSE_WORKSPACE/09_fusible_subgraph_ranges GROUPED_FUSIBLE_SUBGRAPH_RANGES_DIR=$DECOMPOSE_WORKSPACE/10_grouped_fusible_subgraph_ranges -#FUSIBLE_SUBGRAPH_SAMPLES_DIR=$DECOMPOSE_WORKSPACE/10_fusible_subgraph_samples SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/11_dimension_generalized_fusible_subgraphs RENAMED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/12_renamed_dimension_generalized_fusible_subgraphs DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/13_deduplicated_dimension_generalized_fusible_subgraphs -#DTYPE_GENERALIZED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/12_dtype_generalized_fusible_subgraphs +#DTYPE_GENERALIZED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/14_dtype_generalized_fusible_subgraphs UNITTESTS_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/14_kernelbench_unittests mkdir -p "$DECOMPOSE_WORKSPACE" @@ -207,7 +207,7 @@ function remove_duplicate_renamed_graphs() { --target-dir ${DEDUPLICATED_OUTPUT_DIR} } -function gen_fusible_subgraphs() { +function gen_fusible_subgraph_ranges() { echo ">>> [8] Generate fusible subgraphs for subgraph samples under ${DEDUPLICATED_OUTPUT_DIR}." 
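  # For reference, the *-config flags in this script carry a base64-encoded
  # heredoc, most likely a small JSON document; an equivalent, purely
  # illustrative construction (key names hypothetical) would be:
  #   config='{"output_dir": "/tmp/fusible_ranges"}'
  #   python3 -m graph_net.model_path_handler --handler-config=$(echo -n "$config" | base64 -w 0)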
echo ">>>" python3 -m graph_net.model_path_handler \ @@ -373,29 +373,31 @@ main() { timestamp=`date +%Y%m%d_%H%M` suffix="${OP_RANGE}ops_${timestamp}" - #rewrite_device 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rewrite_device_${suffix}.txt - #generate_subgraph_list ${DEVICE_REWRITED_OUTPUT_DIR} ${device_rewrited_sample_list} + # rewrite the device in model to cuda + rewrite_device 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rewrite_device_${suffix}.txt + generate_subgraph_list ${DEVICE_REWRITED_OUTPUT_DIR} ${device_rewrited_sample_list} - #dimension_generalizer 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_dimension_generalizer_${suffix}.txt + # whole-graph dimension generalization + dimension_generalizer 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_dimension_generalizer_${suffix}.txt - #generate_op_names 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_op_names_${suffix}.txt - #generate_split_point 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_split_point_${suffix}.txt - #range_decompose 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_range_decompose_${suffix}.txt - #generate_subgraph_list ${RANGE_DECOMPOSE_OUTPUT_DIR} ${range_decomposed_subgraph_list} + # typical subgraph decomposition + generate_op_names 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_op_names_${suffix}.txt + generate_split_point 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_split_point_${suffix}.txt + range_decompose 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_range_decompose_${suffix}.txt + generate_subgraph_list ${RANGE_DECOMPOSE_OUTPUT_DIR} ${range_decomposed_subgraph_list} - #rename_decomposed_subgraph 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rename_decomposed_subgraph_${suffix}.txt - #remove_duplicate_renamed_graphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_remove_duplicate_renamed_graphs_${suffix}.txt - #generate_subgraph_list ${DEDUPLICATED_OUTPUT_DIR} ${deduplicated_subgraph_list} + # generate fusible subgraph ranges + gen_fusible_subgraph_ranges 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_fusible_subgraphs_${suffix}.txt - #gen_fusible_subgraphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_fusible_subgraphs_${suffix}.txt + # subgraph dimension generalization + subgraph_dimension_generalizer 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_subgraph_dimension_generalizer_${suffix}.txt + generate_generalized_subgraph_list ${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR} ${dimension_generalized_subgraph_list} - #subgraph_dimension_generalizer 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_subgraph_dimension_generalizer_${suffix}.txt - #generate_generalized_subgraph_list ${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR} ${dimension_generalized_subgraph_list} - - #rename_dimension_generalized_fusible_subgraph 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rename_dimension_generalized_subgraph_${suffix}.txt - #remove_duplicate_dimension_generalized_fusible_graphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_remove_duplicate_dimension_generalized_subgraphs_${suffix}.txt - #generate_generalized_subgraph_list ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} ${deduplicated_fusible_subgraphs_list} + rename_dimension_generalized_fusible_subgraph 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rename_dimension_generalized_subgraph_${suffix}.txt + remove_duplicate_dimension_generalized_fusible_graphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_remove_duplicate_dimension_generalized_subgraphs_${suffix}.txt + generate_generalized_subgraph_list ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} ${deduplicated_fusible_subgraphs_list} + # generate kernelbench format unittest generate_unittests 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_unittests_${suffix}.txt } diff --git 
a/graph_net_visual/plot_ESt.py b/graph_net_visual/plot_ESt.py
index 4427f3715..f44a57f4f 100644
--- a/graph_net_visual/plot_ESt.py
+++ b/graph_net_visual/plot_ESt.py
@@ -253,6 +253,8 @@ def plot_ES_results(s_scores: dict, args: argparse.Namespace):
     ax.xaxis.grid(True, which="major", lw=0.7, ls=":", color="grey", alpha=0.5)
     ax.yaxis.grid(True, which="major", lw=0.7, ls=":", color="grey", alpha=0.5)

+    ax.set_ylim(bottom=0)
+
     return fig, ax, all_x_coords

diff --git a/graph_net_visual/plot_St.py b/graph_net_visual/plot_St.py
index 27194ccd9..7e80b573f 100644
--- a/graph_net_visual/plot_St.py
+++ b/graph_net_visual/plot_St.py
@@ -56,6 +56,8 @@ def plot_St(s_scores: dict, cli_args: argparse.Namespace):
     ax.xaxis.grid(True, which="major", lw=0.8, ls=":", color="grey", alpha=0.5)
     ax.yaxis.grid(True, which="major", lw=0.8, ls=":", color="grey", alpha=0.5)

+    ax.set_ylim(bottom=0)
+
     ax.legend(fontsize=16, loc="best")
     output_file = os.path.join(cli_args.output_dir, "St_result.png")
     plt.savefig(output_file, dpi=300, bbox_inches="tight")
diff --git a/sqlite/GraphNet.db b/sqlite/GraphNet.db
new file mode 100644
index 0000000000000000000000000000000000000000..cefb9086a0cfb763fc314b40bc6a92f109fac6d6
GIT binary patch
literal 126976
[base85-encoded binary data omitted]

literal 0
HcmV?d00001

diff --git a/sqlite/Readme.md b/sqlite/Readme.md
new file mode 100644
index 000000000..2ffe098f4
--- /dev/null
+++ b/sqlite/Readme.md
@@ -0,0 +1,14 @@
+work under /GraphNet/
+
+mkdir -p sqlite/logs
+
+# migrate database
+# Use default database path
+python ./sqlite/init_db.py
+
+# Specify custom database path
+python ./sqlite/init_db.py --db_path sqlite/GraphNet.db
+
+
+# Add data to database
+bash ./sqlite/graphsample_insert.sh | tee "sqlite/logs/insert_$(date +'%Y-%m-%d-%H%M%S').log"
diff --git a/sqlite/graphsample_insert.py b/sqlite/graphsample_insert.py
new file mode 100644
index 000000000..faecbea95
--- /dev/null
+++ b/sqlite/graphsample_insert.py
@@ -0,0 +1,352 @@
+import sqlite3
+import json
+import argparse
+from pathlib import Path
+from datetime import datetime
+import uuid as uuid_lib
+import re
+from orm_models import (
+    get_session,
+    GraphSample,
+    SubgraphSource,
+    DimensionGeneralizationSource,
+    DataTypeGeneralizationSource,
+)
+from sqlalchemy.exc import IntegrityError
+
+
+# graph_sample insert func
+def get_graph_sample_data(
+    model_path_prefix: str,
+    relative_model_path: str,
+    repo_uid: str,
+    sample_type: str,
+    order_value: int,
+) -> dict:
+    model_path = Path(model_path_prefix) / relative_model_path
+    data = {
+        "uuid": _get_uuid(),
+        "repo_uid": repo_uid,
+        "relative_model_path": relative_model_path,
+        "sample_type": sample_type,
+        "is_subgraph": _is_subgraph(sample_type),
+        "num_ops": _get_num_ops(model_path, sample_type),
+        "graph_hash": _get_graph_hash(model_path),
+        "order_value": order_value,
+        "create_at": _get_create_at(),
+        "deleted": False,
+        "delete_at": None,
+    }
+    return data
+
+
+def insert_graph_sample(db_path: str, data: dict, model_path_prefix: str):
+    session = get_session(db_path)
+    try:
+        graph_sample = GraphSample(**data)
+        session.add(graph_sample)
+        session.commit()
+        return graph_sample
+    except IntegrityError as e:
+        session.rollback()
+        raise e
+    finally:
+        session.close()
+
+
+# subgraph source insert func
+def insert_subgraph_source(
+    subgraph_uuid: str, model_path_prefix: str, relative_model_path: str, db_path: str
+):
+    session = get_session(db_path)
+    try:
+        parent_relative_path = get_parent_relative_path(relative_model_path)
+        full_graph = (
+            session.query(GraphSample)
+            .filter(
+                GraphSample.relative_model_path == parent_relative_path,
+                GraphSample.sample_type == "full_graph",
+            )
+            .first()
+        )
+
+        if not full_graph:
+            raise ValueError(f"Full graph not found for path: {parent_relative_path}")
+
+        range_info = _get_range_info(model_path_prefix, relative_model_path)
+        subgraph_source = SubgraphSource(
+            subgraph_uuid=subgraph_uuid,
+            full_graph_uuid=full_graph.uuid,
+            range_start=range_info["start"],
+            range_end=range_info["end"],
+            create_at=datetime.now(),
+            deleted=False,
+            delete_at=None,
+        )
+        session.add(subgraph_source)
+        session.commit()
+
+        return {
+            "subgraph_uuid": subgraph_source.subgraph_uuid,
+            "full_graph_uuid": subgraph_source.full_graph_uuid,
+            "range_start": subgraph_source.range_start,
+            "range_end": subgraph_source.range_end,
+        }
+    except IntegrityError as e:
+        session.rollback()
+        raise e
+    finally:
+        session.close()
+
+
+def _get_range_info(model_path_prefix: str, relative_model_path: str):
+    model_path = Path(model_path_prefix) / relative_model_path
+    subgraph_sources_file = model_path / "subgraph_sources.json"
+    if not
subgraph_sources_file.exists(): + return {"start": -1, "end": -1} + + try: + with open(subgraph_sources_file) as f: + data = json.load(f) + for key, ranges in data.items(): + if isinstance(ranges, list): + r = ranges[0] + if isinstance(r, list) and len(r) == 2: + return {"start": r[0], "end": r[1]} + return {"start": -1, "end": -1} + except (json.JSONDecodeError, KeyError, TypeError, IndexError) as e: + print(f"Warning: Failed to parse {subgraph_sources_file}: {e}") + return {"start": -1, "end": -1} + + +def get_parent_relative_path(relative_path: str) -> str: + if "_decomposed" not in relative_path: + return None + + parts = relative_path.split("/") + if len(parts) < 2: + return None + + parent_parts = [] + for part in parts: + if part == "_decomposed": + break + parent_parts.append(part) + + return "/".join(parent_parts) + + +# full_graph insert func +def _get_uuid() -> str: + return uuid_lib.uuid4().hex + + +def _is_subgraph(sample_type: str) -> bool: + return sample_type not in ("full_graph") + + +def _get_num_ops(model_path: Path, sample_type: str): + if sample_type == "full_graph": + return -1 + subgraph_sources_file = model_path / "subgraph_sources.json" + if not subgraph_sources_file.exists(): + return -1 + + try: + with open(subgraph_sources_file) as f: + data = json.load(f) + for key, ranges in data.items(): + if isinstance(ranges, list): + r = ranges[0] + if isinstance(r, list) and len(r) == 2: + return r[1] - r[0] + + return -1 + except (json.JSONDecodeError, KeyError, TypeError, IndexError) as e: + print(f"Warning: Failed to parse {subgraph_sources_file}: {e}") + return -1 + + +def _get_graph_hash(model_path: Path) -> str: + hash_file = model_path / "graph_hash.txt" + if hash_file.exists(): + return hash_file.read_text().strip() + return "" + + +def _get_create_at() -> datetime: + return datetime.now() + + +# DimensionGeneralizationSource insert func +def insert_dimension_generalization_source( + generalized_graph_uuid: str, + original_graph_uuid: str, + model_path_prefix: str, + relative_model_path: str, + db_path: str, +): + session = get_session(db_path) + try: + dimension_source = DimensionGeneralizationSource( + generalized_graph_uuid=generalized_graph_uuid, + original_graph_uuid=original_graph_uuid, + total_element_size=_get_total_element_size( + model_path_prefix, relative_model_path + ), + create_at=datetime.now(), + deleted=False, + delete_at=None, + ) + session.add(dimension_source) + session.commit() + except IntegrityError as e: + session.rollback() + raise e + finally: + session.close() + + +def _get_total_element_size(model_path_prefix: str, relative_model_path: str): + model_path = Path(model_path_prefix) / relative_model_path + weight_meta_file = model_path / "weight_meta.py" + try: + with open(weight_meta_file) as f: + content = f.read() + + shape_matches = re.findall( + r"shape\s*=\s*\[([0-9,\s\.]+(?:\d+)?[^\]]+)\s*\]", content + ) + total_element_size = 0 + for match in shape_matches: + shape_str = match.strip() + shape_element_size = 1 + numbers = re.findall(r"[0-9]+(?:\.[0-9]+)?", shape_str) + for num_str in numbers: + num = float(num_str) if "." 
in num_str else int(num_str) + shape_element_size *= num + + total_element_size += shape_element_size + + return total_element_size + except Exception as e: + print(f"Warning: Failed to parse {weight_meta_file}: {e}") + return -1 + + +# DataTypeGeneralizationSource insert func +def insert_datatype_generalization_source( + generalized_graph_uuid: str, + original_graph_uuid: str, + model_path_prefix: str, + relative_model_path: str, + db_path: str, +): + session = get_session(db_path) + try: + data_type_source = DataTypeGeneralizationSource( + generalized_graph_uuid=generalized_graph_uuid, + original_graph_uuid=original_graph_uuid, + data_type=_get_data_type(model_path_prefix, relative_model_path), + create_at=datetime.now(), + deleted=False, + delete_at=None, + ) + session.add(data_type_source) + session.commit() + except IntegrityError as e: + session.rollback() + raise e + finally: + session.close() + + +def _get_data_type(model_path_prefix: str, relative_model_path: str): + return "todo" + + +# main func +def main(args): + data = get_graph_sample_data( + model_path_prefix=args.model_path_prefix, + relative_model_path=args.relative_model_path, + repo_uid=args.repo_uid, + sample_type=args.sample_type, + order_value=args.order_value, + ) + print(f"\ninsert into database: {args.db_path}") + try: + insert_graph_sample(args.db_path, data, args.model_path_prefix) + if data["is_subgraph"]: + subgraph_source_data = insert_subgraph_source( + data["uuid"], + args.model_path_prefix, + data["relative_model_path"], + args.db_path, + ) + if args.sample_type in ["fusible_graph"]: + insert_dimension_generalization_source( + subgraph_source_data["subgraph_uuid"], + subgraph_source_data["full_graph_uuid"], + args.model_path_prefix, + args.relative_model_path, + args.db_path, + ) + insert_datatype_generalization_source( + subgraph_source_data["subgraph_uuid"], + subgraph_source_data["full_graph_uuid"], + args.model_path_prefix, + args.relative_model_path, + args.db_path, + ) + print(f"success insert: {data['relative_model_path']}") + except sqlite3.IntegrityError as e: + print("insert failed: integrity error (possible duplicate uuid or graph_hash)") + print(f"error info: {e}") + except Exception as e: + print(f"insert failed: {e}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="insert graph sample to database") + parser.add_argument( + "--model_path_prefix", + type=str, + required=True, + default="GraphNet", + help="Prefix of model path root'", + ) + parser.add_argument( + "--relative_model_path", + type=str, + required=True, + help="Path to model folder e.g '../../samples/torch/resnet18'", + ) + parser.add_argument( + "--repo_uid", + type=str, + required=True, + help="Repository uid e.g 'github torch samples', 'github_paddle_samples'", + ) + parser.add_argument( + "--sample_type", + type=str, + required=True, + default="full_graph", + help="Sample type e.g 'full_graph', 'fusible_graph'", + ) + parser.add_argument( + "--order_value", + type=int, + required=True, + help="Order value e.g '1'", + ) + parser.add_argument( + "--db_path", + type=str, + required=False, + default="graphnet.db", + help="Database file path e.g 'graphnet.db'", + ) + args = parser.parse_args() + main(args) diff --git a/sqlite/graphsample_insert.sh b/sqlite/graphsample_insert.sh new file mode 100644 index 000000000..9af4664a8 --- /dev/null +++ b/sqlite/graphsample_insert.sh @@ -0,0 +1,89 @@ +#!/bin/bash +set -x + +GRAPH_NET_ROOT=$(python3 -c "import graph_net; import os; 
print(os.path.dirname(os.path.dirname(graph_net.__file__)))") +DB_PATH="${GRAPH_NET_ROOT}/sqlite/GraphNet.db" +TORCH_MODEL_LIST="graph_net/config/test.txt" +PADDLE_MODEL_LIST="graph_net/config/small10_paddle_samples_list.txt" +TYPICAL_GRAPH_SAMPLES_LIST="tututu/range_decomposed_subgraph_sample_list.txt" +FUSIBLE_GRAPH_SAMPLES_LIST="tututu/fusible_subgraph_sample_list.txt" +SOLE_OP_GRAPH_SAMPLES_LIST="sole_graph/single_operator_sample_list.txt" +ORDER_VALUE=0 + + +if [ ! -f "$DB_PATH" ]; then + echo "Fail ! No Database ! : $DB_PATH" + exit 1 +fi + +while IFS= read -r model_rel_path; do + echo "insert : $model_rel_path" + python3 "${GRAPH_NET_ROOT}/sqlite/graphsample_insert.py" \ + --model_path_prefix "$GRAPH_NET_ROOT" \ + --relative_model_path "$model_rel_path" \ + --repo_uid "github_torch_samples" \ + --sample_type "full_graph" \ + --order_value "$ORDER_VALUE" \ + --db_path "$DB_PATH" + + ((ORDER_VALUE++)) + +done < "$TORCH_MODEL_LIST" + +while IFS= read -r model_rel_path; do + echo "insert : $model_rel_path" + python3 "${GRAPH_NET_ROOT}/sqlite/graphsample_insert.py" \ + --model_path_prefix "$GRAPH_NET_ROOT" \ + --relative_model_path "$model_rel_path" \ + --repo_uid "github_paddle_samples" \ + --sample_type "full_graph" \ + --order_value "$ORDER_VALUE" \ + --db_path "$DB_PATH" + + ((ORDER_VALUE++)) + +done < "$PADDLE_MODEL_LIST" + +while IFS= read -r model_rel_path; do + echo "insert : $model_rel_path" + python3 "${GRAPH_NET_ROOT}/sqlite/graphsample_insert.py" \ + --model_path_prefix "${GRAPH_NET_ROOT}/tututu/range_decompose" \ + --relative_model_path "$model_rel_path" \ + --repo_uid "github_torch_samples" \ + --sample_type "typical_graph" \ + --order_value "$ORDER_VALUE" \ + --db_path "$DB_PATH" + + ((ORDER_VALUE++)) + +done < "$TYPICAL_GRAPH_SAMPLES_LIST" + +while IFS= read -r model_rel_path; do + echo "insert : $model_rel_path" + python3 "${GRAPH_NET_ROOT}/sqlite/graphsample_insert.py" \ + --model_path_prefix "${GRAPH_NET_ROOT}/tututu/fusible_subgraph_samples" \ + --relative_model_path "$model_rel_path" \ + --repo_uid "github_torch_samples" \ + --sample_type "fusible_graph" \ + --order_value "$ORDER_VALUE" \ + --db_path "$DB_PATH" + + ((ORDER_VALUE++)) + +done < "$FUSIBLE_GRAPH_SAMPLES_LIST" + +while IFS= read -r model_rel_path; do + echo "insert : $model_rel_path" + python3 "${GRAPH_NET_ROOT}/sqlite/graphsample_insert.py" \ + --model_path_prefix "${GRAPH_NET_ROOT}/sole_graph" \ + --relative_model_path "$model_rel_path" \ + --repo_uid "github_torch_samples" \ + --sample_type "sole_op_graph" \ + --order_value "$ORDER_VALUE" \ + --db_path "$DB_PATH" + + ((ORDER_VALUE++)) + +done < "$SOLE_OP_GRAPH_SAMPLES_LIST" + +echo "all done" diff --git a/sqlite/init_db.py b/sqlite/init_db.py new file mode 100644 index 000000000..da1d0ab57 --- /dev/null +++ b/sqlite/init_db.py @@ -0,0 +1,65 @@ +import sqlite3 +import re +import argparse +from pathlib import Path + + +def parse_timestamp(filename: str) -> int: + match = re.search(r"(\d{4}-\d{2}-\d{2}-\d{6})", filename) + if match: + timestamp_str = match.group(1).replace("-", "") + return int(timestamp_str) + return 0 + + +def migrate(db_path: str = "sqlite/GraphNet.db", migrates_dir: str = "sqlite/migrates"): + db_path_obj = Path(db_path) + migrates_path = Path(migrates_dir) + + if db_path_obj.exists(): + db_path_obj.unlink() + print(f"Deleted existing database: {db_path}") + + db_path_obj.parent.mkdir(parents=True, exist_ok=True) + db_path_obj.touch() + print(f"Created new database: {db_path}") + + sql_files = list(migrates_path.glob("*.sql")) + if 
not sql_files: + print(f"No migration files found in {migrates_dir}") + return + + sql_files.sort(key=lambda f: parse_timestamp(f.name)) + print(f"Found {len(sql_files)} migration file(s)") + print("=" * 50) + for sql_file in sql_files: + print(f"\nExecuting: {sql_file.name}") + with open(sql_file, "r", encoding="utf-8") as f: + sql_content = f.read() + + try: + conn = sqlite3.connect(db_path) + conn.executescript(sql_content) + conn.commit() + conn.close() + print(f" ✓ Completed: {sql_file.name}") + except Exception as e: + print(f" ✗ Failed: {sql_file.name}") + print(f" Error: {e}") + if Path(db_path).exists(): + Path(db_path).unlink() + + print("\n" + "=" * 50) + print(f"Migration completed. Database: {db_path}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="GraphNet database migration tool") + parser.add_argument( + "--db_path", + type=str, + default="sqlite/GraphNet.db", + help="Database file path (default: sqlite/GraphNet.db)", + ) + args = parser.parse_args() + migrate(args.db_path) diff --git a/sqlite/migrates/create_main_tables_2026-02-02-031353.sql b/sqlite/migrates/create_main_tables_2026-02-02-031353.sql new file mode 100644 index 000000000..67cd89393 --- /dev/null +++ b/sqlite/migrates/create_main_tables_2026-02-02-031353.sql @@ -0,0 +1,92 @@ +-- SQLite +-- create repo table +CREATE TABLE IF NOT EXISTS repo ( + repo_uid VARCHAR(255) NOT NULL PRIMARY KEY, + repo_type VARCHAR(50) NOT NULL, + repo_name VARCHAR(255) NOT NULL, + repo_url TEXT +); +INSERT OR IGNORE INTO repo (repo_uid, repo_type, repo_name, repo_url) VALUES +('github_torch_samples', 'github', 'GraphNet', 'https://github.com/PaddlePaddle/GraphNet'), +('github_paddle_samples', 'github', 'GraphNet', 'https://github.com/PaddlePaddle/GraphNet'); + + +-- create graph_sample table +CREATE TABLE IF NOT EXISTS graph_sample ( + uuid VARCHAR(255) NOT NULL PRIMARY KEY, + repo_uid VARCHAR(255) NOT NULL, + relative_model_path TEXT NOT NULL, + sample_type VARCHAR(50) NOT NULL, + is_subgraph BOOLEAN DEFAULT FALSE, + num_ops INTEGER DEFAULT -1, + graph_hash VARCHAR(255) NOT NULL, + order_value INTEGER, + create_at DATETIME DEFAULT CURRENT_TIMESTAMP, + deleted BOOLEAN DEFAULT FALSE, + delete_at DATETIME, + FOREIGN KEY (repo_uid) REFERENCES repo(repo_uid) +); +CREATE INDEX IF NOT EXISTS idx_relative_model_path ON graph_sample (relative_model_path); +CREATE INDEX IF NOT EXISTS idx_graph_hash ON graph_sample (graph_hash); +CREATE INDEX IF NOT EXISTS idx_order_value ON graph_sample (order_value); +CREATE UNIQUE INDEX IF NOT EXISTS uq_relative_model_path_repo_uid ON graph_sample (relative_model_path, repo_uid); + +-- create subgraph_source table +CREATE TABLE IF NOT EXISTS subgraph_source ( + subgraph_uuid VARCHAR(255) NOT NULL PRIMARY KEY, + full_graph_uuid VARCHAR(255) NOT NULL, + range_start INTEGER NOT NULL, + range_end INTEGER NOT NULL, + create_at DATETIME DEFAULT CURRENT_TIMESTAMP, + deleted BOOLEAN DEFAULT FALSE, + delete_at DATETIME, + FOREIGN KEY (subgraph_uuid) REFERENCES graph_sample(uuid), + FOREIGN KEY (full_graph_uuid) REFERENCES graph_sample(uuid) +); +CREATE INDEX IF NOT EXISTS idx_subgraph_uuid ON subgraph_source (subgraph_uuid); +CREATE INDEX IF NOT EXISTS idx_full_graph_uuid ON subgraph_source (full_graph_uuid); + +-- create dimension_generalization_source table +CREATE TABLE IF NOT EXISTS dimension_generalization_source ( + generalized_graph_uuid VARCHAR(255) NOT NULL PRIMARY KEY, + original_graph_uuid VARCHAR(255) NOT NULL, + total_element_size INTEGER NOT NULL, + create_at DATETIME 
DEFAULT CURRENT_TIMESTAMP, + deleted BOOLEAN DEFAULT FALSE, + delete_at DATETIME, + FOREIGN KEY (generalized_graph_uuid) REFERENCES graph_sample(uuid), + FOREIGN KEY (original_graph_uuid) REFERENCES graph_sample(uuid) +); +CREATE INDEX IF NOT EXISTS idx_dimension_generalized_graph_uuid ON dimension_generalization_source (generalized_graph_uuid); +CREATE INDEX IF NOT EXISTS idx_dimension_original_graph_uuid ON dimension_generalization_source (original_graph_uuid); +CREATE INDEX IF NOT EXISTS idx_total_element_size ON dimension_generalization_source (total_element_size); + +-- create datatype_generalization_source table +CREATE TABLE IF NOT EXISTS datatype_generalization_source ( + generalized_graph_uuid VARCHAR(255) NOT NULL PRIMARY KEY, + original_graph_uuid VARCHAR(255) NOT NULL, + data_type VARCHAR(50) NOT NULL, + create_at DATETIME DEFAULT CURRENT_TIMESTAMP, + deleted BOOLEAN DEFAULT FALSE, + delete_at DATETIME, + FOREIGN KEY (generalized_graph_uuid) REFERENCES graph_sample(uuid), + FOREIGN KEY (original_graph_uuid) REFERENCES graph_sample(uuid) +); +CREATE INDEX IF NOT EXISTS idx_datatype_generalized_graph_uuid ON datatype_generalization_source (generalized_graph_uuid); +CREATE INDEX IF NOT EXISTS idx_datatype_original_graph_uuid ON datatype_generalization_source (original_graph_uuid); + +-- create backward_graph_source table +CREATE TABLE IF NOT EXISTS backward_graph_source ( + forward_graph_uuid VARCHAR(255) NOT NULL PRIMARY KEY, + backward_graph_uuid VARCHAR(255) NOT NULL, + original_graph_uuid VARCHAR(255) NOT NULL, + create_at DATETIME DEFAULT CURRENT_TIMESTAMP, + deleted BOOLEAN DEFAULT FALSE, + delete_at DATETIME, + FOREIGN KEY (forward_graph_uuid) REFERENCES graph_sample(uuid), + FOREIGN KEY (backward_graph_uuid) REFERENCES graph_sample(uuid), + FOREIGN KEY (original_graph_uuid) REFERENCES graph_sample(uuid) +); +CREATE INDEX IF NOT EXISTS idx_forward_graph_uuid ON backward_graph_source (forward_graph_uuid); +CREATE INDEX IF NOT EXISTS idx_backward_graph_uuid ON backward_graph_source (backward_graph_uuid); +CREATE INDEX IF NOT EXISTS idx_backward_original_graph_uuid ON backward_graph_source (original_graph_uuid); diff --git a/sqlite/orm_models.py b/sqlite/orm_models.py new file mode 100644 index 000000000..a9fd5cb9a --- /dev/null +++ b/sqlite/orm_models.py @@ -0,0 +1,235 @@ +from sqlalchemy.orm import declarative_base, sessionmaker, relationship +from datetime import datetime +from sqlalchemy import ( + create_engine, + Column, + String, + Integer, + Boolean, + DateTime, + ForeignKey, + Index, + UniqueConstraint, +) + +Base = declarative_base() + + +class Repo(Base): + __tablename__ = "repo" + + repo_uid = Column(String(255), primary_key=True) + repo_type = Column(String(50), nullable=False) + repo_name = Column(String(255), nullable=False) + repo_url = Column(String(255), nullable=False) + graph_samples = relationship("GraphSample", back_populates="repo") + + +class GraphSample(Base): + __tablename__ = "graph_sample" + + uuid = Column(String(255), primary_key=True) + repo_uid = Column(String(255), ForeignKey("repo.repo_uid"), nullable=False) + relative_model_path = Column(String, nullable=False) + sample_type = Column(String(50), nullable=False) + is_subgraph = Column(Boolean, default=False) + num_ops = Column(Integer, default=-1) + graph_hash = Column(String(255), nullable=False) + order_value = Column(Integer) + create_at = Column(DateTime, default=datetime.now) + deleted = Column(Boolean, default=False) + delete_at = Column(DateTime) + + __table_args__ = ( + 
Index("idx_relative_model_path", "relative_model_path"), + Index("idx_graph_hash", "graph_hash"), + Index("idx_order_value", "order_value"), + UniqueConstraint( + "relative_model_path", "repo_uid", name="uq_relative_model_path_repo_uid" + ), + ) + + repo = relationship("Repo", back_populates="graph_samples") + subgraph_sources = relationship( + "SubgraphSource", + foreign_keys="SubgraphSource.subgraph_uuid", + back_populates="subgraph", + ) + subgraph_as_full_graph = relationship( + "SubgraphSource", + foreign_keys="SubgraphSource.full_graph_uuid", + back_populates="full_graph", + ) + dimension_sources_as_generalized = relationship( + "DimensionGeneralizationSource", + foreign_keys="DimensionGeneralizationSource.generalized_graph_uuid", + back_populates="generalized_graph", + ) + dimension_sources_as_original = relationship( + "DimensionGeneralizationSource", + foreign_keys="DimensionGeneralizationSource.original_graph_uuid", + back_populates="original_graph", + ) + data_type_sources_as_original = relationship( + "DataTypeGeneralizationSource", + foreign_keys="DataTypeGeneralizationSource.original_graph_uuid", + back_populates="original_graph", + ) + data_type_sources_as_generalized = relationship( + "DataTypeGeneralizationSource", + foreign_keys="DataTypeGeneralizationSource.generalized_graph_uuid", + back_populates="generalized_graph", + ) + backward_graph_sources_as_forward = relationship( + "BackwardGraphSource", + foreign_keys="BackwardGraphSource.forward_graph_uuid", + back_populates="forward_graph", + ) + backward_graph_as_backward = relationship( + "BackwardGraphSource", + foreign_keys="BackwardGraphSource.backward_graph_uuid", + back_populates="backward_graph", + ) + backward_graph_as_original = relationship( + "BackwardGraphSource", + foreign_keys="BackwardGraphSource.original_graph_uuid", + back_populates="original_graph", + ) + + +class SubgraphSource(Base): + __tablename__ = "subgraph_source" + + subgraph_uuid = Column( + String(255), ForeignKey("graph_sample.uuid"), nullable=False, primary_key=True + ) + full_graph_uuid = Column( + String(255), ForeignKey("graph_sample.uuid"), nullable=False + ) + range_start = Column(Integer, nullable=False) + range_end = Column(Integer, nullable=False) + create_at = Column(DateTime, default=datetime.now) + deleted = Column(Boolean, default=False) + delete_at = Column(DateTime) + + __table_args__ = ( + Index("idx_subgraph_uuid", "subgraph_uuid"), + Index("idx_full_graph_uuid", "full_graph_uuid"), + ) + + subgraph = relationship( + "GraphSample", foreign_keys=[subgraph_uuid], back_populates="subgraph_sources" + ) + full_graph = relationship( + "GraphSample", + foreign_keys=[full_graph_uuid], + back_populates="subgraph_as_full_graph", + ) + + +class DimensionGeneralizationSource(Base): + __tablename__ = "dimension_generalization_source" + + generalized_graph_uuid = Column( + String(255), ForeignKey("graph_sample.uuid"), nullable=False, primary_key=True + ) + original_graph_uuid = Column( + String(255), ForeignKey("graph_sample.uuid"), nullable=False + ) + total_element_size = Column(Integer, nullable=False) + create_at = Column(DateTime, default=datetime.now) + deleted = Column(Boolean, default=False) + delete_at = Column(DateTime) + + __table_args__ = ( + Index("idx_dimension_generalized_graph_uuid", "generalized_graph_uuid"), + Index("idx_dimension_original_graph_uuid", "original_graph_uuid"), + Index("idx_total_element_size", "total_element_size"), + ) + + generalized_graph = relationship( + "GraphSample", + 
foreign_keys=[generalized_graph_uuid], + back_populates="dimension_sources_as_generalized", + ) + original_graph = relationship( + "GraphSample", + foreign_keys=[original_graph_uuid], + back_populates="dimension_sources_as_original", + ) + + +class DataTypeGeneralizationSource(Base): + __tablename__ = "datatype_generalization_source" + + generalized_graph_uuid = Column( + String(255), ForeignKey("graph_sample.uuid"), nullable=False, primary_key=True + ) + original_graph_uuid = Column( + String(255), ForeignKey("graph_sample.uuid"), nullable=False + ) + data_type = Column(String(50), nullable=False) + create_at = Column(DateTime, default=datetime.now) + deleted = Column(Boolean, default=False) + delete_at = Column(DateTime) + + __table_args__ = ( + Index("idx_datatype_generalized_graph_uuid", "generalized_graph_uuid"), + Index("idx_datatype_original_graph_uuid", "original_graph_uuid"), + ) + + generalized_graph = relationship( + "GraphSample", + foreign_keys=[generalized_graph_uuid], + back_populates="data_type_sources_as_generalized", + ) + original_graph = relationship( + "GraphSample", + foreign_keys=[original_graph_uuid], + back_populates="data_type_sources_as_original", + ) + + +class BackwardGraphSource(Base): + __tablename__ = "backward_graph_source" + + forward_graph_uuid = Column( + String(255), ForeignKey("graph_sample.uuid"), nullable=False, primary_key=True + ) + backward_graph_uuid = Column( + String(255), ForeignKey("graph_sample.uuid"), nullable=False + ) + original_graph_uuid = Column( + String(255), ForeignKey("graph_sample.uuid"), nullable=False + ) + create_at = Column(DateTime, default=datetime.now) + deleted = Column(Boolean, default=False) + delete_at = Column(DateTime) + + __table_args__ = ( + Index("idx_forward_graph_uuid", "forward_graph_uuid"), + Index("idx_backward_graph_uuid", "backward_graph_uuid"), + Index("idx_backward_original_graph_uuid", "original_graph_uuid"), + ) + + forward_graph = relationship( + "GraphSample", + foreign_keys=[forward_graph_uuid], + back_populates="backward_graph_sources_as_forward", + ) + backward_graph = relationship( + "GraphSample", + foreign_keys=[backward_graph_uuid], + back_populates="backward_graph_as_backward", + ) + original_graph = relationship( + "GraphSample", + foreign_keys=[original_graph_uuid], + back_populates="backward_graph_as_original", + ) + + +def get_session(db_path: str, echo: bool = False): + engine = create_engine(f"sqlite:///{db_path}", echo=echo) + Session = sessionmaker(bind=engine) + return Session() From f83567966c4e44c358969d5b1eb2bf21bb426824 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Tue, 3 Feb 2026 14:50:35 +0800 Subject: [PATCH 6/7] Add summary function and minor fix. 
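The ORM models added in the previous patch mirror the tables in sqlite/graph_net.sql. As a rough sketch of how they are meant to be driven together (the database path, repo fields, and UUIDs below are placeholders, not values taken from this series):

    from sqlite.orm_models import Base, Repo, GraphSample, SubgraphSource, get_session

    # get_session() builds the SQLite engine internally from the given path.
    session = get_session("graph_net.db")  # placeholder path
    # Create the tables on first use; equivalent to running the DDL above.
    Base.metadata.create_all(session.get_bind())

    repo = Repo(repo_uid="repo-0", repo_type="torch",
                repo_name="GraphNet", repo_url="https://example.com/GraphNet")
    full = GraphSample(uuid="uuid-full", repo_uid=repo.repo_uid,
                       relative_model_path="samples/model_a",
                       sample_type="model", graph_hash="hash-full")
    sub = GraphSample(uuid="uuid-sub", repo_uid=repo.repo_uid,
                      relative_model_path="samples/model_a/sub_0",
                      sample_type="subgraph", is_subgraph=True, num_ops=16,
                      graph_hash="hash-sub")
    # Provenance row: which op range of the full graph the subgraph covers.
    link = SubgraphSource(subgraph_uuid=sub.uuid, full_graph_uuid=full.uuid,
                          range_start=0, range_end=16)
    session.add_all([repo, full, sub, link])
    session.commit()

The back_populates pairs then give bidirectional navigation, e.g. link.subgraph is sub and sub.subgraph_sources contains link.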
--- graph_net/tools/generate_subgraph_dataset.sh | 91 +++++++++++++++++++- 1 file changed, 88 insertions(+), 3 deletions(-) diff --git a/graph_net/tools/generate_subgraph_dataset.sh b/graph_net/tools/generate_subgraph_dataset.sh index fc5bdf054..644094ba4 100755 --- a/graph_net/tools/generate_subgraph_dataset.sh +++ b/graph_net/tools/generate_subgraph_dataset.sh @@ -14,7 +14,7 @@ GRAPH_NET_ROOT=$(python3 -c "import graph_net; import os; print(os.path.dirname( RESUME="true" #DECOMPOSE_WORKSPACE=/tmp/subgraph_dataset_workspace -DECOMPOSE_WORKSPACE=/work/graphnet_test_workspace/subgraph_dataset_20260202 +DECOMPOSE_WORKSPACE=/work/graphnet_test_workspace/subgraph_dataset_20260203 DEVICE_REWRITED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/01_device_rewrited_samples DIMENSION_GENERALIZED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/02_dimension_generalized_samples OP_NAMES_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/03_sample_op_names @@ -33,7 +33,7 @@ UNITTESTS_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/14_kernelbench_unittests mkdir -p "$DECOMPOSE_WORKSPACE" -model_list="$GRAPH_NET_ROOT/graph_net/config/small100_torch_samples_list.txt" +model_list="$GRAPH_NET_ROOT/graph_net/config/torch_samples_list.txt" device_rewrited_sample_list=${DECOMPOSE_WORKSPACE}/device_rewrited_sample_list.txt range_decomposed_subgraph_list=${DECOMPOSE_WORKSPACE}/range_decomposed_subgraph_sample_list.txt deduplicated_subgraph_list=${DECOMPOSE_WORKSPACE}/deduplicated_subgraph_sample_list.txt @@ -266,7 +266,7 @@ function subgraph_dimension_generalizer(){ for index in {0..8}; do echo ">>> Generating dimension generalized subgraph variant index: ${index}" dimension_generalized_sample_list="${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index}/dimension_generalized_sample_list.txt" - generate_subgraph_list ${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index} ${dimension_generalized_samples_list} + generate_subgraph_list ${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index} ${dimension_generalized_sample_list} python3 -m graph_net.model_path_handler \ --model-path-list "${dimension_generalized_sample_list}" \ --handler-config $(base64 -w 0 <&1 | tee ${DECOMPOSE_WORKSPACE}/log_range_decompose_${suffix}.txt generate_subgraph_list ${RANGE_DECOMPOSE_OUTPUT_DIR} ${range_decomposed_subgraph_list} + rename_decomposed_subgraph 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rename_decomposed_subgraph_${suffix}.txt + remove_duplicate_renamed_graphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_remove_duplicate_renamed_graphs_${suffix}.txt + generate_subgraph_list ${DEDUPLICATED_OUTPUT_DIR} ${deduplicated_subgraph_list} + # generate fusible subgraph ranges gen_fusible_subgraph_ranges 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_fusible_subgraphs_${suffix}.txt @@ -401,4 +405,85 @@ main() { generate_unittests 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_unittests_${suffix}.txt } +summary() { + num_original_samples=`cat $model_list | grep "^samples/" | wc -l` + echo "Number of original GraphNet samples: $num_original_samples" + + num_device_rewrited_samples=`find ${DEVICE_REWRITED_OUTPUT_DIR} -name "model.py" | wc -l` + device_rewrited_succeeded_percent=$(( num_device_rewrited_samples * 100 / num_original_samples )) + echo "- [Step 1] device rewrite: succeeded=${num_device_rewrited_samples}, percent=$device_rewrited_succeeded_percent%" + + num_succeeded_dimension_generalized_samples=`find ${DIMENSION_GENERALIZED_OUTPUT_DIR} -name "model.py" | wc -l` + dimension_generalized_samples_succeeded_percent=$((num_succeeded_dimension_generalized_samples * 100 / (num_original_samples * 9))) + echo "- [Step 2] dimension generalization: 
succeeded=${num_succeeded_dimension_generalized_samples}, percent=${dimension_generalized_samples_succeeded_percent}%" + for index in {0..8}; do + num_succeeded_dimension_generalized_samples=`find ${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index} -name "model.py" | wc -l` + dimension_generalized_samples_succeeded_percent=$(( num_succeeded_dimension_generalized_samples * 100 / num_original_samples )) + echo " ${index}, succeeded=${num_succeeded_dimension_generalized_samples}, percent=${dimension_generalized_samples_succeeded_percent}%" + done + echo "" + + num_succeeded_op_names=`find ${OP_NAMES_OUTPUT_DIR} -name op_names.txt | wc -l` + op_names_succeeded_percent=$(( num_succeeded_op_names * 100 / num_original_samples )) + echo "- [Step 3] generate op names: succeeded=${num_succeeded_op_names}, percent=${op_names_succeeded_percent}%" + + num_typical_subgraph_ranges=`find ${SUBGRAPH_RANGES_JSON_ROOT} -name typical_subgraph_ranges.json | wc -l` + typical_subgraph_ranges_succeeded_percent=$(( num_typical_subgraph_ranges * 100 / num_original_samples )) + echo "- [Step 4] generate typical subgraph ranges: succeeded=${num_typical_subgraph_ranges}, percent=${typical_subgraph_ranges_succeeded_percent}%" + + num_succeeded_range_decomposed_subgraphs=`find ${RANGE_DECOMPOSE_OUTPUT_DIR} -name "model.py" | wc -l` + echo "- [Step 5] range decompose: succeeded=${num_succeeded_range_decomposed_subgraphs}" + + num_renamed_subgraphs=`find ${GRAPH_VAR_RENAME_OUTPUT_DIR} -name "model.py" | wc -l` + echo "- [Step 6] rename: succeeded=${num_renamed_subgraphs}" + + num_deduplicated_subgraphs=`find ${DEDUPLICATED_OUTPUT_DIR} -name "model.py" | wc -l` + echo "- [Step 7] remove duplicated: succeeded=${num_deduplicated_subgraphs}" + + num_succeeded_cumsum_kernels_subgraphs=`find ${CUMSUM_NUM_KERNELS_DIR} -name "cumsum_num_kernels.json" | wc -l` + cumsum_kernels_succeeded_percent=$((num_succeeded_cumsum_kernels_subgraphs * 100 / num_deduplicated_subgraphs)) + echo "- [Step 8] cumsum kernels: succeeded=${num_succeeded_cumsum_kernels_subgraphs}, percent=${cumsum_kernels_succeeded_percent}%" + + num_fusible_subgraph_ranges=`find ${FUSIBLE_SUBGRAPH_RANGES_DIR} -name "fusible_subgraph_ranges.json" | wc -l` + num_grouped_fusible_subgraph_ranges=`find ${GROUPED_FUSIBLE_SUBGRAPH_RANGES_DIR} -name "grouped_fusible_subgraph_ranges.json" | wc -l` + echo " fusible subgraph ranges: succeeded=${num_fusible_subgraph_ranges}" + echo " grouped fusible subgraph ranges: succeeded=${num_grouped_fusible_subgraph_ranges}" + echo "" + + num_succeeded_dimension_generalized_subgraphs=`find ${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR} -name "model.py" | wc -l` + echo "- [Step 9] subgraph dimension generalization: succeeded=${num_succeeded_dimension_generalized_subgraphs}" + for index in {0..8}; do + num_succeeded_dimension_generalized_subgraphs=`find ${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR}/${index} -name "model.py" | wc -l` + echo " ${index}, succeeded=${num_succeeded_dimension_generalized_subgraphs}" + done + echo "" + + num_renamed_fusible_subgraphs=`find ${RENAMED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} -name "model.py" | wc -l` + echo "- [Step 10] rename: succeeded=${num_renamed_fusible_subgraphs}" + for index in {0..8}; do + num_renamed_fusible_subgraphs_index=`find ${RENAMED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}/${index} -name "model.py" | wc -l` + echo " ${index}, succeeded=${num_renamed_fusible_subgraphs_index}" + done + echo "" + + num_deduplicated_fusible_subgraphs=`find 
${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} -name "model.py" | wc -l` + echo "- [Step 11] remove duplicated: succeeded=${num_deduplicated_fusible_subgraphs}" + for index in {0..8}; do + num_deduplicated_fusible_subgraphs_index=`find ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}/${index} -name "model.py" | wc -l` + echo " ${index}, succeeded=${num_deduplicated_fusible_subgraphs_index}" + done + echo "" + + num_succeeded_unittests=`find ${UNITTESTS_OUTPUT_DIR} -name "*_test.py" | wc -l` + unittest_succeeded_percent=$((num_succeeded_unittests * 100 / num_deduplicated_fusible_subgraphs)) + echo "- [Step 12] generate unittest: succeeded=${num_succeeded_unittests}, percent=${unittest_succeeded_percent}%" + for index in {0..8}; do + num_succeeded_unittests=`find ${UNITTESTS_OUTPUT_DIR}/${index} -name "*_test.py" | wc -l` + echo " ${index}, succeeded=${num_succeeded_unittests}" + done +} + main + +set +x +summary 2>&1 | tee ${DECOMPOSE_WORKSPACE}/summary.txt \ No newline at end of file From 96a7085ab09c2039daa8e8237e6bf90e9e058695 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Mon, 9 Feb 2026 09:40:31 +0800 Subject: [PATCH 7/7] Integrate dtype generalization. --- .../resumable_sample_pass_mixin.py | 2 +- graph_net/tools/generate_subgraph_dataset.sh | 34 +++++++++++++------ .../torch/sample_pass/dtype_generalizer.py | 19 ++++++++--- 3 files changed, 39 insertions(+), 16 deletions(-) diff --git a/graph_net/sample_pass/resumable_sample_pass_mixin.py b/graph_net/sample_pass/resumable_sample_pass_mixin.py index 804005988..7ffb4b2d6 100644 --- a/graph_net/sample_pass/resumable_sample_pass_mixin.py +++ b/graph_net/sample_pass/resumable_sample_pass_mixin.py @@ -45,7 +45,7 @@ def resumable_handle_sample(self, rel_model_path: str): self._inc_num_handled_models_or_exit() def _inc_num_handled_models_or_exit(self): - if self.config["limits_handled_models"] is None: + if self.config.get("limits_handled_models", None) is None: return self.num_handled_models += 1 if self.num_handled_models >= self.config["limits_handled_models"]: diff --git a/graph_net/tools/generate_subgraph_dataset.sh b/graph_net/tools/generate_subgraph_dataset.sh index 7dfb1b3cf..457864db7 100755 --- a/graph_net/tools/generate_subgraph_dataset.sh +++ b/graph_net/tools/generate_subgraph_dataset.sh @@ -28,8 +28,8 @@ GROUPED_FUSIBLE_SUBGRAPH_RANGES_DIR=$DECOMPOSE_WORKSPACE/10_grouped_fusible_subg SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/11_dimension_generalized_fusible_subgraphs RENAMED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/12_renamed_dimension_generalized_fusible_subgraphs DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/13_deduplicated_dimension_generalized_fusible_subgraphs -#DTYPE_GENERALIZED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/14_dtype_generalized_fusible_subgraphs +DTYPE_GENERALIZED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/14_dtype_generalized_fusible_subgraphs -UNITTESTS_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/14_kernelbench_unittests +UNITTESTS_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/15_kernelbench_unittests mkdir -p "$DECOMPOSE_WORKSPACE" @@ -39,6 +39,13 @@ range_decomposed_subgraph_list=${DECOMPOSE_WORKSPACE}/range_decomposed_subgraph_ deduplicated_subgraph_list=${DECOMPOSE_WORKSPACE}/deduplicated_subgraph_sample_list.txt dimension_generalized_subgraph_list=${DECOMPOSE_WORKSPACE}/dimension_generalized_subgraph_sample_list.txt deduplicated_fusible_subgraphs_list=${DECOMPOSE_WORKSPACE}/deduplicated_dimension_generalized_subgraph_sample_list.txt 
+dtype_generalized_subgraphs_list=${DECOMPOSE_WORKSPACE}/dtype_generalized_subgraphs_sample_list.txt + +if [[ "$model_list" == *"torch_samples_list.txt" ]]; then + USE_SUBPROCESS_ARGS="--use-subprocess" +else + USE_SUBPROCESS_ARGS="" +fi function generate_generalized_subgraph_list() { local target_dir="$1" @@ -87,7 +94,7 @@ EOF function dimension_generalizer(){ echo ">>> [2] Apply dimension generalization for samples under ${device_rewrited_sample_list}." echo ">>>" - python3 -m graph_net.apply_sample_pass \ + python3 -m graph_net.apply_sample_pass ${USE_SUBPROCESS_ARGS} \ --model-path-list $device_rewrited_sample_list \ --sample-pass-file-path "$GRAPH_NET_ROOT/graph_net/dimension_generalizer.py" \ --sample-pass-class-name "ApplyDimGenPasses" \ @@ -107,7 +114,7 @@ EOF function generate_op_names() { echo ">>> [3] Generate op_names.txt for samples in ${model_list}." echo ">>>" - python3 -m graph_net.model_path_handler \ + python3 -m graph_net.model_path_handler ${USE_SUBPROCESS_ARGS} \ --model-path-list $model_list \ --handler-config=$(base64 -w 0 <>> [5] Decompose according to subgraph_ranges.json for samples in ${device_rewrited_sample_list}." echo ">>>" - python3 -m graph_net.model_path_handler \ + python3 -m graph_net.model_path_handler ${USE_SUBPROCESS_ARGS} \ --model-path-list "$device_rewrited_sample_list" \ --handler-config=$(base64 -w 0 <>> Generating dimension generalized subgraph variant index: ${index}" dimension_generalized_sample_list="${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index}/dimension_generalized_sample_list.txt" generate_subgraph_list ${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index} ${dimension_generalized_sample_list} - python3 -m graph_net.model_path_handler \ + python3 -m graph_net.model_path_handler ${USE_SUBPROCESS_ARGS} \ --model-path-list "${dimension_generalized_sample_list}" \ --handler-config $(base64 -w 0 <>> [12] Data type generalizer for samples under ${DEDUPLICATED_FUSIBLE_SUBGRAPH_DIR}." + echo ">>> [12] Data type generalizer for samples under ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}." echo ">>>" python3 -m graph_net.apply_sample_pass \ --model-path-list $deduplicated_fusible_subgraphs_list \ @@ -335,6 +342,7 @@ function dtype_generalizer() { "model_path_prefix": "$DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR", "model_runnable_predicator_filepath": "$GRAPH_NET_ROOT/graph_net/torch/constraint_util.py", "try_run": false, + "device": "cuda", "resume": ${RESUME} } EOF } function generate_unittests() { - echo ">>> [12] Generate unittests for subgraph samples under ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}." + echo ">>> [13] Generate unittests for subgraph samples under ${DTYPE_GENERALIZED_OUTPUT_DIR}." echo ">>>" python3 -m graph_net.model_path_handler \ - --model-path-list ${deduplicated_fusible_subgraphs_list} \ + --model-path-list ${dtype_generalized_subgraphs_list} \ --handler-config=$(base64 -w 0 <&1 | tee ${DECOMPOSE_WORKSPACE}/log_remove_duplicate_dimension_generalized_subgraphs_${suffix}.txt generate_generalized_subgraph_list ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} ${deduplicated_fusible_subgraphs_list} + # dtype generalization + dtype_generalizer 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_dtype_generalizer_${suffix}.txt + generate_generalized_subgraph_list ${DTYPE_GENERALIZED_OUTPUT_DIR} ${dtype_generalized_subgraphs_list} + # generate kernelbench format unittest generate_unittests 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_unittests_${suffix}.txt } @@ -483,4 +495,4 @@ summary() { main set +x -summary 2>&1 | tee ${DECOMPOSE_WORKSPACE}/summary.txt \ No newline at end of file +summary 2>&1 | tee ${DECOMPOSE_WORKSPACE}/summary.txt diff --git a/graph_net/torch/sample_pass/dtype_generalizer.py b/graph_net/torch/sample_pass/dtype_generalizer.py index 48c06803a..13a246244 100644 --- a/graph_net/torch/sample_pass/dtype_generalizer.py +++ b/graph_net/torch/sample_pass/dtype_generalizer.py @@ -17,6 +17,7 @@ from pathlib import Path from typing import Any, Dict, List +import torch import torch.fx as fx from graph_net.graph_net_json_file_util import ( @@ -236,9 +237,9 @@ class ApplyDataTypeGeneralizationPasses(SamplePass, ResumableSamplePassMixin): "output_dir": "/path/to/output", "model_path_prefix": "", "model_runnable_predicator_filepath": "...", - "resume": , - "limits_handled_models": , - "try_run": , + "resume": true, + "limits_handled_models": null, + "try_run": true, } """ @@ -268,6 +269,7 @@ def declare_config( output_dir: str, model_path_prefix: str, model_runnable_predicator_filepath: str, + device: str = "auto", resume: bool = False, limits_handled_models: int = None, try_run: bool = True, @@ -281,6 +283,13 @@ def _make_model_runnable_predicator(self, config: Dict[str, Any]): predicator_config = self.model_runnable_predicator_config return cls(predicator_config) + def _choose_device(self, device) -> str: + if device is None: + return None + if device in ["cpu", "cuda"]: + return device + return "cuda" if torch.cuda.is_available() else "cpu" + def sample_handled(self, rel_model_path: str) -> bool: model_path = Path(self.config["model_path_prefix"]) / rel_model_path dtype_pass_names = self._read_dtype_pass_names(model_path) @@ -320,7 +329,9 @@ def resume(self, rel_model_path: str) -> List[str]: return [] # Parse the computation graph - traced_model = parse_immutable_model_path_into_sole_graph_module(abs_model_path) + traced_model = parse_immutable_model_path_into_sole_graph_module( + abs_model_path, device=self._choose_device(self.config["device"]) + ) # Copy the original sample files_copied = [