PaddlePaddle · lixinqi · Mar 5, 2026 · Feb 2, 2026 · Feb 2, 2026 · Feb 2, 2026
diff --git a/graph_net/sample_pass/resumable_sample_pass_mixin.py b/graph_net/sample_pass/resumable_sample_pass_mixin.py
@@ -45,7 +45,7 @@ def resumable_handle_sample(self, rel_model_path: str):
         self._inc_num_handled_models_or_exit()
 
     def _inc_num_handled_models_or_exit(self):
-        if self.config["limits_handled_models"] is None:
+        if self.config.get("limits_handled_models", None) is None:
             return
         self.num_handled_models += 1
         if self.num_handled_models >= self.config["limits_handled_models"]:

diff --git a/graph_net/test/dtype_gen_test.sh b/graph_net/test/dtype_gen_test.sh
@@ -1,42 +1,45 @@
 #!/bin/bash
 
-GRAPH_NET_ROOT=$(python3 -c "import graph_net; import os; print(
-os.path.dirname(graph_net.__file__))")
-GRAPHNET_ROOT="$GRAPH_NET_ROOT/../"
+GRAPH_NET_ROOT=$(python3 -c "import graph_net; import os; print(os.path.dirname(os.path.dirname(graph_net.__file__)))")
 OUTPUT_DIR="/tmp/dtype_gen_samples"
-mkdir -p "$OUTPUT_DIR"
+
+mkdir -p "$OUTPUT_DIR"
+# Model list to process and the path prefix handed to both sample passes below.
+model_list="${GRAPH_NET_ROOT}/graph_net/config/small10_torch_samples_list.txt"
+model_path_prefix="${GRAPH_NET_ROOT}"
 
 # Step 1: Initialize dtype generalization passes (samples of torchvision)
 python3 -m graph_net.apply_sample_pass \
-    --model-path-list "graph_net/config/small100_torch_samples_list.txt" \
-    --sample-pass-file-path "$GRAPH_NET_ROOT/torch/sample_pass/dtype_generalizer.py" \
+    --use-subprocess \
+    --model-path-list "$model_list" \
+    --sample-pass-file-path "$GRAPH_NET_ROOT/graph_net/torch/sample_pass/dtype_generalizer.py" \
     --sample-pass-class-name InitDataTypeGeneralizationPasses \
     --sample-pass-config $(base64 -w 0 <<EOF
 {
     "dtype_list": ["float16", "bfloat16"],
-    "model_path_prefix": "$GRAPHNET_ROOT",
+    "model_path_prefix": "$model_path_prefix",
     "output_dir": "$OUTPUT_DIR",
     "resume": true,
-    "limits_handled_models": null
+    "limits_handled_models": 10
 }
 EOF
-) 
+)
 
 # Step 2: Apply passes to generate samples
 python3 -m graph_net.apply_sample_pass \
-    --model-path-list "graph_net/config/small100_torch_samples_list.txt" \
-    --sample-pass-file-path "$GRAPH_NET_ROOT/torch/sample_pass/dtype_generalizer.py" \
+    --use-subprocess \
+    --model-path-list "$model_list" \
+    --sample-pass-file-path "$GRAPH_NET_ROOT/graph_net/torch/sample_pass/dtype_generalizer.py" \
     --sample-pass-class-name ApplyDataTypeGeneralizationPasses \
     --sample-pass-config $(base64 -w 0 <<EOF
 {
     "output_dir": "$OUTPUT_DIR",
-    "model_path_prefix": "$GRAPHNET_ROOT",
-    "model_runnable_predicator_filepath": "$GRAPH_NET_ROOT/torch/constraint_util.py",
-    "resume": true,
-    "limits_handled_models": null,
+    "model_path_prefix": "$model_path_prefix",
+    "model_runnable_predicator_filepath": "$GRAPH_NET_ROOT/graph_net/torch/constraint_util.py",
+    "device": "cuda",
+    "resume": false,
+    "limits_handled_models": 10,
     "try_run": true
 }
 EOF
-)
-
-
+)
diff --git a/graph_net/tools/generate_subgraph_dataset.sh b/graph_net/tools/generate_subgraph_dataset.sh
@@ -26,18 +26,26 @@ GROUPED_FUSIBLE_SUBGRAPH_RANGES_DIR=$DECOMPOSE_WORKSPACE/10_grouped_fusible_subg
 SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/11_dimension_generalized_fusible_subgraphs
 RENAMED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/12_renamed_dimension_generalized_fusible_subgraphs
 DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/13_deduplicated_dimension_generalized_fusible_subgraphs
-UNITTESTS_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/14_kernelbench_unittests
-
-DB_PATH=$DECOMPOSE_WORKSPACE/small100_torch_samples.db
+DTYPE_GENERALIZED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/14_dtype_generalized_fusible_subgraphs
+UNITTESTS_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/15_kernelbench_unittests
 
 mkdir -p "$DECOMPOSE_WORKSPACE"
 
-model_list="$GRAPH_NET_ROOT/graph_net/config/small100_torch_samples_list.txt" 
+model_list="$GRAPH_NET_ROOT/graph_net/config/small100_torch_samples_list.txt"
+DB_PATH=$DECOMPOSE_WORKSPACE/small100_torch_samples.db
+
 device_rewrited_sample_list=${DECOMPOSE_WORKSPACE}/device_rewrited_sample_list.txt
 range_decomposed_subgraph_list=${DECOMPOSE_WORKSPACE}/range_decomposed_subgraph_sample_list.txt
 deduplicated_subgraph_list=${DECOMPOSE_WORKSPACE}/deduplicated_subgraph_sample_list.txt
 dimension_generalized_subgraph_list=${DECOMPOSE_WORKSPACE}/dimension_generalized_subgraph_sample_list.txt
 deduplicated_fusible_subgraphs_list=${DECOMPOSE_WORKSPACE}/deduplicated_dimension_generalized_subgraph_sample_list.txt
+dtype_generalized_subgraphs_list=${DECOMPOSE_WORKSPACE}/dtype_generalized_subgraphs_sample_list.txt
+
+# Pass --use-subprocess only when the model list path ends in /torch_samples_list.txt.
+case "$model_list" in
+    */torch_samples_list.txt) USE_SUBPROCESS_ARGS="--use-subprocess" ;;
+    *)                        USE_SUBPROCESS_ARGS="" ;;
+esac
 
 function generate_generalized_subgraph_list() {
     local target_dir="$1"
@@ -63,7 +71,7 @@ function generate_subgraph_list() {
         | tee $sample_list
 }
 
-function grpahsample_insert(){
+function insert_graph_sample(){
     local target_dir="$1"
     local repo_uid="$2"
     local sample_type="$3"
@@ -115,7 +123,7 @@ EOF
 function dimension_generalizer(){
     echo ">>> [2] Apply dimension generalization for samples under ${device_rewrited_sample_list}."
     echo ">>>"
-    python3 -m graph_net.apply_sample_pass \
+    python3 -m graph_net.apply_sample_pass ${USE_SUBPROCESS_ARGS} \
         --model-path-list $device_rewrited_sample_list \
         --sample-pass-file-path "$GRAPH_NET_ROOT/graph_net/dimension_generalizer.py" \
         --sample-pass-class-name "ApplyDimGenPasses" \
@@ -135,7 +143,7 @@ EOF
 function generate_op_names() {
     echo ">>> [3] Generate op_names.txt for samples in ${model_list}."
     echo ">>>"
-    python3 -m graph_net.model_path_handler \
+    python3 -m graph_net.model_path_handler ${USE_SUBPROCESS_ARGS} \
         --model-path-list $model_list \
         --handler-config=$(base64 -w 0 <<EOF
 {
@@ -181,7 +189,7 @@ EOF
 function range_decompose() {
     echo ">>> [5] Decompose according to subgraph_ranges.json for samples in ${device_rewrited_sample_list}."
     echo ">>>"
-    python3 -m graph_net.model_path_handler \
+    python3 -m graph_net.model_path_handler ${USE_SUBPROCESS_ARGS} \
         --model-path-list "$device_rewrited_sample_list" \
         --handler-config=$(base64 -w 0 <<EOF
 {
@@ -295,7 +303,7 @@ function subgraph_dimension_generalizer(){
         echo ">>> Generating dimension generalized subgraph variant index: ${index}"
         dimension_generalized_sample_list="${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index}/dimension_generalized_sample_list.txt"
         generate_subgraph_list ${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index} ${dimension_generalized_sample_list}
-        python3 -m graph_net.model_path_handler \
+        python3 -m graph_net.model_path_handler ${USE_SUBPROCESS_ARGS} \
             --model-path-list "${dimension_generalized_sample_list}" \
             --handler-config $(base64 -w 0 <<EOF
 {
@@ -350,18 +358,39 @@ function remove_duplicate_dimension_generalized_fusible_graphs() {
     done
 }
 
+function dtype_generalizer() {  # Step 12: run ApplyDataTypeGeneralizationPasses over $deduplicated_fusible_subgraphs_list, writing to $DTYPE_GENERALIZED_OUTPUT_DIR.
+    echo ">>> [12] Data type generalizer for samples under ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}."
+    echo ">>>"
+    python3 -m graph_net.apply_sample_pass \
+        --use-subprocess \
+        --model-path-list $deduplicated_fusible_subgraphs_list \
+        --sample-pass-file-path "$GRAPH_NET_ROOT/graph_net/torch/sample_pass/dtype_generalizer.py" \
+        --sample-pass-class-name ApplyDataTypeGeneralizationPasses \
+        --sample-pass-config $(base64 -w 0 <<EOF
+{
+    "output_dir": "$DTYPE_GENERALIZED_OUTPUT_DIR",
+    "model_path_prefix": "$DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR",
+    "model_runnable_predicator_filepath": "$GRAPH_NET_ROOT/graph_net/torch/constraint_util.py",
+    "try_run": false,
+    "device": "cuda",
+    "resume": ${RESUME}
+}
+EOF
+)  # NOTE(review): ${RESUME} is expanded verbatim into the JSON config and is not defined in the visible hunks; confirm it is always set to true or false.
+}
+
 function generate_unittests() {
-    echo ">>> [12] Generate unittests for subgraph samples under ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}. "
+    echo ">>> [13] Generate unittests for subgraph samples under ${DTYPE_GENERALIZED_OUTPUT_DIR}. "
     echo ">>>"
     python3 -m graph_net.model_path_handler \
-        --model-path-list ${deduplicated_fusible_subgraphs_list} \
+        --model-path-list ${dtype_generalized_subgraphs_list} \
         --handler-config=$(base64 -w 0 <<EOF
 {
     "handler_path": "${GRAPH_NET_ROOT}/graph_net/sample_pass/agent_unittest_generator.py",
     "handler_class_name": "AgentUnittestGeneratorPass",
     "handler_config": {
         "framework": "torch",
-        "model_path_prefix": "${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}",
+        "model_path_prefix": "${DTYPE_GENERALIZED_OUTPUT_DIR}",
         "output_dir": "${UNITTESTS_OUTPUT_DIR}",
         "device": "cuda",
         "generate_main": false,
@@ -380,8 +409,8 @@ main() {
     suffix="${OP_RANGE}ops_${timestamp}"
 
     # init database
-    python ./sqlite/init_db.py --db_path ${DB_PATH} 2>&1 | tee sqlite/logs/init_db_$(date +"%Y%m%d_%H%M%S").log
-    grpahsample_insert ${GRAPH_NET_ROOT} "github_torch_samples" "full_graph" ${model_list}
+    python ${GRAPH_NET_ROOT}/sqlite/init_db.py --db_path ${DB_PATH} 2>&1 | tee sqlite/logs/init_db_${timestamp}.log
+    insert_graph_sample ${GRAPH_NET_ROOT} "github_torch_samples" "full_graph" ${model_list}
 
     # rewrite the device in model to cuda
     rewrite_device 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rewrite_device_${suffix}.txt
@@ -395,26 +424,121 @@ main() {
     generate_split_point 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_split_point_${suffix}.txt
     range_decompose 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_range_decompose_${suffix}.txt
     generate_subgraph_list ${RANGE_DECOMPOSE_OUTPUT_DIR} ${range_decomposed_subgraph_list}
-
+    
     rename_decomposed_subgraph 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rename_decomposed_subgraph_${suffix}.txt
     remove_duplicate_renamed_graphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_remove_duplicate_renamed_graphs_${suffix}.txt
     generate_subgraph_list ${DEDUPLICATED_OUTPUT_DIR} ${deduplicated_subgraph_list}
-    grpahsample_insert ${DEDUPLICATED_OUTPUT_DIR} "github_torch_samples" "typical_graph" ${deduplicated_subgraph_list}
+    insert_graph_sample ${DEDUPLICATED_OUTPUT_DIR} "github_torch_samples" "typical_graph" ${deduplicated_subgraph_list}
 
     # generate fusible subgraph ranges
     gen_fusible_subgraph_ranges 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_fusible_subgraphs_${suffix}.txt
 
     # subgraph dimension generalization
     subgraph_dimension_generalizer 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_subgraph_dimension_generalizer_${suffix}.txt
     generate_generalized_subgraph_list ${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR} ${dimension_generalized_subgraph_list}
-
+    
     rename_dimension_generalized_fusible_subgraph 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rename_dimension_generalized_subgraph_${suffix}.txt
     remove_duplicate_dimension_generalized_fusible_graphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_remove_duplicate_dimension_generalized_subgraphs_${suffix}.txt
     generate_generalized_subgraph_list ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} ${deduplicated_fusible_subgraphs_list}
-    grpahsample_insert ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} "github_torch_samples" "fusible_graph" ${deduplicated_fusible_subgraphs_list}
+
+    # dtype generalization. NOTE(review): the insert below pairs DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR with the list generated from DTYPE_GENERALIZED_OUTPUT_DIR; confirm insert_graph_sample resolves list entries against the intended directory.
+    dtype_generalizer 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_dtype_generalizer_${suffix}.txt
+    generate_generalized_subgraph_list ${DTYPE_GENERALIZED_OUTPUT_DIR} ${dtype_generalized_subgraphs_list}
+    insert_graph_sample ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} "github_torch_samples" "fusible_graph" ${dtype_generalized_subgraphs_list}
 
     # generate kernelbench format unittest
     generate_unittests 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_unittests_${suffix}.txt
 }
 
+# Print the integer percentage $1 * 100 / $2; print 0 when $2 is not positive.
+_percent_of() { if (( ${2:-0} > 0 )); then echo $(( ${1:-0} * 100 / $2 )); else echo 0; fi; }
+summary() {
+    # Report how many outputs each pipeline step produced; a zero baseline prints 0% instead of a "division by 0" error.
+    num_original_samples=$(grep "^samples/" "$model_list" | wc -l)
+    echo "Number of original graphnet samples: $num_original_samples"
+    num_device_rewrited_samples=$(find "${DEVICE_REWRITED_OUTPUT_DIR}" -name "model.py" | wc -l)
+    device_rewrited_successed_percent=$(_percent_of "$num_device_rewrited_samples" "$num_original_samples")
+    echo "- [Step  1] device rewrite: successed=${num_device_rewrited_samples}, percent=$device_rewrited_successed_percent%"
+
+    num_successed_dimension_generalized_samples=$(find "${DIMENSION_GENERALIZED_OUTPUT_DIR}" -name "model.py" | wc -l)
+    dimension_generalized_samples_successed_percent=$(_percent_of "$num_successed_dimension_generalized_samples" "$(( num_original_samples * 9 ))")
+    echo "- [Step  2] dimension generalization: successed=${num_successed_dimension_generalized_samples}, percent=${dimension_generalized_samples_successed_percent}%"
+    for index in {0..8}; do
+        num_successed_dimension_generalized_samples=$(find "${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index}" -name "model.py" | wc -l)
+        dimension_generalized_samples_successed_percent=$(_percent_of "$num_successed_dimension_generalized_samples" "$num_original_samples")
+        echo "    ${index}, successed=${num_successed_dimension_generalized_samples}, percent=${dimension_generalized_samples_successed_percent}%"
+    done
+    echo ""
+
+    num_successed_op_names=$(find "${OP_NAMES_OUTPUT_DIR}" -name op_names.txt | wc -l)
+    op_names_successed_percent=$(_percent_of "$num_successed_op_names" "$num_original_samples")
+    echo "- [Step  3] generate op names: successed=${num_successed_op_names}, percent=${op_names_successed_percent}%"
+
+    num_typical_subgraph_ranges=$(find "${SUBGRAPH_RANGES_JSON_ROOT}" -name typical_subgraph_ranges.json | wc -l)
+    typical_subgraph_ranges_successed_percent=$(_percent_of "$num_typical_subgraph_ranges" "$num_original_samples")
+    echo "- [Step  4] generate typical subgraph ranges: successed=${num_typical_subgraph_ranges}, percent=${typical_subgraph_ranges_successed_percent}%"
+
+    num_successed_range_decomposed_subgraphs=$(find "${RANGE_DECOMPOSE_OUTPUT_DIR}" -name "model.py" | wc -l)
+    echo "- [Step  5] range decompose: successed=${num_successed_range_decomposed_subgraphs}"
+
+    num_renamed_subgraphs=$(find "${GRAPH_VAR_RENAME_OUTPUT_DIR}" -name "model.py" | wc -l)
+    echo "- [Step  6] rename: successed=${num_renamed_subgraphs}"
+
+    num_deduplicated_subgraphs=$(find "${DEDUPLICATED_OUTPUT_DIR}" -name "model.py" | wc -l)
+    echo "- [Step  7] remove duplicated: successed=${num_deduplicated_subgraphs}"
+
+    num_successed_cumsum_kernels_subgraphs=$(find "${CUMSUM_NUM_KERNELS_DIR}" -name "cumsum_num_kernels.json" | wc -l)
+    cumsum_kernels_successed_percent=$(_percent_of "$num_successed_cumsum_kernels_subgraphs" "$num_deduplicated_subgraphs")
+    echo "- [Step  8] cumsum kernels: successed=${num_successed_cumsum_kernels_subgraphs}, percent=${cumsum_kernels_successed_percent}%"
+
+    num_fusible_subgraph_ranges=$(find "${FUSIBLE_SUBGRAPH_RANGES_DIR}" -name "fusible_subgraph_ranges.json" | wc -l)
+    num_grouped_fusible_subgraph_ranges=$(find "${GROUPED_FUSIBLE_SUBGRAPH_RANGES_DIR}" -name "grouped_fusible_subgraph_ranges.json" | wc -l)
+    echo "    fusible subgraph ranges: successed=${num_fusible_subgraph_ranges}"
+    echo "    grouped fusible subgraph ranges: successed=${num_grouped_fusible_subgraph_ranges}"
+    echo ""
+
+    num_successed_dimension_generalized_subgraphs=$(find "${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR}" -name "model.py" | wc -l)
+    echo "- [Step  9] subgraph dimension generalization: successed=${num_successed_dimension_generalized_subgraphs}"
+    for index in {0..8}; do
+        num_successed_dimension_generalized_subgraphs=$(find "${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR}/${index}" -name "model.py" | wc -l)
+        echo "    ${index}, successed=${num_successed_dimension_generalized_subgraphs}"
+    done
+    echo ""
+
+    num_renamed_fusible_subgraphs=$(find "${RENAMED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}" -name "model.py" | wc -l)
+    echo "- [Step 10] rename: successed=${num_renamed_fusible_subgraphs}"
+    for index in {0..8}; do
+        num_renamed_fusible_subgraphs_index=$(find "${RENAMED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}/${index}" -name "model.py" | wc -l)
+        echo "    ${index}, successed=${num_renamed_fusible_subgraphs_index}"
+    done
+    echo ""
+
+    num_deduplicated_fusible_subgraphs=$(find "${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}" -name "model.py" | wc -l)
+    echo "- [Step 11] remove duplicated: successed=${num_deduplicated_fusible_subgraphs}"
+    for index in {0..8}; do
+        num_deduplicated_fusible_subgraphs_index=$(find "${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}/${index}" -name "model.py" | wc -l)
+        echo "    ${index}, successed=${num_deduplicated_fusible_subgraphs_index}"
+    done
+    echo ""
+
+    num_dtype_generalized_subgraphs=$(find "${DTYPE_GENERALIZED_OUTPUT_DIR}" -name "model.py" | wc -l)
+    echo "- [Step 12] dtype generalization: successed=${num_dtype_generalized_subgraphs}"
+    for dtype in float32 float16 bfloat16; do
+        num_dtype_generalized_subgraphs_index=$(find "${DTYPE_GENERALIZED_OUTPUT_DIR}/${dtype}" -name "model.py" | wc -l)
+        echo "    ${dtype}, successed=${num_dtype_generalized_subgraphs_index}"
+    done
+    echo ""
+
+    num_successed_unittests=$(find "${UNITTESTS_OUTPUT_DIR}" -name "*_test.py" | wc -l)
+    unittest_successed_percent=$(_percent_of "$num_successed_unittests" "$num_dtype_generalized_subgraphs")
+    echo "- [Step 13] generate unittest: successed=${num_successed_unittests}, percent=${unittest_successed_percent}%"
+    for dtype in float32 float16 bfloat16; do
+        num_successed_unittests=$(find "${UNITTESTS_OUTPUT_DIR}/${dtype}" -name "*_test.py" | wc -l)
+        echo "    ${dtype}, successed=${num_successed_unittests}"
+    done
+}
+
 main
+# Turn xtrace off, then print the per-step summary and keep a copy in the workspace.
+set +x
+summary 2>&1 | tee ${DECOMPOSE_WORKSPACE}/summary.txt