diff --git a/graph_net/sample_pass/resumable_sample_pass_mixin.py b/graph_net/sample_pass/resumable_sample_pass_mixin.py index 804005988..7ffb4b2d6 100644 --- a/graph_net/sample_pass/resumable_sample_pass_mixin.py +++ b/graph_net/sample_pass/resumable_sample_pass_mixin.py @@ -45,7 +45,7 @@ def resumable_handle_sample(self, rel_model_path: str): self._inc_num_handled_models_or_exit() def _inc_num_handled_models_or_exit(self): - if self.config["limits_handled_models"] is None: + if self.config.get("limits_handled_models", None) is None: return self.num_handled_models += 1 if self.num_handled_models >= self.config["limits_handled_models"]: diff --git a/graph_net/test/dtype_gen_test.sh b/graph_net/test/dtype_gen_test.sh index 6a9cd8d82..c3e203781 100755 --- a/graph_net/test/dtype_gen_test.sh +++ b/graph_net/test/dtype_gen_test.sh @@ -1,42 +1,45 @@ #!/bin/bash -GRAPH_NET_ROOT=$(python3 -c "import graph_net; import os; print( -os.path.dirname(graph_net.__file__))") -GRAPHNET_ROOT="$GRAPH_NET_ROOT/../" +GRAPH_NET_ROOT=$(python3 -c "import graph_net; import os; print(os.path.dirname(os.path.dirname(graph_net.__file__)))") OUTPUT_DIR="/tmp/dtype_gen_samples" -mkdir -p "$OUTPUT_DIR" + +mkdir -p $OUTPUT_DIR + +model_list=${GRAPH_NET_ROOT}/graph_net/config/small10_torch_samples_list.txt +model_path_prefix="${GRAPH_NET_ROOT}" # Step 1: Initialize dtype generalization passes (samples of torchvision) python3 -m graph_net.apply_sample_pass \ - --model-path-list "graph_net/config/small100_torch_samples_list.txt" \ - --sample-pass-file-path "$GRAPH_NET_ROOT/torch/sample_pass/dtype_generalizer.py" \ + --use-subprocess \ + --model-path-list $model_list \ + --sample-pass-file-path "$GRAPH_NET_ROOT/graph_net/torch/sample_pass/dtype_generalizer.py" \ --sample-pass-class-name InitDataTypeGeneralizationPasses \ --sample-pass-config $(base64 -w 0 <>> [2] Apply dimension generalization for samples under ${device_rewrited_sample_list}." echo ">>>" - python3 -m graph_net.apply_sample_pass \ + python3 -m graph_net.apply_sample_pass ${USE_SUBPROCESS_ARGS} \ --model-path-list $device_rewrited_sample_list \ --sample-pass-file-path "$GRAPH_NET_ROOT/graph_net/dimension_generalizer.py" \ --sample-pass-class-name "ApplyDimGenPasses" \ @@ -135,7 +143,7 @@ EOF function generate_op_names() { echo ">>> [3] Generate op_names.txt for samples in ${model_list}." echo ">>>" - python3 -m graph_net.model_path_handler \ + python3 -m graph_net.model_path_handler ${USE_SUBPROCESS_ARGS} \ --model-path-list $model_list \ --handler-config=$(base64 -w 0 <>> [5] Decompose according to subgraph_ranges.json for samples in ${device_rewrited_sample_list}." echo ">>>" - python3 -m graph_net.model_path_handler \ + python3 -m graph_net.model_path_handler ${USE_SUBPROCESS_ARGS} \ --model-path-list "$device_rewrited_sample_list" \ --handler-config=$(base64 -w 0 <>> Generating dimension generalized subgraph variant index: ${index}" dimension_generalized_sample_list="${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index}/dimension_generalized_sample_list.txt" generate_subgraph_list ${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index} ${dimension_generalized_sample_list} - python3 -m graph_net.model_path_handler \ + python3 -m graph_net.model_path_handler ${USE_SUBPROCESS_ARGS} \ --model-path-list "${dimension_generalized_sample_list}" \ --handler-config $(base64 -w 0 <>> [12] Data type generalizer for samples under ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}." + echo ">>>" + python3 -m graph_net.apply_sample_pass \ + --use-subprocess \ + --model-path-list $deduplicated_fusible_subgraphs_list \ + --sample-pass-file-path "$GRAPH_NET_ROOT/graph_net/torch/sample_pass/dtype_generalizer.py" \ + --sample-pass-class-name ApplyDataTypeGeneralizationPasses \ + --sample-pass-config $(base64 -w 0 <>> [12] Generate unittests for subgraph samples under ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}. " + echo ">>> [13] Generate unittests for subgraph samples under ${DTYPE_GENERALIZED_OUTPUT_DIR}. " echo ">>>" python3 -m graph_net.model_path_handler \ - --model-path-list ${deduplicated_fusible_subgraphs_list} \ + --model-path-list ${dtype_generalized_subgraphs_list} \ --handler-config=$(base64 -w 0 <&1 | tee sqlite/logs/init_db_$(date +"%Y%m%d_%H%M%S").log - grpahsample_insert ${GRAPH_NET_ROOT} "github_torch_samples" "full_graph" ${model_list} + python ${GRAPH_NET_ROOT}/sqlite/init_db.py --db_path ${DB_PATH} 2>&1 | tee sqlite/logs/init_db_${timestamp}.log + insert_graph_sample ${GRAPH_NET_ROOT} "github_torch_samples" "full_graph" ${model_list} # rewrite the device in model to cuda rewrite_device 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rewrite_device_${suffix}.txt @@ -395,11 +424,11 @@ main() { generate_split_point 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_split_point_${suffix}.txt range_decompose 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_range_decompose_${suffix}.txt generate_subgraph_list ${RANGE_DECOMPOSE_OUTPUT_DIR} ${range_decomposed_subgraph_list} - + rename_decomposed_subgraph 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rename_decomposed_subgraph_${suffix}.txt remove_duplicate_renamed_graphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_remove_duplicate_renamed_graphs_${suffix}.txt generate_subgraph_list ${DEDUPLICATED_OUTPUT_DIR} ${deduplicated_subgraph_list} - grpahsample_insert ${DEDUPLICATED_OUTPUT_DIR} "github_torch_samples" "typical_graph" ${deduplicated_subgraph_list} + insert_graph_sample ${DEDUPLICATED_OUTPUT_DIR} "github_torch_samples" "typical_graph" ${deduplicated_subgraph_list} # generate fusible subgraph ranges gen_fusible_subgraph_ranges 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_fusible_subgraphs_${suffix}.txt @@ -407,14 +436,109 @@ main() { # subgraph dimension generalization subgraph_dimension_generalizer 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_subgraph_dimension_generalizer_${suffix}.txt generate_generalized_subgraph_list ${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR} ${dimension_generalized_subgraph_list} - + rename_dimension_generalized_fusible_subgraph 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_rename_dimension_generalized_subgraph_${suffix}.txt remove_duplicate_dimension_generalized_fusible_graphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_remove_duplicate_dimension_generalized_subgraphs_${suffix}.txt generate_generalized_subgraph_list ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} ${deduplicated_fusible_subgraphs_list} - grpahsample_insert ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} "github_torch_samples" "fusible_graph" ${deduplicated_fusible_subgraphs_list} + + # dtype generalization + dtype_generalizer 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_dtype_generalizer_${suffix}.txt + generate_generalized_subgraph_list ${DTYPE_GENERALIZED_OUTPUT_DIR} ${dtype_generalized_subgraphs_list} + insert_graph_sample ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} "github_torch_samples" "fusible_graph" ${dtype_generalized_subgraphs_list} # generate kernelbench format unittest generate_unittests 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_unittests_${suffix}.txt } +summary() { + num_original_samples=`cat $model_list | grep "^samples/" | wc -l` + echo "Number of original graphnet samples: $num_original_samples" + + num_device_rewrited_samples=`find ${DEVICE_REWRITED_OUTPUT_DIR} -name "model.py" | wc -l` + device_rewrited_successed_precent=$(( num_device_rewrited_samples * 100 / num_original_samples )) + echo "- [Step 1] device rewrite: successed=${num_device_rewrited_samples}, percent=$device_rewrited_successed_precent%" + + num_successed_dimension_generalized_samples=`find ${DIMENSION_GENERALIZED_OUTPUT_DIR} -name "model.py" | wc -l` + dimension_generalized_samples_successed_percent=$((num_successed_dimension_generalized_samples * 100 / (num_original_samples * 9))) + echo "- [Step 2] dimension generalization: successed=${num_successed_dimension_generalized_samples}, percent=${dimension_generalized_samples_successed_percent}%" + for index in {0..8}; do + num_successed_dimension_generalized_samples=`find ${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index} -name "model.py" | wc -l` + dimension_generalized_samples_successed_percent=$(( num_successed_dimension_generalized_samples * 100 / num_original_samples )) + echo " ${index}, successed=${num_successed_dimension_generalized_samples}, percent=${dimension_generalized_samples_successed_percent}%" + done + echo "" + + num_successed_op_names=`find ${OP_NAMES_OUTPUT_DIR} -name op_names.txt | wc -l` + op_names_successed_percent=$(( num_successed_op_names * 100 / num_original_samples )) + echo "- [Step 3] generate op names: successed=${num_successed_op_names}, percent=${op_names_successed_percent}%" + + num_typical_subgraph_ranges=`find ${SUBGRAPH_RANGES_JSON_ROOT} -name typical_subgraph_ranges.json | wc -l` + typical_subgraph_ranges_successed_percent=$(( num_typical_subgraph_ranges * 100 / num_original_samples )) + echo "- [Step 4] generate typical subgraph ranges: successed=${num_typical_subgraph_ranges}, percent=${typical_subgraph_ranges_successed_percent}%" + + num_successed_range_decomposed_subgraphs=`find ${RANGE_DECOMPOSE_OUTPUT_DIR} -name "model.py" | wc -l` + echo "- [Step 5] range decompose: successed=${num_successed_range_decomposed_subgraphs}" + + num_renamed_subgraphs=`find ${GRAPH_VAR_RENAME_OUTPUT_DIR} -name "model.py" | wc -l` + echo "- [Step 6] rename: successed=${num_renamed_subgraphs}" + + num_deduplicated_subgraphs=`find ${DEDUPLICATED_OUTPUT_DIR} -name "model.py" | wc -l` + echo "- [Step 7] remove duplicated: successed=${num_deduplicated_subgraphs}" + + num_successed_cumsum_kernels_subgraphs=`find ${CUMSUM_NUM_KERNELS_DIR} -name "cumsum_num_kernels.json" | wc -l` + cumsum_kernels_successed_percent=$((num_successed_cumsum_kernels_subgraphs * 100 / num_deduplicated_subgraphs)) + echo "- [Step 8] cumsum kernels: successed=${num_successed_cumsum_kernels_subgraphs}, percent=${cumsum_kernels_successed_percent}%" + + num_fusible_subgraph_ranges=`find ${FUSIBLE_SUBGRAPH_RANGES_DIR} -name "fusible_subgraph_ranges.json" | wc -l` + num_grouped_fusible_subgraph_ranges=`find ${GROUPED_FUSIBLE_SUBGRAPH_RANGES_DIR} -name "grouped_fusible_subgraph_ranges.json" | wc -l` + echo " fusible subgraph ranges: successed=${num_fusible_subgraph_ranges}" + echo " grouped fusible subgraph ranges: successed=${num_grouped_fusible_subgraph_ranges}" + echo "" + + num_successed_dimension_generalized_subgraphs=`find ${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR} -name "model.py" | wc -l` + echo "- [Step 9] subgraph dimension generalization: successed=${num_successed_dimension_generalized_subgraphs}" + for index in {0..8}; do + num_successed_dimension_generalized_subgraphs=`find ${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR}/${index} -name "model.py" | wc -l` + echo " ${index}, successed=${num_successed_dimension_generalized_subgraphs}" + done + echo "" + + num_renamed_fusible_subgraphs=`find ${RENAMED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} -name "model.py" | wc -l` + echo "- [Step 10] rename: successed=${num_renamed_fusible_subgraphs}" + for index in {0..8}; do + num_renamed_fusible_subgraphs_index=`find ${RENAMED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}/${index} -name "model.py" | wc -l` + echo " ${index}, successed=${num_renamed_fusible_subgraphs_index}" + done + echo "" + + num_deduplicated_fusible_subgraphs=`find ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} -name "model.py" | wc -l` + echo "- [Step 11] remove duplicated: successed=${num_deduplicated_fusible_subgraphs}" + for index in {0..8}; do + num_deduplicated_fusible_subgraphs_index=`find ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}/${index} -name "model.py" | wc -l` + echo " ${index}, successed=${num_deduplicated_fusible_subgraphs_index}" + done + echo "" + + num_dtype_generalized_subgraphs=`find ${DTYPE_GENERALIZED_OUTPUT_DIR} -name "model.py" | wc -l` + echo "- [Step 12] dtype generalization: successed=${num_dtype_generalized_subgraphs}" + for dtype in float32 float16 bfloat16 + do + num_dtype_generalized_subgraphs_index=`find ${DTYPE_GENERALIZED_OUTPUT_DIR}/${dtype} -name "model.py" | wc -l` + echo " ${dtype}, successed=${num_dtype_generalized_subgraphs_index}" + done + echo "" + + num_successed_unittests=`find ${UNITTESTS_OUTPUT_DIR} -name "*_test.py" | wc -l` + unittest_successed_percent=$((num_successed_unittests * 100 / num_dtype_generalized_subgraphs)) + echo "- [Step 13] generate unittest: successed=${num_successed_unittests}, percent=${unittest_successed_percent}%" + for dtype in float32 float16 bfloat16 + do + num_successed_unittests=`find ${UNITTESTS_OUTPUT_DIR}/${dtype} -name "*_test.py" | wc -l` + echo " ${dtype}, successed=${num_successed_unittests}" + done +} + main + +set +x +summary 2>&1 | tee ${DECOMPOSE_WORKSPACE}/summary.txt diff --git a/graph_net/torch/sample_pass/dtype_generalizer.py b/graph_net/torch/sample_pass/dtype_generalizer.py index 48c06803a..9ce86ab2f 100644 --- a/graph_net/torch/sample_pass/dtype_generalizer.py +++ b/graph_net/torch/sample_pass/dtype_generalizer.py @@ -17,6 +17,7 @@ from pathlib import Path from typing import Any, Dict, List +import torch import torch.fx as fx from graph_net.graph_net_json_file_util import ( @@ -27,9 +28,6 @@ update_json, ) from graph_net.torch.constraint_util import RunModelPredicator -from graph_net.torch.fx_graph_cache_util import ( - parse_immutable_model_path_into_sole_graph_module, -) from graph_net.torch.fx_graph_serialize_util import serialize_graph_module_to_str from graph_net.torch.dtype_gen_passes.pass_mgr import get_dtype_generalization_pass from graph_net.torch import utils @@ -107,7 +105,7 @@ def sample_handled(self, rel_model_path: str) -> bool: def __call__(self, model_path: str) -> None: self.resumable_handle_sample(model_path) - def resume(self, model_path: str) -> None: + def resume(self, rel_model_path: str) -> None: """ Initialize dtype passes for the given model. @@ -115,8 +113,7 @@ def resume(self, model_path: str) -> None: model_path: Path to the model directory (may be relative to model_path_prefix) """ # Apply model_path_prefix if provided - if self.model_path_prefix: - model_path = str(Path(self.model_path_prefix) / model_path) + model_path = str(Path(self.model_path_prefix) / rel_model_path) # Parse the computation graph module, inputs = get_torch_module_and_inputs(model_path) @@ -236,9 +233,9 @@ class ApplyDataTypeGeneralizationPasses(SamplePass, ResumableSamplePassMixin): "output_dir": "/path/to/output", "model_path_prefix": "", "model_runnable_predicator_filepath": "...", - "resume": , - "limits_handled_models": , - "try_run": , + "resume": true, + "limits_handled_models": null, + "try_run": true, } """ @@ -268,6 +265,7 @@ def declare_config( output_dir: str, model_path_prefix: str, model_runnable_predicator_filepath: str, + device: str = "auto", resume: bool = False, limits_handled_models: int = None, try_run: bool = True, @@ -281,6 +279,13 @@ def _make_model_runnable_predicator(self, config: Dict[str, Any]): predicator_config = self.model_runnable_predicator_config return cls(predicator_config) + def _choose_device(self, device) -> str: + if device is None: + return None + if device in ["cpu", "cuda"]: + return device + return "cuda" if torch.cuda.is_available() else "cpu" + def sample_handled(self, rel_model_path: str) -> bool: model_path = Path(self.config["model_path_prefix"]) / rel_model_path dtype_pass_names = self._read_dtype_pass_names(model_path) @@ -309,30 +314,27 @@ def resume(self, rel_model_path: str) -> List[str]: List of generated sample directories """ # Apply model_path_prefix if provided - if self.model_path_prefix: - abs_model_path = str(Path(self.model_path_prefix) / rel_model_path) + model_path = str(Path(self.model_path_prefix) / rel_model_path) # Read pass names from graph_net.json - dtype_pass_names = self._read_dtype_pass_names(abs_model_path) + dtype_pass_names = self._read_dtype_pass_names(model_path) + logging.info(f"Apply {dtype_pass_names=} for {rel_model_path=}") + + # Copy the original float32 sample + fp32_output_dir = self._get_output_dir(rel_model_path, "float32") + self._copy_sample(rel_model_path, fp32_output_dir) if not dtype_pass_names: - logging.warning(f"No dtype passes found in {abs_model_path}/graph_net.json") + logging.warning(f"No dtype passes found in {model_path}/graph_net.json") return [] # Parse the computation graph - traced_model = parse_immutable_model_path_into_sole_graph_module(abs_model_path) - - # Copy the originl sample - files_copied = [ - "model.py", - "graph_hash.txt", - "graph_net.json", - "weight_meta.py", - "input_meta.py", - "input_tensor_constraints.py", - "subgraph_sources.json", - ] - self._copy_sample_files(rel_model_path, "float32", files_copied) + module, inputs = get_torch_module_and_inputs( + model_path, device=self._choose_device(self.config["device"]) + ) + traced_model = parse_sole_graph_module(module, inputs) + + ShapeProp(traced_model).propagate(*inputs) # Generate samples for each pass generated_samples = [] @@ -370,13 +372,13 @@ def _read_dtype_pass_names(self, model_path: str) -> List[str]: return metadata.get(kDataTypeGeneralizationPasses, []) def _apply_pass_and_generate( - self, model_path: str, traced_model: fx.GraphModule, pass_name: str + self, rel_model_path: str, traced_model: fx.GraphModule, pass_name: str ) -> str: """ Apply a specific pass and generate a new sample. Args: - model_path: Original model path + rel_model_path: Original model path traced_model: Original traced model pass_name: Name of the pass file (without .py extension), e.g., "dtype_generalization_pass_float16" @@ -402,42 +404,33 @@ def _apply_pass_and_generate( gm_modified = dtype_pass.rewrite(gm_copy) # Generate output directory - output_sample_dir = Path(self.output_dir) / dtype / model_path - output_sample_dir.mkdir(parents=True, exist_ok=True) + output_dir = self._get_output_dir(rel_model_path, dtype) + + # Copy metadata files of original sample + self._copy_sample(rel_model_path, output_dir) - # Write modified model.py + # Update model.py model_code = serialize_graph_module_to_str(gm_modified) - write_code = utils.apply_templates(model_code) - with open(output_sample_dir / "model.py", "w") as f: - f.write(write_code) + templated_model_code = utils.apply_templates(model_code) + (output_dir / "model.py").write_text(templated_model_code) - # Write modified graph_hash.txt + # Update graph_hash.txt model_hash = get_sha256_hash(model_code) - with open(output_sample_dir / "graph_hash.txt", "w") as f: - f.write(model_hash) - - # Copy metadata files - files_copied = [ - "graph_net.json", - "weight_meta.py", - "input_meta.py", - "input_tensor_constraints.py", - "subgraph_sources.json", - ] - self._copy_sample_files(model_path, dtype, files_copied) + (output_dir / "graph_hash.txt").write_text(model_hash) # Update graph_net.json with dtype information - self._update_sample_metadata(output_sample_dir, dtype) + self._update_sample_metadata(output_dir, dtype) # Validate generated sample (required - generated code must be runnable) if self.try_run: - if not self.model_runnable_predicator(str(output_sample_dir)): - raise RuntimeError( - f"Generated sample failed validation: {output_sample_dir}" - ) - logging.info(f"Generated sample validated: {output_sample_dir}") + if not self.model_runnable_predicator(str(output_dir)): + raise RuntimeError(f"Generated sample failed validation: {output_dir}") + logging.info(f"Generated sample validated: {output_dir}") - return str(output_sample_dir) + return str(output_dir) + + def _get_output_dir(self, rel_model_path: str, dtype: str): + return Path(self.output_dir) / dtype / rel_model_path def _update_sample_metadata(self, sample_dir: Path, dtype: str) -> None: """ @@ -452,24 +445,17 @@ def _update_sample_metadata(self, sample_dir: Path, dtype: str) -> None: update_json(graph_net_json_path, kDtypeGeneralizationPrecision, dtype) update_json(graph_net_json_path, kDtypeGeneralizationGenerated, True) - def _copy_sample_files( - self, rel_model_path: str, dtype: str, files_copied: list - ) -> None: + def _copy_sample(self, rel_model_path: str, output_dir: str) -> None: """ Copy files of sample. Args: rel_model_path: relative model path """ - # Generate output directory - output_sample_dir = Path(self.output_dir) / dtype / rel_model_path - output_sample_dir.mkdir(parents=True, exist_ok=True) - - # Copy files of original sample - for fname in files_copied: - src = Path(rel_model_path) / fname - if src.exists(): - shutil.copy(src, output_sample_dir / fname) + model_path = str(Path(self.model_path_prefix) / rel_model_path) + if not output_dir.exists(): + logging.info(f"Copy {model_path} -> {output_dir}") + shutil.copytree(model_path, output_dir) class MultiDtypeFilter: