diff --git a/.github/workflows/build_and_push_docker_image.yml b/.github/workflows/build_and_push_docker_image.yml index 5ff6591d82..ea7ac3b9e3 100644 --- a/.github/workflows/build_and_push_docker_image.yml +++ b/.github/workflows/build_and_push_docker_image.yml @@ -123,6 +123,7 @@ jobs: MODE=${{ inputs.build_mode }} WORKFLOW=${{ inputs.workflow }} PACKAGE_DIR=./src + TESTS_DIR=./tests JAX_VERSION=NONE LIBTPU_VERSION=NONE INCLUDE_TEST_ASSETS=true diff --git a/PREFLIGHT.md b/PREFLIGHT.md index 495e8d87fa..71f2d9e379 100644 --- a/PREFLIGHT.md +++ b/PREFLIGHT.md @@ -1,35 +1,35 @@ # Optimization 1: Multihost recommended network settings -We included all the recommended network settings in [rto_setup.sh](https://github.com/google/maxtext/blob/main/rto_setup.sh). +We included all the recommended network settings in [rto_setup.sh](https://github.com/google/maxtext/blob/main/src/dependencies/scripts/rto_setup.sh). -[preflight.sh](https://github.com/google/maxtext/blob/main/preflight.sh) will help you apply them based on GCE or GKE platform. +[preflight.sh](https://github.com/google/maxtext/blob/main/src/dependencies/scripts/preflight.sh) will help you apply them based on GCE or GKE platform. Before you run ML workload on Multihost with GCE or GKE, simply apply `bash preflight.sh PLATFORM=[GCE or GKE]` to leverage the best DCN network performance. Here is an example for GCE: ``` -bash preflight.sh PLATFORM=GCE && python3 -m maxtext.trainers.pre_train.train run_name=${YOUR_JOB_NAME?} +bash src/dependencies/scripts/preflight.sh PLATFORM=GCE && python3 -m maxtext.trainers.pre_train.train run_name=${YOUR_JOB_NAME?} ``` Here is an example for GKE: ``` -bash preflight.sh PLATFORM=GKE && python3 -m maxtext.trainers.pre_train.train run_name=${YOUR_JOB_NAME?} +bash src/dependencies/scripts/preflight.sh PLATFORM=GKE && python3 -m maxtext.trainers.pre_train.train run_name=${YOUR_JOB_NAME?} ``` # Optimization 2: Numa binding (You can only apply this to v4 and v5p) NUMA binding is recommended for enhanced performance, as it reduces memory latency and maximizes data throughput, ensuring that your high-performance applications operate more efficiently and effectively. For GCE, -[preflight.sh](https://github.com/google/maxtext/blob/main/preflight.sh) will help you install `numactl` dependency, so you can use it directly, here is an example: +[preflight.sh](https://github.com/google/maxtext/blob/main/src/dependencies/scripts/preflight.sh) will help you install `numactl` dependency, so you can use it directly, here is an example: ``` -bash preflight.sh PLATFORM=GCE && numactl --membind 0 --cpunodebind=0 python3 -m maxtext.trainers.pre_train.train run_name=${YOUR_JOB_NAME?} +bash src/dependencies/scripts/preflight.sh PLATFORM=GCE && numactl --membind 0 --cpunodebind=0 python3 -m maxtext.trainers.pre_train.train run_name=${YOUR_JOB_NAME?} ``` For GKE, `numactl` should be built into your docker image from [maxtext_tpu_dependencies.Dockerfile](https://github.com/google/maxtext/blob/main/src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile), so you can use it directly if you built the maxtext docker image. Here is an example ``` -bash preflight.sh PLATFORM=GKE && numactl --membind 0 --cpunodebind=0 python3 -m maxtext.trainers.pre_train.train run_name=${YOUR_JOB_NAME?} +bash src/dependencies/scripts/preflight.sh PLATFORM=GKE && numactl --membind 0 --cpunodebind=0 python3 -m maxtext.trainers.pre_train.train run_name=${YOUR_JOB_NAME?} ``` 1. `numactl`: This is the command-line tool used for controlling NUMA policy for processes or shared memory. It's particularly useful on multi-socket systems where memory locality can impact performance. diff --git a/src/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile b/src/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile index 6dc978005b..7a08541a86 100644 --- a/src/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile +++ b/src/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile @@ -41,6 +41,9 @@ ENV ENV_DEVICE=$DEVICE ARG PACKAGE_DIR ENV PACKAGE_DIR=$PACKAGE_DIR +ARG TESTS_DIR +ENV TESTS_DIR=$TESTS_DIR + ENV MAXTEXT_ASSETS_ROOT=/deps/src/maxtext/assets ENV MAXTEXT_TEST_ASSETS_ROOT=/deps/tests/assets ENV MAXTEXT_PKG_DIR=/deps/src/MaxText @@ -63,6 +66,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ # Now copy the remaining code (source files that may change frequently) COPY ${PACKAGE_DIR}/maxtext/ src/MaxText/ +COPY ${TESTS_DIR}*/ tests/ # Download test assets from GCS if building image with test assets ARG INCLUDE_TEST_ASSETS=false diff --git a/src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile b/src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile index f86f233090..81c3f2d8b2 100644 --- a/src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile +++ b/src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile @@ -38,6 +38,9 @@ ENV ENV_DEVICE=$DEVICE ARG PACKAGE_DIR ENV PACKAGE_DIR=$PACKAGE_DIR +ARG TESTS_DIR +ENV TESTS_DIR=$TESTS_DIR + ENV MAXTEXT_ASSETS_ROOT=/deps/src/maxtext/assets ENV MAXTEXT_TEST_ASSETS_ROOT=/deps/tests/assets ENV MAXTEXT_PKG_DIR=/deps/src/maxtext @@ -63,6 +66,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ # Now copy the remaining code (source files that may change frequently) COPY ${PACKAGE_DIR}/maxtext/ src/maxtext/ +COPY ${TESTS_DIR}*/ tests/ # Download test assets from GCS if building image with test assets ARG INCLUDE_TEST_ASSETS=false diff --git a/src/dependencies/scripts/docker_build_dependency_image.sh b/src/dependencies/scripts/docker_build_dependency_image.sh index 3705334014..30a896cc44 100644 --- a/src/dependencies/scripts/docker_build_dependency_image.sh +++ b/src/dependencies/scripts/docker_build_dependency_image.sh @@ -22,6 +22,8 @@ PACKAGE_DIR="${PACKAGE_DIR:-src}" echo "PACKAGE_DIR: $PACKAGE_DIR" +TESTS_DIR="${TESTS_DIR:-tests}" +echo "TESTS_DIR: $TESTS_DIR" # Enable "exit immediately if any command fails" option set -e @@ -71,6 +73,7 @@ docker_build_args=( "MODE=${MODE}" "JAX_VERSION=${JAX_VERSION}" "PACKAGE_DIR=${PACKAGE_DIR}" + "TESTS_DIR=${TESTS_DIR}" ) run_docker_build() { diff --git a/preflight.sh b/src/dependencies/scripts/preflight.sh similarity index 84% rename from preflight.sh rename to src/dependencies/scripts/preflight.sh index 1eb3025e9d..1832b2503c 100644 --- a/preflight.sh +++ b/src/dependencies/scripts/preflight.sh @@ -3,7 +3,7 @@ echo "Running preflight.sh" # Command Flags: # # Example to invoke this script: -# bash preflight.sh +# bash src/dependencies/scripts/preflight.sh # Warning: # For any dependencies, please add them into `setup.sh` or `maxtext_tpu_dependencies.Dockerfile`. @@ -24,11 +24,11 @@ if command -v sudo >/dev/null 2>&1; then echo "running rto_setup.sh with sudo" # apply network settings. - sudo bash rto_setup.sh + sudo bash src/dependencies/scripts/rto_setup.sh else # sudo is not available, run the script without sudo echo "running rto_setup.sh without sudo" # apply network settings. - bash rto_setup.sh + bash src/dependencies/scripts/rto_setup.sh fi \ No newline at end of file diff --git a/rto_setup.sh b/src/dependencies/scripts/rto_setup.sh similarity index 100% rename from rto_setup.sh rename to src/dependencies/scripts/rto_setup.sh diff --git a/src/maxtext/configs/README.md b/src/maxtext/configs/README.md index ec00acf759..da818a8385 100644 --- a/src/maxtext/configs/README.md +++ b/src/maxtext/configs/README.md @@ -19,7 +19,7 @@ This directory contains high performance model configurations for different gene These configurations do 3 things: * Sets various XLA compiler flags (see [below](/src/maxtext/configs#xla-flags-used-by-maxtext)) as `LIBTPU_INIT_ARGS` to optimize runtime performance. -* Runs [rto_setup.sh](https://github.com/google/maxtext/blob/main/rto_setup.sh) to optimize communication protocols for network performance. +* Runs [rto_setup.sh](https://github.com/google/maxtext/blob/main/src/dependencies/scripts/rto_setup.sh) to optimize communication protocols for network performance. (This only needs to be run once on each worker) * Runs [train.py](https://github.com/google/maxtext/blob/main/src/maxtext/trainers/pre_train/train.py) with specific hyper-parameters (batch size, etc.) diff --git a/src/maxtext/configs/experimental/1024b.sh b/src/maxtext/configs/experimental/1024b.sh index 131229509a..20c95d5ef1 100644 --- a/src/maxtext/configs/experimental/1024b.sh +++ b/src/maxtext/configs/experimental/1024b.sh @@ -15,7 +15,7 @@ for ARGUMENT in "$@"; do done # Use preflight.sh to set up env based on platform -bash preflight.sh PLATFORM=$PLATFORM +bash src/dependencies/scripts/preflight.sh PLATFORM=$PLATFORM # Train export LIBTPU_INIT_ARGS="--xla_tpu_megacore_fusion_allow_ags=false --xla_enable_async_collective_permute=true --xla_tpu_enable_ag_backward_pipelining=true --xla_tpu_enable_data_parallel_all_reduce_opt=true --xla_tpu_data_parallel_opt_different_sized_ops=true --xla_tpu_enable_async_collective_fusion=true --xla_tpu_enable_async_collective_fusion_multiple_steps=true --xla_tpu_overlap_compute_collective_tc=true --xla_enable_async_all_gather=true" diff --git a/src/maxtext/configs/experimental/128b.sh b/src/maxtext/configs/experimental/128b.sh index 23b4cc5366..6d23785d3b 100644 --- a/src/maxtext/configs/experimental/128b.sh +++ b/src/maxtext/configs/experimental/128b.sh @@ -15,7 +15,7 @@ for ARGUMENT in "$@"; do done # Use preflight.sh to set up env based on platform -bash preflight.sh PLATFORM=$PLATFORM +bash src/dependencies/scripts/preflight.sh PLATFORM=$PLATFORM # Train export LIBTPU_INIT_ARGS="--xla_tpu_megacore_fusion_allow_ags=false --xla_enable_async_collective_permute=true --xla_tpu_enable_ag_backward_pipelining=true --xla_tpu_enable_data_parallel_all_reduce_opt=true --xla_tpu_data_parallel_opt_different_sized_ops=true --xla_tpu_enable_async_collective_fusion=true --xla_tpu_enable_async_collective_fusion_multiple_steps=true --xla_tpu_overlap_compute_collective_tc=true --xla_enable_async_all_gather=true" diff --git a/src/maxtext/configs/experimental/256b.sh b/src/maxtext/configs/experimental/256b.sh index 3d6f661fd1..a56b925577 100644 --- a/src/maxtext/configs/experimental/256b.sh +++ b/src/maxtext/configs/experimental/256b.sh @@ -15,7 +15,7 @@ for ARGUMENT in "$@"; do done # Use preflight.sh to set up env based on platform -bash preflight.sh PLATFORM=$PLATFORM +bash src/dependencies/scripts/preflight.sh PLATFORM=$PLATFORM # Train export LIBTPU_INIT_ARGS="--xla_tpu_megacore_fusion_allow_ags=false --xla_enable_async_collective_permute=true --xla_tpu_enable_ag_backward_pipelining=true --xla_tpu_enable_data_parallel_all_reduce_opt=true --xla_tpu_data_parallel_opt_different_sized_ops=true --xla_tpu_enable_async_collective_fusion=true --xla_tpu_enable_async_collective_fusion_multiple_steps=true --xla_tpu_overlap_compute_collective_tc=true --xla_enable_async_all_gather=true" diff --git a/src/maxtext/configs/experimental/32b.sh b/src/maxtext/configs/experimental/32b.sh index c474f62b95..a7126629d4 100644 --- a/src/maxtext/configs/experimental/32b.sh +++ b/src/maxtext/configs/experimental/32b.sh @@ -15,7 +15,7 @@ for ARGUMENT in "$@"; do done # Use preflight.sh to set up env based on platform -bash preflight.sh PLATFORM=$PLATFORM +bash src/dependencies/scripts/preflight.sh PLATFORM=$PLATFORM # Train export LIBTPU_INIT_ARGS="--xla_tpu_megacore_fusion_allow_ags=false --xla_enable_async_collective_permute=true --xla_tpu_enable_ag_backward_pipelining=true --xla_tpu_enable_data_parallel_all_reduce_opt=true --xla_tpu_data_parallel_opt_different_sized_ops=true --xla_tpu_enable_async_collective_fusion=true --xla_tpu_enable_async_collective_fusion_multiple_steps=true --xla_tpu_overlap_compute_collective_tc=true --xla_enable_async_all_gather=true" diff --git a/src/maxtext/configs/experimental/512b.sh b/src/maxtext/configs/experimental/512b.sh index 419496cb70..7cd4c36ee2 100644 --- a/src/maxtext/configs/experimental/512b.sh +++ b/src/maxtext/configs/experimental/512b.sh @@ -15,7 +15,7 @@ for ARGUMENT in "$@"; do done # Use preflight.sh to set up env based on platform -bash preflight.sh PLATFORM=$PLATFORM +bash src/dependencies/scripts/preflight.sh PLATFORM=$PLATFORM # Train export LIBTPU_INIT_ARGS="--xla_tpu_megacore_fusion_allow_ags=false --xla_enable_async_collective_permute=true --xla_tpu_enable_ag_backward_pipelining=true --xla_tpu_enable_data_parallel_all_reduce_opt=true --xla_tpu_data_parallel_opt_different_sized_ops=true --xla_tpu_enable_async_collective_fusion=true --xla_tpu_enable_async_collective_fusion_multiple_steps=true --xla_tpu_overlap_compute_collective_tc=true --xla_enable_async_all_gather=true" diff --git a/src/maxtext/configs/experimental/64b.sh b/src/maxtext/configs/experimental/64b.sh index 0173532ce5..9ff7d1cd07 100644 --- a/src/maxtext/configs/experimental/64b.sh +++ b/src/maxtext/configs/experimental/64b.sh @@ -15,7 +15,7 @@ for ARGUMENT in "$@"; do done # Use preflight.sh to set up env based on platform -bash preflight.sh PLATFORM=$PLATFORM +bash src/dependencies/scripts/preflight.sh PLATFORM=$PLATFORM # Train export LIBTPU_INIT_ARGS="--xla_tpu_megacore_fusion_allow_ags=false --xla_enable_async_collective_permute=true --xla_tpu_enable_ag_backward_pipelining=true --xla_tpu_enable_data_parallel_all_reduce_opt=true --xla_tpu_data_parallel_opt_different_sized_ops=true --xla_tpu_enable_async_collective_fusion=true --xla_tpu_enable_async_collective_fusion_multiple_steps=true --xla_tpu_overlap_compute_collective_tc=true --xla_enable_async_all_gather=true" diff --git a/src/maxtext/configs/tpu/v4/22b.sh b/src/maxtext/configs/tpu/v4/22b.sh index 76b845f951..549534882d 100644 --- a/src/maxtext/configs/tpu/v4/22b.sh +++ b/src/maxtext/configs/tpu/v4/22b.sh @@ -50,7 +50,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v4/52b.sh b/src/maxtext/configs/tpu/v4/52b.sh index f36ebf0771..a2efe85c08 100644 --- a/src/maxtext/configs/tpu/v4/52b.sh +++ b/src/maxtext/configs/tpu/v4/52b.sh @@ -50,7 +50,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v5e/128b.sh b/src/maxtext/configs/tpu/v5e/128b.sh index 5eb5ff8516..069abf376e 100644 --- a/src/maxtext/configs/tpu/v5e/128b.sh +++ b/src/maxtext/configs/tpu/v5e/128b.sh @@ -36,7 +36,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v5e/16b.sh b/src/maxtext/configs/tpu/v5e/16b.sh index ac29c2046e..4c45661146 100644 --- a/src/maxtext/configs/tpu/v5e/16b.sh +++ b/src/maxtext/configs/tpu/v5e/16b.sh @@ -36,7 +36,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v5e/32b.sh b/src/maxtext/configs/tpu/v5e/32b.sh index bee8ea77b9..2e72b5da2e 100644 --- a/src/maxtext/configs/tpu/v5e/32b.sh +++ b/src/maxtext/configs/tpu/v5e/32b.sh @@ -36,7 +36,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v5e/64b.sh b/src/maxtext/configs/tpu/v5e/64b.sh index 80ef4b28df..f28eeb6394 100644 --- a/src/maxtext/configs/tpu/v5e/64b.sh +++ b/src/maxtext/configs/tpu/v5e/64b.sh @@ -36,7 +36,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v5e/gpt3_175b.sh b/src/maxtext/configs/tpu/v5e/gpt3_175b.sh index 235eb77c67..d71f9b1a78 100644 --- a/src/maxtext/configs/tpu/v5e/gpt3_175b.sh +++ b/src/maxtext/configs/tpu/v5e/gpt3_175b.sh @@ -35,7 +35,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v5e/llama2_13b.sh b/src/maxtext/configs/tpu/v5e/llama2_13b.sh index 67bb6dbcd0..8dca05f36f 100644 --- a/src/maxtext/configs/tpu/v5e/llama2_13b.sh +++ b/src/maxtext/configs/tpu/v5e/llama2_13b.sh @@ -35,7 +35,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v5e/llama2_70b.sh b/src/maxtext/configs/tpu/v5e/llama2_70b.sh index af2eddb73b..528970ccc2 100644 --- a/src/maxtext/configs/tpu/v5e/llama2_70b.sh +++ b/src/maxtext/configs/tpu/v5e/llama2_70b.sh @@ -35,7 +35,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v5e/llama2_7b.sh b/src/maxtext/configs/tpu/v5e/llama2_7b.sh index 1c1420993a..6b68297a1e 100644 --- a/src/maxtext/configs/tpu/v5e/llama2_7b.sh +++ b/src/maxtext/configs/tpu/v5e/llama2_7b.sh @@ -35,7 +35,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v5p/1024b.sh b/src/maxtext/configs/tpu/v5p/1024b.sh index 7b4f281d21..14bdcc6692 100644 --- a/src/maxtext/configs/tpu/v5p/1024b.sh +++ b/src/maxtext/configs/tpu/v5p/1024b.sh @@ -36,7 +36,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v5p/128b.sh b/src/maxtext/configs/tpu/v5p/128b.sh index 9ed9a7ed22..ce068f7fde 100644 --- a/src/maxtext/configs/tpu/v5p/128b.sh +++ b/src/maxtext/configs/tpu/v5p/128b.sh @@ -36,7 +36,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v5p/256b.sh b/src/maxtext/configs/tpu/v5p/256b.sh index 559f31d433..04e16ca0cc 100644 --- a/src/maxtext/configs/tpu/v5p/256b.sh +++ b/src/maxtext/configs/tpu/v5p/256b.sh @@ -37,7 +37,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v5p/32b.sh b/src/maxtext/configs/tpu/v5p/32b.sh index 676dadf989..813f05d3a8 100644 --- a/src/maxtext/configs/tpu/v5p/32b.sh +++ b/src/maxtext/configs/tpu/v5p/32b.sh @@ -36,7 +36,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v5p/512b.sh b/src/maxtext/configs/tpu/v5p/512b.sh index af1c13c236..293c0b65eb 100644 --- a/src/maxtext/configs/tpu/v5p/512b.sh +++ b/src/maxtext/configs/tpu/v5p/512b.sh @@ -37,7 +37,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v5p/64b.sh b/src/maxtext/configs/tpu/v5p/64b.sh index 80a86e07b2..ec44e130f3 100644 --- a/src/maxtext/configs/tpu/v5p/64b.sh +++ b/src/maxtext/configs/tpu/v5p/64b.sh @@ -36,7 +36,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v5p/gpt3_175b/gpt3_175b_base.sh b/src/maxtext/configs/tpu/v5p/gpt3_175b/gpt3_175b_base.sh index 37a73672cf..0fc5a07429 100644 --- a/src/maxtext/configs/tpu/v5p/gpt3_175b/gpt3_175b_base.sh +++ b/src/maxtext/configs/tpu/v5p/gpt3_175b/gpt3_175b_base.sh @@ -12,7 +12,7 @@ set -euox pipefail -bash preflight.sh PLATFORM=gke +bash src/dependencies/scripts/preflight.sh PLATFORM=gke # flags set as default diff --git a/src/maxtext/configs/tpu/v5p/llama2_70b.sh b/src/maxtext/configs/tpu/v5p/llama2_70b.sh index ea4fa49216..b0e1836ea1 100644 --- a/src/maxtext/configs/tpu/v5p/llama2_70b.sh +++ b/src/maxtext/configs/tpu/v5p/llama2_70b.sh @@ -38,7 +38,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v5p/llama2_7b.sh b/src/maxtext/configs/tpu/v5p/llama2_7b.sh index 25dbc4e613..08683ace65 100644 --- a/src/maxtext/configs/tpu/v5p/llama2_7b.sh +++ b/src/maxtext/configs/tpu/v5p/llama2_7b.sh @@ -39,7 +39,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v6e/gemma2_27b.sh b/src/maxtext/configs/tpu/v6e/gemma2_27b.sh index 3739545d7e..4bbc58643e 100644 --- a/src/maxtext/configs/tpu/v6e/gemma2_27b.sh +++ b/src/maxtext/configs/tpu/v6e/gemma2_27b.sh @@ -33,7 +33,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v6e/gemma2_9b.sh b/src/maxtext/configs/tpu/v6e/gemma2_9b.sh index b9bb024195..2a7deb01aa 100644 --- a/src/maxtext/configs/tpu/v6e/gemma2_9b.sh +++ b/src/maxtext/configs/tpu/v6e/gemma2_9b.sh @@ -33,7 +33,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v6e/gemma3_27b.sh b/src/maxtext/configs/tpu/v6e/gemma3_27b.sh index 13bab0e273..7441dc358a 100644 --- a/src/maxtext/configs/tpu/v6e/gemma3_27b.sh +++ b/src/maxtext/configs/tpu/v6e/gemma3_27b.sh @@ -33,7 +33,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v6e/gpt3_175b.sh b/src/maxtext/configs/tpu/v6e/gpt3_175b.sh index 27458b3117..79ca9d30fc 100644 --- a/src/maxtext/configs/tpu/v6e/gpt3_175b.sh +++ b/src/maxtext/configs/tpu/v6e/gpt3_175b.sh @@ -33,7 +33,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v6e/llama2_7b_4096.sh b/src/maxtext/configs/tpu/v6e/llama2_7b_4096.sh index 3ff6bfa90b..4e08e3d76f 100644 --- a/src/maxtext/configs/tpu/v6e/llama2_7b_4096.sh +++ b/src/maxtext/configs/tpu/v6e/llama2_7b_4096.sh @@ -33,7 +33,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train diff --git a/src/maxtext/configs/tpu/v6e/mixtral_8x7b.sh b/src/maxtext/configs/tpu/v6e/mixtral_8x7b.sh index 5648a647ac..2eac06d68c 100644 --- a/src/maxtext/configs/tpu/v6e/mixtral_8x7b.sh +++ b/src/maxtext/configs/tpu/v6e/mixtral_8x7b.sh @@ -33,7 +33,7 @@ fi # Set up network optimizations if [ "$RUN_PREFLIGHT" = "true" ]; then - bash preflight.sh + bash src/dependencies/scripts/preflight.sh fi # Train