diff --git a/Dockerfile b/Dockerfile index 97c35113..a3aa4ff6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,20 +6,18 @@ RUN curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.s rm Miniconda3-latest-Linux-x86_64.sh ENV PATH="/opt/conda/bin:${PATH}" RUN conda create -n twinkle python=3.12 -y --override-channels -c conda-forge -SHELL ["conda", "run", "-n", "twinkle", "/bin/bash", "-c"] +ENV PATH="/opt/conda/envs/twinkle/bin:${PATH}" # Clone and install twinkle, checkout to latest v-tag RUN git clone https://github.com/modelscope/twinkle.git WORKDIR /twinkle -RUN echo "Available v-tags:" && git tag -l 'v*' --sort=-v:refname && \ - LATEST_TAG=$(git tag -l 'v*' --sort=-v:refname | head -n 1) && \ - echo "Checking out: $LATEST_TAG" && \ - git checkout "$LATEST_TAG" +RUN echo "Available release branches:" && git branch -r -l 'origin/release/*' --sort=-v:refname && \ + LATEST_RELEASE=$(git branch -r -l 'origin/release/*' --sort=-v:refname | head -n 1 | tr -d ' ') && \ + echo "Checking out: $LATEST_RELEASE" && \ + git checkout --track "$LATEST_RELEASE" RUN sh INSTALL_MEGATRON.sh RUN pip install --no-cache-dir tinker==0.14.0 "ray[serve]" transformers peft accelerate -U RUN pip install -e . --no-build-isolation - -CMD ["bash", "cookbook/client/server/megatron/run.sh"] diff --git a/INSTALL_MEGATRON.sh b/INSTALL_MEGATRON.sh index 775f7588..e86e5478 100644 --- a/INSTALL_MEGATRON.sh +++ b/INSTALL_MEGATRON.sh @@ -4,7 +4,7 @@ # which always occur error set -e # Exit immediately on error - +export SETUPTOOLS_USE_DISTUTILS=local echo "==========================================" echo "Starting deep learning dependencies installation..." echo "==========================================" @@ -55,8 +55,8 @@ echo "Using CUDA architecture: $TORCH_CUDA_ARCH_LIST" # Install latest base packages echo "" -echo "Installing peft, accelerate, transformers, modelscope, oss2..." 
-pip install --upgrade peft accelerate transformers "modelscope[framework]" oss2 +echo "Installing peft, accelerate, transformers, modelscope..." +pip install --upgrade peft accelerate transformers "modelscope[framework]" # Install latest vllm echo "" @@ -71,7 +71,9 @@ echo "Site-packages path: $SITE_PACKAGES" CUDNN_PATH=$SITE_PACKAGES/nvidia/cudnn \ CPLUS_INCLUDE_PATH=$SITE_PACKAGES/nvidia/cudnn/include \ -pip install --no-build-isolation "transformer_engine[pytorch]" megatron_core --no-cache-dir +pip install --no-build-isolation "transformer_engine[pytorch]" --no-cache-dir + +pip install megatron_core mcore_bridge --no-cache-dir # Install flash-attention (force local build) echo "" @@ -87,11 +89,6 @@ pip install flash-linear-attention -U echo "" echo "Installing numpy==2.2 and deep_gemm..." pip install numpy==2.2 -pip uninstall deep_gemm -y -cd /tmp -git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git -cd DeepGEMM -pip install . --no-build-isolation # Verify installation echo "" @@ -100,7 +97,7 @@ echo "" python -c " import pkg_resources -packages = ['peft', 'accelerate', 'transformers', 'modelscope', 'oss2', 'vllm', 'transformer_engine', 'megatron_core', 'flash_attn', 'numpy'] +packages = ['peft', 'accelerate', 'transformers', 'modelscope', 'vllm', 'transformer_engine', 'megatron_core', 'flash_attn', 'numpy'] print('Installed package versions:') print('-' * 40) diff --git a/cookbook/client/server/megatron/run.sh b/cookbook/client/server/megatron/run.sh index 38befef2..c7db36d1 100644 --- a/cookbook/client/server/megatron/run.sh +++ b/cookbook/client/server/megatron/run.sh @@ -3,4 +3,4 @@ export RAY_ROTATION_BACKUP_COUNT=1 CUDA_VISIBLE_DEVICES=0,1,2,3 ray start --head --port=6379 --num-gpus=4 --disable-usage-stats --include-dashboard=false CUDA_VISIBLE_DEVICES=4,5,6,7 ray start --address=127.0.0.1:6379 --num-gpus=4 CUDA_VISIBLE_DEVICES="" ray start --address=127.0.0.1:6379 --num-gpus=0 -python server.py +python "$(dirname "$0")/server.py" diff --git 
a/cookbook/client/server/megatron/server.py b/cookbook/client/server/megatron/server.py index e38f43a4..d6cb87c5 100644 --- a/cookbook/client/server/megatron/server.py +++ b/cookbook/client/server/megatron/server.py @@ -9,7 +9,7 @@ import os # Enable Ray debug mode for verbose logging during development -os.environ['TWINKLE_TRUST_REMOTE_CODE'] = '1' +os.environ['TWINKLE_TRUST_REMOTE_CODE'] = '0' from twinkle.server import launch_server diff --git a/cookbook/client/server/megatron/server_config.yaml b/cookbook/client/server/megatron/server_config.yaml index 21d8a16b..0f66dd57 100644 --- a/cookbook/client/server/megatron/server_config.yaml +++ b/cookbook/client/server/megatron/server_config.yaml @@ -42,7 +42,7 @@ applications: import_path: sampler args: model_id: "ms://Qwen/Qwen3.5-27B" # ModelScope model identifier - nproc_per_node: 8 # Number of GPU processes per node + nproc_per_node: 4 # Number of GPU processes per node sampler_type: vllm # Inference engine: 'vllm' (fast) or 'torch' (TorchSampler) engine_args: # vLLM engine-specific settings max_model_len: 32000 # Maximum sequence length the engine supports @@ -84,7 +84,7 @@ applications: model_id: "ms://Qwen/Qwen3.5-27B" # ModelScope model identifier max_length: 32000 # model max length max_loras: 5 # model max loras - nproc_per_node: 8 # Number of GPU processes per node + nproc_per_node: 4 # Number of GPU processes per node device_group: name: model ranks: 4 # GPU rank indices diff --git a/cookbook/client/tinker/modelscope/sample.py b/cookbook/client/tinker/modelscope/sample.py index 72bd9f24..40c9b327 100644 --- a/cookbook/client/tinker/modelscope/sample.py +++ b/cookbook/client/tinker/modelscope/sample.py @@ -45,7 +45,7 @@ ] ) -input_feature = template.encode(trajectory, add_generation_prompt=True) +input_feature = template.batch_encode([trajectory], add_generation_prompt=True)[0] input_ids = input_feature['input_ids'].tolist() diff --git a/pyproject.toml b/pyproject.toml index 5ac881e7..af96f65e 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ dependencies = [ "datasets>=3.0,<4.0", "omegaconf>=2.3.0,<3.0.0", "fastapi", - "modelscope[framework]>=1.34.0", + "modelscope[framework]>=1.35.0", "safetensors", "peft>=0.11.0,<=0.19.0", "transformers", diff --git a/src/twinkle/dataset/base.py b/src/twinkle/dataset/base.py index 8ffe5cea..98bb9c8f 100644 --- a/src/twinkle/dataset/base.py +++ b/src/twinkle/dataset/base.py @@ -51,6 +51,9 @@ class Dataset(TorchDataset): """ def __init__(self, dataset_meta: DatasetMeta, **kwargs): + trust_remote_code = os.environ.get('TWINKLE_TRUST_REMOTE_CODE', '1') != '0' + if not trust_remote_code: + kwargs['trust_remote_code'] = False dataset = self._load_dataset(dataset_meta, **kwargs) self.datasets = {dataset_meta.get_id(): dataset} self.dataset = dataset @@ -247,6 +250,9 @@ def add_dataset(self, dataset_meta: DatasetMeta, **kwargs): Args: dataset_meta: The dataset_meta information of the loaded dataset. """ + trust_remote_code = os.environ.get('TWINKLE_TRUST_REMOTE_CODE', '1') != '0' + if not trust_remote_code: + kwargs['trust_remote_code'] = False dataset = self._load_dataset(dataset_meta, **kwargs) self.datasets[dataset_meta.get_id()] = dataset diff --git a/src/twinkle/hub/hub.py b/src/twinkle/hub/hub.py index 916a42b2..15fc1ef5 100644 --- a/src/twinkle/hub/hub.py +++ b/src/twinkle/hub/hub.py @@ -401,7 +401,7 @@ def load_dataset(cls, cls.try_login(token) if revision is None or revision == 'main': revision = 'master' - load_kwargs = {'trust_remote_code': True} + load_kwargs = {'trust_remote_code': kwargs.get('trust_remote_code', True)} return MsDataset.load( dataset_id, subset_name=subset_name, @@ -595,6 +595,7 @@ def load_dataset(cls, from datasets import load_dataset if revision is None or revision == 'master': revision = 'main' + trust_remote_code = kwargs.get('trust_remote_code', True) return load_dataset( dataset_id, name=subset_name, @@ -602,6 +603,7 @@ def load_dataset(cls, streaming=streaming, revision=revision,
download_mode=download_mode, + trust_remote_code=trust_remote_code, num_proc=num_proc) @classmethod diff --git a/src/twinkle/processor/base.py b/src/twinkle/processor/base.py index da6bb3d5..3269a574 100644 --- a/src/twinkle/processor/base.py +++ b/src/twinkle/processor/base.py @@ -97,7 +97,8 @@ def to_tensor(_input): # so tensor ops like labels != ignore_index or .to(device) would fail without this. if isinstance(value, np.ndarray): value = torch.from_numpy(value) - elif isinstance(value, list) and isinstance(value[0], (int, float, np.number)): + elif key == 'position_ids' or (isinstance(value, list) + and isinstance(value[0], (int, float, np.number))): value = torch.tensor(value) elif key in self.VLM_CONCAT_FIELDS: if not isinstance(value[0], torch.Tensor): diff --git a/src/twinkle/server/gateway/server.py b/src/twinkle/server/gateway/server.py index cd942e61..79a90349 100644 --- a/src/twinkle/server/gateway/server.py +++ b/src/twinkle/server/gateway/server.py @@ -36,7 +36,7 @@ def __init__(self, self.http_options = http_options or {} self.proxy = ServiceProxy(http_options=http_options, route_prefix=self.route_prefix) self.supported_models = self._normalize_models(supported_models) or [ - types.SupportedModel(model_name='Qwen/Qwen3.5-4B'), + types.SupportedModel(model_name='Qwen/Qwen3.5-27B'), ] self._modelscope_config_lock = asyncio.Lock()