From 9072c90a04d58560d244d090a702e45adbdd0694 Mon Sep 17 00:00:00 2001 From: tastelikefeet Date: Sun, 5 Apr 2026 20:09:13 +0800 Subject: [PATCH 01/11] bump version to 0.2.0 --- pyproject.toml | 2 +- src/twinkle/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 584099cb..ce392b26 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "twinkle-kit" -version = "0.2.dev0" +version = "0.2.0" description = "Training API for large language models with efficient data handling and advanced optimization techniques." readme = "README.md" authors = [{ name = "ModelScope", email = "contact@modelscope.cn" }] diff --git a/src/twinkle/version.py b/src/twinkle/version.py index 05103d1d..08a7c147 100644 --- a/src/twinkle/version.py +++ b/src/twinkle/version.py @@ -1,5 +1,5 @@ # Make sure to modify __release_datetime__ to release time when making official release. -__version__ = '0.2.dev0' +__version__ = '0.2.0' # default release datetime for branches under active development is set # to be a time far-far-away-into-the-future __release_datetime__ = '2099-10-13 08:56:12' From 967093a26a252738eae339346916dfc6c1a0ad9f Mon Sep 17 00:00:00 2001 From: tastelikefeet Date: Sun, 5 Apr 2026 21:11:54 +0800 Subject: [PATCH 02/11] fix --- Dockerfile | 10 +++++----- INSTALL_MEGATRON.sh | 5 ----- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/Dockerfile b/Dockerfile index 97c35113..d01937f5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,15 +6,15 @@ RUN curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.s rm Miniconda3-latest-Linux-x86_64.sh ENV PATH="/opt/conda/bin:${PATH}" RUN conda create -n twinkle python=3.12 -y --override-channels -c conda-forge -SHELL ["conda", "run", "-n", "twinkle", "/bin/bash", "-c"] +ENV PATH="/opt/conda/envs/twinkle/bin:${PATH}" # Clone and install twinkle, checkout to latest v-tag RUN git clone https://github.com/modelscope/twinkle.git 
WORKDIR /twinkle -RUN echo "Available v-tags:" && git tag -l 'v*' --sort=-v:refname && \ - LATEST_TAG=$(git tag -l 'v*' --sort=-v:refname | head -n 1) && \ - echo "Checking out: $LATEST_TAG" && \ - git checkout "$LATEST_TAG" +RUN echo "Available release branches:" && git branch -r -l 'origin/release/*' --sort=-v:refname && \ + LATEST_RELEASE=$(git branch -r -l 'origin/release/*' --sort=-v:refname | head -n 1 | tr -d ' ') && \ + echo "Checking out: $LATEST_RELEASE" && \ + git checkout --track "$LATEST_RELEASE" RUN sh INSTALL_MEGATRON.sh diff --git a/INSTALL_MEGATRON.sh b/INSTALL_MEGATRON.sh index 775f7588..cc7298d2 100644 --- a/INSTALL_MEGATRON.sh +++ b/INSTALL_MEGATRON.sh @@ -87,11 +87,6 @@ pip install flash-linear-attention -U echo "" echo "Installing numpy==2.2 and deep_gemm..." pip install numpy==2.2 -pip uninstall deep_gemm -y -cd /tmp -git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git -cd DeepGEMM -pip install . --no-build-isolation # Verify installation echo "" From 68aa05b6db9277d9b850c3e4218d42ddcb728b78 Mon Sep 17 00:00:00 2001 From: tastelikefeet Date: Sun, 5 Apr 2026 21:14:20 +0800 Subject: [PATCH 03/11] fix --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index d01937f5..ca6d39ed 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,7 @@ FROM modelscope-registry.cn-hangzhou.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda12.8.1-py311-torch2.9.1-1.35.0 +RUN echo '47.110.159.78 github.com' >> /etc/hosts + # Install miniconda with Python 3.12 RUN curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda && \ From 1eeb423ad2b426b91b8e5599db8c66cbcd95a3fb Mon Sep 17 00:00:00 2001 From: tastelikefeet Date: Sun, 5 Apr 2026 21:58:37 +0800 Subject: [PATCH 04/11] fix dockerfile --- Dockerfile | 5 ++--- INSTALL_MEGATRON.sh | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile 
index ca6d39ed..8bae7c6a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,5 @@ FROM modelscope-registry.cn-hangzhou.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda12.8.1-py311-torch2.9.1-1.35.0 -RUN echo '47.110.159.78 github.com' >> /etc/hosts - # Install miniconda with Python 3.12 RUN curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda && \ @@ -24,4 +22,5 @@ RUN pip install --no-cache-dir tinker==0.14.0 "ray[serve]" transformers peft acc RUN pip install -e . --no-build-isolation -CMD ["bash", "cookbook/client/server/megatron/run.sh"] +ENV TWINKLE_WORKDIR=/data +CMD ["bash", "-c", "mkdir -p $TWINKLE_WORKDIR && cd $TWINKLE_WORKDIR && bash /twinkle/cookbook/client/server/megatron/run.sh 2>&1 | tee $TWINKLE_WORKDIR/run.log"] diff --git a/INSTALL_MEGATRON.sh b/INSTALL_MEGATRON.sh index cc7298d2..fb23524b 100644 --- a/INSTALL_MEGATRON.sh +++ b/INSTALL_MEGATRON.sh @@ -95,7 +95,7 @@ echo "" python -c " import pkg_resources -packages = ['peft', 'accelerate', 'transformers', 'modelscope', 'oss2', 'vllm', 'transformer_engine', 'megatron_core', 'flash_attn', 'numpy'] +packages = ['peft', 'accelerate', 'transformers', 'modelscope', 'vllm', 'transformer_engine', 'megatron_core', 'flash_attn', 'numpy'] print('Installed package versions:') print('-' * 40) From b4c5fbf8bf971b8ff735d53522e231ff2b650957 Mon Sep 17 00:00:00 2001 From: tastelikefeet Date: Sun, 5 Apr 2026 22:58:15 +0800 Subject: [PATCH 05/11] remove oss2 --- INSTALL_MEGATRON.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/INSTALL_MEGATRON.sh b/INSTALL_MEGATRON.sh index fb23524b..dd6266cc 100644 --- a/INSTALL_MEGATRON.sh +++ b/INSTALL_MEGATRON.sh @@ -55,8 +55,8 @@ echo "Using CUDA architecture: $TORCH_CUDA_ARCH_LIST" # Install latest base packages echo "" -echo "Installing peft, accelerate, transformers, modelscope, oss2..." 
-pip install --upgrade peft accelerate transformers "modelscope[framework]" oss2 +echo "Installing peft, accelerate, transformers, modelscope..." +pip install --upgrade peft accelerate transformers "modelscope[framework]" # Install latest vllm echo "" From fe6e8663012c31150125d131a25af4e4fd76a15f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9B=A8=E6=B3=93?= Date: Mon, 6 Apr 2026 10:52:46 +0800 Subject: [PATCH 06/11] fix install --- INSTALL_MEGATRON.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/INSTALL_MEGATRON.sh b/INSTALL_MEGATRON.sh index dd6266cc..e2d1db4b 100644 --- a/INSTALL_MEGATRON.sh +++ b/INSTALL_MEGATRON.sh @@ -4,7 +4,7 @@ # which always occur error set -e # Exit immediately on error - +export SETUPTOOLS_USE_DISTUTILS=local echo "==========================================" echo "Starting deep learning dependencies installation..." echo "==========================================" From b2d1466d20e277f1a49cf52d6c92e55010ee1eed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9B=A8=E6=B3=93?= Date: Mon, 6 Apr 2026 11:31:39 +0800 Subject: [PATCH 07/11] fix server config --- cookbook/client/server/megatron/server_config.yaml | 4 ++-- pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cookbook/client/server/megatron/server_config.yaml b/cookbook/client/server/megatron/server_config.yaml index 21d8a16b..0f66dd57 100644 --- a/cookbook/client/server/megatron/server_config.yaml +++ b/cookbook/client/server/megatron/server_config.yaml @@ -42,7 +42,7 @@ applications: import_path: sampler args: model_id: "ms://Qwen/Qwen3.5-27B" # ModelScope model identifier - nproc_per_node: 8 # Number of GPU processes per node + nproc_per_node: 4 # Number of GPU processes per node sampler_type: vllm # Inference engine: 'vllm' (fast) or 'torch' (TorchSampler) engine_args: # vLLM engine-specific settings max_model_len: 32000 # Maximum sequence length the engine supports @@ -84,7 +84,7 @@ applications: model_id: "ms://Qwen/Qwen3.5-27B" 
# ModelScope model identifier max_length: 32000 # model max length max_loras: 5 # model max loras - nproc_per_node: 8 # Number of GPU processes per node + nproc_per_node: 4 # Number of GPU processes per node device_group: name: model ranks: 4 # GPU rank indices diff --git a/pyproject.toml b/pyproject.toml index ce392b26..f3880b2f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ dependencies = [ "datasets>=3.0,<4.0", "omegaconf>=2.3.0,<3.0.0", "fastapi", - "modelscope[framework]>=1.34.0", + "modelscope[framework]>=1.35.0", "safetensors", "peft>=0.11.0,<=0.19.0", "transformers", From 1c20c5eacb0b2333715afbf4d16e5887c66eb000 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9B=A8=E6=B3=93?= Date: Mon, 6 Apr 2026 11:47:58 +0800 Subject: [PATCH 08/11] fix trust_remote_code --- Dockerfile | 3 --- cookbook/client/server/megatron/run.sh | 2 +- cookbook/client/server/megatron/server.py | 2 +- src/twinkle/dataset/base.py | 6 ++++++ src/twinkle/hub/hub.py | 4 +++- 5 files changed, 11 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8bae7c6a..a3aa4ff6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,6 +21,3 @@ RUN sh INSTALL_MEGATRON.sh RUN pip install --no-cache-dir tinker==0.14.0 "ray[serve]" transformers peft accelerate -U RUN pip install -e . 
--no-build-isolation - -ENV TWINKLE_WORKDIR=/data -CMD ["bash", "-c", "mkdir -p $TWINKLE_WORKDIR && cd $TWINKLE_WORKDIR && bash /twinkle/cookbook/client/server/megatron/run.sh 2>&1 | tee $TWINKLE_WORKDIR/run.log"] diff --git a/cookbook/client/server/megatron/run.sh b/cookbook/client/server/megatron/run.sh index 38befef2..c7db36d1 100644 --- a/cookbook/client/server/megatron/run.sh +++ b/cookbook/client/server/megatron/run.sh @@ -3,4 +3,4 @@ export RAY_ROTATION_BACKUP_COUNT=1 CUDA_VISIBLE_DEVICES=0,1,2,3 ray start --head --port=6379 --num-gpus=4 --disable-usage-stats --include-dashboard=false CUDA_VISIBLE_DEVICES=4,5,6,7 ray start --address=127.0.0.1:6379 --num-gpus=4 CUDA_VISIBLE_DEVICES="" ray start --address=127.0.0.1:6379 --num-gpus=0 -python server.py +python "$(dirname "$0")/server.py" diff --git a/cookbook/client/server/megatron/server.py b/cookbook/client/server/megatron/server.py index e38f43a4..d6cb87c5 100644 --- a/cookbook/client/server/megatron/server.py +++ b/cookbook/client/server/megatron/server.py @@ -9,7 +9,7 @@ import os # Enable Ray debug mode for verbose logging during development -os.environ['TWINKLE_TRUST_REMOTE_CODE'] = '1' +os.environ['TWINKLE_TRUST_REMOTE_CODE'] = '0' from twinkle.server import launch_server diff --git a/src/twinkle/dataset/base.py b/src/twinkle/dataset/base.py index 8ffe5cea..98bb9c8f 100644 --- a/src/twinkle/dataset/base.py +++ b/src/twinkle/dataset/base.py @@ -51,6 +51,9 @@ class Dataset(TorchDataset): """ def __init__(self, dataset_meta: DatasetMeta, **kwargs): + trust_remote_code = os.environ.get('TWINKLE_TRUST_REMOTE_CODE', '1').lower() not in ('', '0', 'false') + if not trust_remote_code: + kwargs['trust_remote_code'] = False dataset = self._load_dataset(dataset_meta, **kwargs) self.datasets = {dataset_meta.get_id(): dataset} self.dataset = dataset @@ -247,6 +250,9 @@ def add_dataset(self, dataset_meta: DatasetMeta, **kwargs): Args: dataset_meta: The dataset_meta information of the loaded dataset. 
""" + trust_remote_code = os.environ.get('TWINKLE_TRUST_REMOTE_CODE', '1').lower() not in ('', '0', 'false') + if not trust_remote_code: + kwargs['trust_remote_code'] = False dataset = self._load_dataset(dataset_meta, **kwargs) self.datasets[dataset_meta.get_id()] = dataset diff --git a/src/twinkle/hub/hub.py b/src/twinkle/hub/hub.py index 916a42b2..15fc1ef5 100644 --- a/src/twinkle/hub/hub.py +++ b/src/twinkle/hub/hub.py @@ -401,7 +401,7 @@ def load_dataset(cls, cls.try_login(token) if revision is None or revision == 'main': revision = 'master' - load_kwargs = {'trust_remote_code': True} + load_kwargs = {'trust_remote_code': kwargs.get('trust_remote_code', True)} return MsDataset.load( dataset_id, subset_name=subset_name, @@ -595,6 +595,7 @@ def load_dataset(cls, from datasets import load_dataset if revision is None or revision == 'master': revision = 'main' + trust_remote_code = kwargs.get('trust_remote_code', True) return load_dataset( dataset_id, name=subset_name, @@ -602,6 +603,7 @@ def load_dataset(cls, streaming=streaming, revision=revision, download_mode=download_mode, + trust_remote_code=trust_remote_code, num_proc=num_proc) @classmethod From 
4086a54945e3153f78c75ee9e54f27ca676984d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9B=A8=E6=B3=93?= Date: Mon, 6 Apr 2026 12:03:49 +0800 Subject: [PATCH 10/11] fix support models --- cookbook/client/tinker/modelscope/sample.py | 2 +- src/twinkle/server/gateway/server.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cookbook/client/tinker/modelscope/sample.py b/cookbook/client/tinker/modelscope/sample.py index 72bd9f24..40c9b327 100644 --- a/cookbook/client/tinker/modelscope/sample.py +++ b/cookbook/client/tinker/modelscope/sample.py @@ -45,7 +45,7 @@ ] ) -input_feature = template.encode(trajectory, add_generation_prompt=True) +input_feature = template.batch_encode([trajectory], add_generation_prompt=True)[0] input_ids = input_feature['input_ids'].tolist() diff --git a/src/twinkle/server/gateway/server.py b/src/twinkle/server/gateway/server.py index cd942e61..79a90349 100644 --- a/src/twinkle/server/gateway/server.py +++ b/src/twinkle/server/gateway/server.py @@ -36,7 +36,7 @@ def __init__(self, self.http_options = http_options or {} self.proxy = ServiceProxy(http_options=http_options, route_prefix=self.route_prefix) self.supported_models = self._normalize_models(supported_models) or [ - types.SupportedModel(model_name='Qwen/Qwen3.5-4B'), + types.SupportedModel(model_name='Qwen/Qwen3.5-27B'), ] self._modelscope_config_lock = asyncio.Lock() From b55ba2c82cb2da7ae2d7a89ca46ee036e5715a83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9B=A8=E6=B3=93?= Date: Mon, 6 Apr 2026 12:50:02 +0800 Subject: [PATCH 11/11] fix torchrun --- src/twinkle/processor/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/twinkle/processor/base.py b/src/twinkle/processor/base.py index da6bb3d5..3269a574 100644 --- a/src/twinkle/processor/base.py +++ b/src/twinkle/processor/base.py @@ -97,7 +97,8 @@ def to_tensor(_input): # so tensor ops like labels != ignore_index or .to(device) would fail without this. 
if isinstance(value, np.ndarray): value = torch.from_numpy(value) - elif isinstance(value, list) and isinstance(value[0], (int, float, np.number)): + elif (isinstance(value, list) and isinstance(value[0], + (int, float, np.number))) or key == 'position_ids': value = torch.tensor(value) elif key in self.VLM_CONCAT_FIELDS: if not isinstance(value[0], torch.Tensor):