From 9072c90a04d58560d244d090a702e45adbdd0694 Mon Sep 17 00:00:00 2001
From: tastelikefeet <yuze.zyz@alibaba-inc.com>
Date: Sun, 5 Apr 2026 20:09:13 +0800
Subject: [PATCH 01/11] bump version to 0.2.0

---
 pyproject.toml         | 2 +-
 src/twinkle/version.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 584099cb..ce392b26 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "twinkle-kit"
-version = "0.2.dev0"
+version = "0.2.0"
 description = "Training API for large language models with efficient data handling and advanced optimization techniques."
 readme = "README.md"
 authors = [{ name = "ModelScope", email = "contact@modelscope.cn" }]
diff --git a/src/twinkle/version.py b/src/twinkle/version.py
index 05103d1d..08a7c147 100644
--- a/src/twinkle/version.py
+++ b/src/twinkle/version.py
@@ -1,5 +1,5 @@
 # Make sure to modify __release_datetime__ to release time when making official release.
-__version__ = '0.2.dev0'
+__version__ = '0.2.0'
 # default release datetime for branches under active development is set
 # to be a time far-far-away-into-the-future
 __release_datetime__ = '2099-10-13 08:56:12'

From 967093a26a252738eae339346916dfc6c1a0ad9f Mon Sep 17 00:00:00 2001
From: tastelikefeet <yuze.zyz@alibaba-inc.com>
Date: Sun, 5 Apr 2026 21:11:54 +0800
Subject: [PATCH 02/11] Dockerfile: check out latest release branch; drop DeepGEMM build

---
 Dockerfile          | 10 +++++-----
 INSTALL_MEGATRON.sh |  5 -----
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 97c35113..d01937f5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,15 +6,15 @@ RUN curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.s
     rm Miniconda3-latest-Linux-x86_64.sh
 ENV PATH="/opt/conda/bin:${PATH}"
 RUN conda create -n twinkle python=3.12 -y --override-channels -c conda-forge
-SHELL ["conda", "run", "-n", "twinkle", "/bin/bash", "-c"]
+ENV PATH="/opt/conda/envs/twinkle/bin:${PATH}"
 
 # Clone and install twinkle, checkout to latest v-tag
 RUN git clone https://github.com/modelscope/twinkle.git
 WORKDIR /twinkle
-RUN echo "Available v-tags:" && git tag -l 'v*' --sort=-v:refname && \
-    LATEST_TAG=$(git tag -l 'v*' --sort=-v:refname | head -n 1) && \
-    echo "Checking out: $LATEST_TAG" && \
-    git checkout "$LATEST_TAG"
+RUN echo "Available release branches:" && git branch -r -l 'origin/release/*' --sort=-v:refname && \
+    LATEST_RELEASE=$(git branch -r -l 'origin/release/*' --sort=-v:refname | head -n 1 | tr -d ' ') && \
+    echo "Checking out: $LATEST_RELEASE" && \
+    git checkout --track "$LATEST_RELEASE"
 
 RUN sh INSTALL_MEGATRON.sh
 
diff --git a/INSTALL_MEGATRON.sh b/INSTALL_MEGATRON.sh
index 775f7588..cc7298d2 100644
--- a/INSTALL_MEGATRON.sh
+++ b/INSTALL_MEGATRON.sh
@@ -87,11 +87,6 @@ pip install flash-linear-attention -U
 echo ""
 echo "Installing numpy==2.2 and deep_gemm..."
 pip install numpy==2.2
-pip uninstall deep_gemm -y
-cd /tmp
-git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git
-cd DeepGEMM
-pip install . --no-build-isolation
 
 # Verify installation
 echo ""

From 68aa05b6db9277d9b850c3e4218d42ddcb728b78 Mon Sep 17 00:00:00 2001
From: tastelikefeet <yuze.zyz@alibaba-inc.com>
Date: Sun, 5 Apr 2026 21:14:20 +0800
Subject: [PATCH 03/11] Dockerfile: add /etc/hosts entry for github.com

---
 Dockerfile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index d01937f5..ca6d39ed 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,7 @@
 FROM modelscope-registry.cn-hangzhou.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda12.8.1-py311-torch2.9.1-1.35.0
 
+RUN echo '47.110.159.78 github.com' >> /etc/hosts
+
 # Install miniconda with Python 3.12
 RUN curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
     bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda && \

From 1eeb423ad2b426b91b8e5599db8c66cbcd95a3fb Mon Sep 17 00:00:00 2001
From: tastelikefeet <yuze.zyz@alibaba-inc.com>
Date: Sun, 5 Apr 2026 21:58:37 +0800
Subject: [PATCH 04/11] fix dockerfile

---
 Dockerfile          | 5 ++---
 INSTALL_MEGATRON.sh | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index ca6d39ed..8bae7c6a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,5 @@
 FROM modelscope-registry.cn-hangzhou.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda12.8.1-py311-torch2.9.1-1.35.0
 
-RUN echo '47.110.159.78 github.com' >> /etc/hosts
-
 # Install miniconda with Python 3.12
 RUN curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
     bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda && \
@@ -24,4 +22,5 @@ RUN pip install --no-cache-dir tinker==0.14.0 "ray[serve]" transformers peft acc
 
 RUN pip install -e . --no-build-isolation
 
-CMD ["bash", "cookbook/client/server/megatron/run.sh"]
+ENV TWINKLE_WORKDIR=/data
+CMD ["bash", "-c", "mkdir -p $TWINKLE_WORKDIR && cd $TWINKLE_WORKDIR && bash /twinkle/cookbook/client/server/megatron/run.sh 2>&1 | tee $TWINKLE_WORKDIR/run.log"]
diff --git a/INSTALL_MEGATRON.sh b/INSTALL_MEGATRON.sh
index cc7298d2..fb23524b 100644
--- a/INSTALL_MEGATRON.sh
+++ b/INSTALL_MEGATRON.sh
@@ -95,7 +95,7 @@ echo ""
 python -c "
 import pkg_resources
 
-packages = ['peft', 'accelerate', 'transformers', 'modelscope', 'oss2', 'vllm', 'transformer_engine', 'megatron_core', 'flash_attn', 'numpy']
+packages = ['peft', 'accelerate', 'transformers', 'modelscope', 'vllm', 'transformer_engine', 'megatron_core', 'flash_attn', 'numpy']
 
 print('Installed package versions:')
 print('-' * 40)

From b4c5fbf8bf971b8ff735d53522e231ff2b650957 Mon Sep 17 00:00:00 2001
From: tastelikefeet <yuze.zyz@alibaba-inc.com>
Date: Sun, 5 Apr 2026 22:58:15 +0800
Subject: [PATCH 05/11] remove oss2

---
 INSTALL_MEGATRON.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/INSTALL_MEGATRON.sh b/INSTALL_MEGATRON.sh
index fb23524b..dd6266cc 100644
--- a/INSTALL_MEGATRON.sh
+++ b/INSTALL_MEGATRON.sh
@@ -55,8 +55,8 @@ echo "Using CUDA architecture: $TORCH_CUDA_ARCH_LIST"
 
 # Install latest base packages
 echo ""
-echo "Installing peft, accelerate, transformers, modelscope, oss2..."
-pip install --upgrade peft accelerate transformers "modelscope[framework]" oss2
+echo "Installing peft, accelerate, transformers, modelscope..."
+pip install --upgrade peft accelerate transformers "modelscope[framework]"
 
 # Install latest vllm
 echo ""

From fe6e8663012c31150125d131a25af4e4fd76a15f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9B=A8=E6=B3=93?= <yuze.zyz@alibaba-inc.com>
Date: Mon, 6 Apr 2026 10:52:46 +0800
Subject: [PATCH 06/11] fix install: export SETUPTOOLS_USE_DISTUTILS=local

---
 INSTALL_MEGATRON.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/INSTALL_MEGATRON.sh b/INSTALL_MEGATRON.sh
index dd6266cc..e2d1db4b 100644
--- a/INSTALL_MEGATRON.sh
+++ b/INSTALL_MEGATRON.sh
@@ -4,7 +4,7 @@
 # which always occur error
 
 set -e  # Exit immediately on error
-
+export SETUPTOOLS_USE_DISTUTILS=local
 echo "=========================================="
 echo "Starting deep learning dependencies installation..."
 echo "=========================================="

From b2d1466d20e277f1a49cf52d6c92e55010ee1eed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9B=A8=E6=B3=93?= <yuze.zyz@alibaba-inc.com>
Date: Mon, 6 Apr 2026 11:31:39 +0800
Subject: [PATCH 07/11] fix server config: nproc_per_node 8 -> 4; require modelscope>=1.35.0

---
 cookbook/client/server/megatron/server_config.yaml | 4 ++--
 pyproject.toml                                     | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cookbook/client/server/megatron/server_config.yaml b/cookbook/client/server/megatron/server_config.yaml
index 21d8a16b..0f66dd57 100644
--- a/cookbook/client/server/megatron/server_config.yaml
+++ b/cookbook/client/server/megatron/server_config.yaml
@@ -42,7 +42,7 @@ applications:
     import_path: sampler
     args:
       model_id: "ms://Qwen/Qwen3.5-27B"  # ModelScope model identifier
-      nproc_per_node: 8               # Number of GPU processes per node
+      nproc_per_node: 4               # Number of GPU processes per node
       sampler_type: vllm              # Inference engine: 'vllm' (fast) or 'torch' (TorchSampler)
       engine_args:                    # vLLM engine-specific settings
         max_model_len: 32000           # Maximum sequence length the engine supports
@@ -84,7 +84,7 @@ applications:
       model_id: "ms://Qwen/Qwen3.5-27B" # ModelScope model identifier
       max_length: 32000                           # model max length
       max_loras: 5                                # model max loras
-      nproc_per_node: 8                           # Number of GPU processes per node
+      nproc_per_node: 4                           # Number of GPU processes per node
       device_group:
         name: model
         ranks: 4       # GPU rank indices
diff --git a/pyproject.toml b/pyproject.toml
index ce392b26..f3880b2f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,7 @@ dependencies = [
   "datasets>=3.0,<4.0",
   "omegaconf>=2.3.0,<3.0.0",
   "fastapi",
-  "modelscope[framework]>=1.34.0",
+  "modelscope[framework]>=1.35.0",
   "safetensors",
   "peft>=0.11.0,<=0.19.0",
   "transformers",

From 1c20c5eacb0b2333715afbf4d16e5887c66eb000 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9B=A8=E6=B3=93?= <yuze.zyz@alibaba-inc.com>
Date: Mon, 6 Apr 2026 11:47:58 +0800
Subject: [PATCH 08/11] fix trust_remote_code

---
 Dockerfile                                | 3 ---
 cookbook/client/server/megatron/run.sh    | 2 +-
 cookbook/client/server/megatron/server.py | 2 +-
 src/twinkle/dataset/base.py               | 6 ++++++
 src/twinkle/hub/hub.py                    | 4 +++-
 5 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 8bae7c6a..a3aa4ff6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -21,6 +21,3 @@ RUN sh INSTALL_MEGATRON.sh
 RUN pip install --no-cache-dir tinker==0.14.0 "ray[serve]" transformers peft accelerate -U
 
 RUN pip install -e . --no-build-isolation
-
-ENV TWINKLE_WORKDIR=/data
-CMD ["bash", "-c", "mkdir -p $TWINKLE_WORKDIR && cd $TWINKLE_WORKDIR && bash /twinkle/cookbook/client/server/megatron/run.sh 2>&1 | tee $TWINKLE_WORKDIR/run.log"]
diff --git a/cookbook/client/server/megatron/run.sh b/cookbook/client/server/megatron/run.sh
index 38befef2..c7db36d1 100644
--- a/cookbook/client/server/megatron/run.sh
+++ b/cookbook/client/server/megatron/run.sh
@@ -3,4 +3,4 @@ export RAY_ROTATION_BACKUP_COUNT=1
 CUDA_VISIBLE_DEVICES=0,1,2,3 ray start --head --port=6379 --num-gpus=4 --disable-usage-stats --include-dashboard=false
 CUDA_VISIBLE_DEVICES=4,5,6,7 ray start --address=127.0.0.1:6379 --num-gpus=4
 CUDA_VISIBLE_DEVICES="" ray start --address=127.0.0.1:6379 --num-gpus=0
-python server.py
+python "$(dirname "$0")/server.py"
diff --git a/cookbook/client/server/megatron/server.py b/cookbook/client/server/megatron/server.py
index e38f43a4..d6cb87c5 100644
--- a/cookbook/client/server/megatron/server.py
+++ b/cookbook/client/server/megatron/server.py
@@ -9,7 +9,7 @@
 import os
 
 # Enable Ray debug mode for verbose logging during development
-os.environ['TWINKLE_TRUST_REMOTE_CODE'] = '1'
+os.environ['TWINKLE_TRUST_REMOTE_CODE'] = '0'
 
 from twinkle.server import launch_server
 
diff --git a/src/twinkle/dataset/base.py b/src/twinkle/dataset/base.py
index 8ffe5cea..98bb9c8f 100644
--- a/src/twinkle/dataset/base.py
+++ b/src/twinkle/dataset/base.py
@@ -51,6 +51,9 @@ class Dataset(TorchDataset):
     """
 
     def __init__(self, dataset_meta: DatasetMeta, **kwargs):
+        # bool('0') is True, so parse the flag text: '', '0', 'false', 'no', 'off' disable remote code.
+        if os.environ.get('TWINKLE_TRUST_REMOTE_CODE', '1').strip().lower() in ('', '0', 'false', 'no', 'off'):
+            kwargs['trust_remote_code'] = False
         dataset = self._load_dataset(dataset_meta, **kwargs)
         self.datasets = {dataset_meta.get_id(): dataset}
         self.dataset = dataset
@@ -247,6 +250,9 @@ def add_dataset(self, dataset_meta: DatasetMeta, **kwargs):
         Args:
             dataset_meta: The dataset_meta information of the loaded dataset.
         """
+        # bool('0') is True, so parse the flag text: '', '0', 'false', 'no', 'off' disable remote code.
+        if os.environ.get('TWINKLE_TRUST_REMOTE_CODE', '1').strip().lower() in ('', '0', 'false', 'no', 'off'):
+            kwargs['trust_remote_code'] = False
         dataset = self._load_dataset(dataset_meta, **kwargs)
         self.datasets[dataset_meta.get_id()] = dataset
 
diff --git a/src/twinkle/hub/hub.py b/src/twinkle/hub/hub.py
index 916a42b2..15fc1ef5 100644
--- a/src/twinkle/hub/hub.py
+++ b/src/twinkle/hub/hub.py
@@ -401,7 +401,7 @@ def load_dataset(cls,
         cls.try_login(token)
         if revision is None or revision == 'main':
             revision = 'master'
-        load_kwargs = {'trust_remote_code': True}
+        load_kwargs = {'trust_remote_code': kwargs.get('trust_remote_code', True)}
         return MsDataset.load(
             dataset_id,
             subset_name=subset_name,
@@ -595,6 +595,7 @@ def load_dataset(cls,
         from datasets import load_dataset
         if revision is None or revision == 'master':
             revision = 'main'
+        trust_remote_code = kwargs.get('trust_remote_code', True)
         return load_dataset(
             dataset_id,
             name=subset_name,
@@ -602,6 +603,7 @@ def load_dataset(cls,
             streaming=streaming,
             revision=revision,
             download_mode=download_mode,
+            trust_remote_code=trust_remote_code,
             num_proc=num_proc)
 
     @classmethod

From 4c7cdfa133090d4a009952610d0d635058e713f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9B=A8=E6=B3=93?= <yuze.zyz@alibaba-inc.com>
Date: Mon, 6 Apr 2026 11:49:42 +0800
Subject: [PATCH 09/11] fix install script

---
 INSTALL_MEGATRON.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/INSTALL_MEGATRON.sh b/INSTALL_MEGATRON.sh
index e2d1db4b..e86e5478 100644
--- a/INSTALL_MEGATRON.sh
+++ b/INSTALL_MEGATRON.sh
@@ -71,7 +71,9 @@ echo "Site-packages path: $SITE_PACKAGES"
 
 CUDNN_PATH=$SITE_PACKAGES/nvidia/cudnn \
 CPLUS_INCLUDE_PATH=$SITE_PACKAGES/nvidia/cudnn/include \
-pip install --no-build-isolation "transformer_engine[pytorch]" megatron_core --no-cache-dir
+pip install --no-build-isolation "transformer_engine[pytorch]" --no-cache-dir
+
+pip install megatron_core mcore_bridge --no-cache-dir
 
 # Install flash-attention (force local build)
 echo ""

From 4086a54945e3153f78c75ee9e54f27ca676984d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9B=A8=E6=B3=93?= <yuze.zyz@alibaba-inc.com>
Date: Mon, 6 Apr 2026 12:03:49 +0800
Subject: [PATCH 10/11] fix supported models

---
 cookbook/client/tinker/modelscope/sample.py | 2 +-
 src/twinkle/server/gateway/server.py        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cookbook/client/tinker/modelscope/sample.py b/cookbook/client/tinker/modelscope/sample.py
index 72bd9f24..40c9b327 100644
--- a/cookbook/client/tinker/modelscope/sample.py
+++ b/cookbook/client/tinker/modelscope/sample.py
@@ -45,7 +45,7 @@
     ]
 )
 
-input_feature = template.encode(trajectory, add_generation_prompt=True)
+input_feature = template.batch_encode([trajectory], add_generation_prompt=True)[0]
 
 input_ids = input_feature['input_ids'].tolist()
 
diff --git a/src/twinkle/server/gateway/server.py b/src/twinkle/server/gateway/server.py
index cd942e61..79a90349 100644
--- a/src/twinkle/server/gateway/server.py
+++ b/src/twinkle/server/gateway/server.py
@@ -36,7 +36,7 @@ def __init__(self,
         self.http_options = http_options or {}
         self.proxy = ServiceProxy(http_options=http_options, route_prefix=self.route_prefix)
         self.supported_models = self._normalize_models(supported_models) or [
-            types.SupportedModel(model_name='Qwen/Qwen3.5-4B'),
+            types.SupportedModel(model_name='Qwen/Qwen3.5-27B'),
         ]
         self._modelscope_config_lock = asyncio.Lock()
 

From b55ba2c82cb2da7ae2d7a89ca46ee036e5715a83 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9B=A8=E6=B3=93?= <yuze.zyz@alibaba-inc.com>
Date: Mon, 6 Apr 2026 12:50:02 +0800
Subject: [PATCH 11/11] fix torchrun: always convert position_ids to a tensor

---
 src/twinkle/processor/base.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/twinkle/processor/base.py b/src/twinkle/processor/base.py
index da6bb3d5..3269a574 100644
--- a/src/twinkle/processor/base.py
+++ b/src/twinkle/processor/base.py
@@ -97,7 +97,8 @@ def to_tensor(_input):
                 # so tensor ops like labels != ignore_index or .to(device) would fail without this.
                 if isinstance(value, np.ndarray):
                     value = torch.from_numpy(value)
-                elif isinstance(value, list) and isinstance(value[0], (int, float, np.number)):
+                elif (isinstance(value, list) and isinstance(value[0],
+                                                             (int, float, np.number))) or key == 'position_ids':
                     value = torch.tensor(value)
                 elif key in self.VLM_CONCAT_FIELDS:
                     if not isinstance(value[0], torch.Tensor):