Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,18 @@ RUN curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.s
rm Miniconda3-latest-Linux-x86_64.sh
ENV PATH="/opt/conda/bin:${PATH}"
RUN conda create -n twinkle python=3.12 -y --override-channels -c conda-forge
SHELL ["conda", "run", "-n", "twinkle", "/bin/bash", "-c"]
ENV PATH="/opt/conda/envs/twinkle/bin:${PATH}"

# Clone and install twinkle, checkout to latest v-tag
RUN git clone https://github.com/modelscope/twinkle.git
WORKDIR /twinkle
RUN echo "Available v-tags:" && git tag -l 'v*' --sort=-v:refname && \
LATEST_TAG=$(git tag -l 'v*' --sort=-v:refname | head -n 1) && \
echo "Checking out: $LATEST_TAG" && \
git checkout "$LATEST_TAG"
RUN echo "Available release branches:" && git branch -r -l 'origin/release/*' --sort=-v:refname && \
LATEST_RELEASE=$(git branch -r -l 'origin/release/*' --sort=-v:refname | head -n 1 | tr -d ' ') && \
echo "Checking out: $LATEST_RELEASE" && \
git checkout --track "$LATEST_RELEASE"

RUN sh INSTALL_MEGATRON.sh

RUN pip install --no-cache-dir tinker==0.14.0 "ray[serve]" transformers peft accelerate -U

RUN pip install -e . --no-build-isolation

CMD ["bash", "cookbook/client/server/megatron/run.sh"]
17 changes: 7 additions & 10 deletions INSTALL_MEGATRON.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# which always causes errors

set -e # Exit immediately on error

export SETUPTOOLS_USE_DISTUTILS=local
echo "=========================================="
echo "Starting deep learning dependencies installation..."
echo "=========================================="
Expand Down Expand Up @@ -55,8 +55,8 @@ echo "Using CUDA architecture: $TORCH_CUDA_ARCH_LIST"

# Install latest base packages
echo ""
echo "Installing peft, accelerate, transformers, modelscope, oss2..."
pip install --upgrade peft accelerate transformers "modelscope[framework]" oss2
echo "Installing peft, accelerate, transformers, modelscope..."
pip install --upgrade peft accelerate transformers "modelscope[framework]"

# Install latest vllm
echo ""
Expand All @@ -71,7 +71,9 @@ echo "Site-packages path: $SITE_PACKAGES"

CUDNN_PATH=$SITE_PACKAGES/nvidia/cudnn \
CPLUS_INCLUDE_PATH=$SITE_PACKAGES/nvidia/cudnn/include \
pip install --no-build-isolation "transformer_engine[pytorch]" megatron_core --no-cache-dir
pip install --no-build-isolation "transformer_engine[pytorch]" --no-cache-dir

pip install megatron_core mcore_bridge --no-cache-dir

# Install flash-attention (force local build)
echo ""
Expand All @@ -87,11 +89,6 @@ pip install flash-linear-attention -U
echo ""
echo "Installing numpy==2.2 and deep_gemm..."
pip install numpy==2.2
pip uninstall deep_gemm -y
cd /tmp
git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git
cd DeepGEMM
pip install . --no-build-isolation

# Verify installation
echo ""
Expand All @@ -100,7 +97,7 @@ echo ""
python -c "
import pkg_resources

packages = ['peft', 'accelerate', 'transformers', 'modelscope', 'oss2', 'vllm', 'transformer_engine', 'megatron_core', 'flash_attn', 'numpy']
packages = ['peft', 'accelerate', 'transformers', 'modelscope', 'vllm', 'transformer_engine', 'megatron_core', 'flash_attn', 'numpy']

print('Installed package versions:')
print('-' * 40)
Expand Down
2 changes: 1 addition & 1 deletion cookbook/client/server/megatron/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ export RAY_ROTATION_BACKUP_COUNT=1
CUDA_VISIBLE_DEVICES=0,1,2,3 ray start --head --port=6379 --num-gpus=4 --disable-usage-stats --include-dashboard=false
CUDA_VISIBLE_DEVICES=4,5,6,7 ray start --address=127.0.0.1:6379 --num-gpus=4
CUDA_VISIBLE_DEVICES="" ray start --address=127.0.0.1:6379 --num-gpus=0
python server.py
python "$(dirname "$0")/server.py"
2 changes: 1 addition & 1 deletion cookbook/client/server/megatron/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import os

# Control whether remote code from model/dataset repos is trusted ('1' = trust)
os.environ['TWINKLE_TRUST_REMOTE_CODE'] = '1'
os.environ['TWINKLE_TRUST_REMOTE_CODE'] = '0'

from twinkle.server import launch_server

Expand Down
4 changes: 2 additions & 2 deletions cookbook/client/server/megatron/server_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ applications:
import_path: sampler
args:
model_id: "ms://Qwen/Qwen3.5-27B" # ModelScope model identifier
nproc_per_node: 8 # Number of GPU processes per node
nproc_per_node: 4 # Number of GPU processes per node
sampler_type: vllm # Inference engine: 'vllm' (fast) or 'torch' (TorchSampler)
engine_args: # vLLM engine-specific settings
max_model_len: 32000 # Maximum sequence length the engine supports
Expand Down Expand Up @@ -84,7 +84,7 @@ applications:
model_id: "ms://Qwen/Qwen3.5-27B" # ModelScope model identifier
max_length: 32000 # model max length
max_loras: 5 # model max loras
nproc_per_node: 8 # Number of GPU processes per node
nproc_per_node: 4 # Number of GPU processes per node
device_group:
name: model
ranks: 4 # GPU rank indices
Expand Down
2 changes: 1 addition & 1 deletion cookbook/client/tinker/modelscope/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
]
)

input_feature = template.encode(trajectory, add_generation_prompt=True)
input_feature = template.batch_encode([trajectory], add_generation_prompt=True)[0]

input_ids = input_feature['input_ids'].tolist()

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ dependencies = [
"datasets>=3.0,<4.0",
"omegaconf>=2.3.0,<3.0.0",
"fastapi",
"modelscope[framework]>=1.34.0",
"modelscope[framework]>=1.35.0",
"safetensors",
"peft>=0.11.0,<=0.19.0",
"transformers",
Expand Down
6 changes: 6 additions & 0 deletions src/twinkle/dataset/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ class Dataset(TorchDataset):
"""

def __init__(self, dataset_meta: DatasetMeta, **kwargs):
trust_remote_code = bool(os.environ.get('TWINKLE_TRUST_REMOTE_CODE', '1'))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

There are two issues with this line:

  1. Logic Error: In Python, bool() on any non-empty string returns True. Since os.environ.get returns a string, bool("0") will be True, meaning the environment variable cannot be used to set trust_remote_code to False as intended.
  2. NameError: The os module is not imported in this file (only os.path is), so accessing os.environ will raise a NameError at runtime.
Suggested change
trust_remote_code = bool(os.environ.get('TWINKLE_TRUST_REMOTE_CODE', '1'))
trust_remote_code = os.environ.get('TWINKLE_TRUST_REMOTE_CODE', '1') == '1'

if not trust_remote_code:
kwargs['trust_remote_code'] = False
dataset = self._load_dataset(dataset_meta, **kwargs)
self.datasets = {dataset_meta.get_id(): dataset}
self.dataset = dataset
Expand Down Expand Up @@ -247,6 +250,9 @@ def add_dataset(self, dataset_meta: DatasetMeta, **kwargs):
Args:
dataset_meta: The dataset_meta information of the loaded dataset.
"""
trust_remote_code = bool(os.environ.get('TWINKLE_TRUST_REMOTE_CODE', '1'))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Same issues as in the constructor: the bool() conversion on a string does not work for boolean flags, and the os module is missing from the imports.

Suggested change
trust_remote_code = bool(os.environ.get('TWINKLE_TRUST_REMOTE_CODE', '1'))
trust_remote_code = os.environ.get('TWINKLE_TRUST_REMOTE_CODE', '1') == '1'

if not trust_remote_code:
kwargs['trust_remote_code'] = False
dataset = self._load_dataset(dataset_meta, **kwargs)
self.datasets[dataset_meta.get_id()] = dataset

Expand Down
4 changes: 3 additions & 1 deletion src/twinkle/hub/hub.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ def load_dataset(cls,
cls.try_login(token)
if revision is None or revision == 'main':
revision = 'master'
load_kwargs = {'trust_remote_code': True}
load_kwargs = {'trust_remote_code': kwargs.get('trust_remote_code', True)}
return MsDataset.load(
dataset_id,
subset_name=subset_name,
Expand Down Expand Up @@ -595,13 +595,15 @@ def load_dataset(cls,
from datasets import load_dataset
if revision is None or revision == 'master':
revision = 'main'
trust_remote_code = kwargs.get('trust_remote_code', True)
return load_dataset(
dataset_id,
name=subset_name,
split=split,
streaming=streaming,
revision=revision,
download_mode=download_mode,
trust_remote_code=trust_remote_code,
num_proc=num_proc)

@classmethod
Expand Down
3 changes: 2 additions & 1 deletion src/twinkle/processor/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ def to_tensor(_input):
# so tensor ops like labels != ignore_index or .to(device) would fail without this.
if isinstance(value, np.ndarray):
value = torch.from_numpy(value)
elif isinstance(value, list) and isinstance(value[0], (int, float, np.number)):
elif (isinstance(value, list) and isinstance(value[0],
(int, float, np.number))) or key == 'position_ids':
Comment on lines +100 to +101
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Accessing value[0] without verifying that the list is non-empty will raise an IndexError if value is []. It is safer to check the list's truthiness (which is False for empty lists) before indexing.

Suggested change
elif (isinstance(value, list) and isinstance(value[0],
(int, float, np.number))) or key == 'position_ids':
elif (isinstance(value, list) and value and isinstance(value[0],
(int, float, np.number))) or key == 'position_ids':

value = torch.tensor(value)
elif key in self.VLM_CONCAT_FIELDS:
if not isinstance(value[0], torch.Tensor):
Expand Down
2 changes: 1 addition & 1 deletion src/twinkle/server/gateway/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def __init__(self,
self.http_options = http_options or {}
self.proxy = ServiceProxy(http_options=http_options, route_prefix=self.route_prefix)
self.supported_models = self._normalize_models(supported_models) or [
types.SupportedModel(model_name='Qwen/Qwen3.5-4B'),
types.SupportedModel(model_name='Qwen/Qwen3.5-27B'),
]
self._modelscope_config_lock = asyncio.Lock()

Expand Down
Loading