Skip to content

Commit 6493400

Browse files
committed
fix: fix package installation on windows
1 parent 809fb62 commit 6493400

13 files changed

Lines changed: 964 additions & 12 deletions

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,5 @@
66
/output_images
77
*.egg-info
88
*.pyc
9-
/dist
9+
/dist
10+
/build

pyproject.toml

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,19 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "samexporter"
7-
version = "0.4.4"
7+
version = "0.4.5"
88
description = "Exporting Segment Anything models to ONNX format"
99
authors = [
1010
{name = "Viet Anh Nguyen", email = "vietanh.dev@gmail.com"},
1111
]
1212
readme = "README.md"
1313
requires-python = ">=3.11"
14-
license = {file = "LICENSE"}
14+
license = "MIT"
15+
license-files = ["LICENSE"]
1516
classifiers = [
1617
"Programming Language :: Python :: 3.11",
1718
"Programming Language :: Python :: 3.12",
1819
"Programming Language :: Python :: 3.13",
19-
"License :: OSI Approved :: MIT License",
2020
"Operating System :: OS Independent",
2121
]
2222

@@ -28,7 +28,6 @@ dependencies = [
2828
"torch==2.10.0",
2929
"torchvision==0.25.0",
3030
"timm==0.9.2",
31-
"onnxsim==0.5.0",
3231
"numpy==1.26.4",
3332
"onnxscript==0.6.2",
3433
"osam", # CLIP tokeniser for SAM3 text prompts
@@ -39,13 +38,16 @@ dependencies = [
3938
"Bug Tracker" = "https://github.com/vietanhdev/samexporter/issues"
4039

4140
[project.optional-dependencies]
42-
dev = ["ruff", "pre-commit"]
41+
# onnxsim has no Windows wheel; the source tarball exceeds Windows 260-char
42+
# path limits. Install with: pip install samexporter[export]
43+
export = ["onnxsim==0.5.0"]
44+
dev = ["ruff", "pre-commit", "pytest"]
4345

4446
[tool.setuptools]
45-
packages = ["samexporter"]
47+
packages = ["samexporter", "samexporter.mobile_encoder", "samexporter.sam2_configs"]
4648

4749
[tool.setuptools.package-data]
48-
"samexporter.sam2_configs" = ["*.py", "*.yaml"]
50+
"samexporter.sam2_configs" = ["*.yaml", "sam2.1/*.yaml"]
4951

5052
[tool.pytest.ini_options]
5153
testpaths = ["tests"]

samexporter/export_sam2.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
import onnx
66
import torch
7-
from onnxsim import simplify
87
from sam2.build_sam import build_sam2
98
from sam2.modeling.sam2_base import SAM2Base
109
from torch import nn
@@ -217,9 +216,9 @@ def _embed_masks(
217216
# Clear any existing Hydra instance
218217
GlobalHydra.instance().clear()
219218

220-
# Get absolute path to sam2_configs
219+
# Get absolute path to sam2_configs (bundled inside the samexporter package)
221220
config_dir = os.path.abspath(
222-
os.path.join(os.path.dirname(__file__), "..", "sam2_configs")
221+
os.path.join(os.path.dirname(__file__), "sam2_configs")
223222
)
224223

225224
with initialize_config_dir(config_dir=config_dir, version_base="1.2"):
@@ -242,6 +241,8 @@ def _embed_masks(
242241
print("Saved encoder to", args.output_encoder)
243242
if args.simplify:
244243
print("Simplifying encoder...")
244+
from onnxsim import simplify
245+
245246
onnx_model = onnx.load(args.output_encoder)
246247
model_simp, check = simplify(onnx_model)
247248
assert check, "Simplified ONNX model could not be validated"
@@ -312,6 +313,8 @@ def _embed_masks(
312313
print("Saved decoder to", args.output_decoder)
313314
if args.simplify:
314315
print("Simplifying decoder...")
316+
from onnxsim import simplify
317+
315318
onnx_model = onnx.load(args.output_decoder)
316319
model_simp, check = simplify(onnx_model)
317320
assert check, "Simplified ONNX model could not be validated"

samexporter/export_sam3.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66

77
import onnx
88
import torch
9-
from onnxsim import simplify
109
from torchvision.transforms import v2
1110

1211
# Mock triton for Windows – must happen before any sam3 imports.
@@ -285,6 +284,8 @@ def export_sam3(output_dir: str, opset: int = 18, simplify_model: bool = False):
285284

286285
# ── Simplify models conditionally ─────────────────────────────────────────
287286
if simplify_model:
287+
from onnxsim import simplify
288+
288289
for path in [encoder_path, language_path, decoder_path]:
289290
print(f"Simplifying {path}...")
290291
try:
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
4+
# This source code is licensed under the license found in the
5+
# LICENSE file in the root directory of this source tree.
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
# @package _global_
2+
3+
# Model
4+
model:
5+
_target_: sam2.modeling.sam2_base.SAM2Base
6+
image_encoder:
7+
_target_: sam2.modeling.backbones.image_encoder.ImageEncoder
8+
scalp: 1
9+
trunk:
10+
_target_: sam2.modeling.backbones.hieradet.Hiera
11+
embed_dim: 112
12+
num_heads: 2
13+
neck:
14+
_target_: sam2.modeling.backbones.image_encoder.FpnNeck
15+
position_encoding:
16+
_target_: sam2.modeling.position_encoding.PositionEmbeddingSine
17+
num_pos_feats: 256
18+
normalize: true
19+
scale: null
20+
temperature: 10000
21+
d_model: 256
22+
backbone_channel_list: [896, 448, 224, 112]
23+
fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features
24+
fpn_interp_model: nearest
25+
26+
memory_attention:
27+
_target_: sam2.modeling.memory_attention.MemoryAttention
28+
d_model: 256
29+
pos_enc_at_input: true
30+
layer:
31+
_target_: sam2.modeling.memory_attention.MemoryAttentionLayer
32+
activation: relu
33+
dim_feedforward: 2048
34+
dropout: 0.1
35+
pos_enc_at_attn: false
36+
self_attention:
37+
_target_: sam2.modeling.sam.transformer.RoPEAttention
38+
rope_theta: 10000.0
39+
feat_sizes: [64, 64]
40+
embedding_dim: 256
41+
num_heads: 1
42+
downsample_rate: 1
43+
dropout: 0.1
44+
d_model: 256
45+
pos_enc_at_cross_attn_keys: true
46+
pos_enc_at_cross_attn_queries: false
47+
cross_attention:
48+
_target_: sam2.modeling.sam.transformer.RoPEAttention
49+
rope_theta: 10000.0
50+
feat_sizes: [64, 64]
51+
rope_k_repeat: True
52+
embedding_dim: 256
53+
num_heads: 1
54+
downsample_rate: 1
55+
dropout: 0.1
56+
kv_in_dim: 64
57+
num_layers: 4
58+
59+
memory_encoder:
60+
_target_: sam2.modeling.memory_encoder.MemoryEncoder
61+
out_dim: 64
62+
position_encoding:
63+
_target_: sam2.modeling.position_encoding.PositionEmbeddingSine
64+
num_pos_feats: 64
65+
normalize: true
66+
scale: null
67+
temperature: 10000
68+
mask_downsampler:
69+
_target_: sam2.modeling.memory_encoder.MaskDownSampler
70+
kernel_size: 3
71+
stride: 2
72+
padding: 1
73+
fuser:
74+
_target_: sam2.modeling.memory_encoder.Fuser
75+
layer:
76+
_target_: sam2.modeling.memory_encoder.CXBlock
77+
dim: 256
78+
kernel_size: 7
79+
padding: 3
80+
layer_scale_init_value: 1e-6
81+
use_dwconv: True # depth-wise convs
82+
num_layers: 2
83+
84+
num_maskmem: 7
85+
image_size: 1024
86+
# apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask
87+
sigmoid_scale_for_mem_enc: 20.0
88+
sigmoid_bias_for_mem_enc: -10.0
89+
use_mask_input_as_output_without_sam: true
90+
# Memory
91+
directly_add_no_mem_embed: true
92+
no_obj_embed_spatial: true
93+
# use high-resolution feature map in the SAM mask decoder
94+
use_high_res_features_in_sam: true
95+
# output 3 masks on the first click on initial conditioning frames
96+
multimask_output_in_sam: true
97+
# SAM heads
98+
iou_prediction_use_sigmoid: True
99+
# cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder
100+
use_obj_ptrs_in_encoder: true
101+
add_tpos_enc_to_obj_ptrs: true
102+
proj_tpos_enc_in_obj_ptrs: true
103+
use_signed_tpos_enc_to_obj_ptrs: true
104+
only_obj_ptrs_in_the_past_for_eval: true
105+
# object occlusion prediction
106+
pred_obj_scores: true
107+
pred_obj_scores_mlp: true
108+
fixed_no_obj_ptr: true
109+
# multimask tracking settings
110+
multimask_output_for_tracking: true
111+
use_multimask_token_for_obj_ptr: true
112+
multimask_min_pt_num: 0
113+
multimask_max_pt_num: 1
114+
use_mlp_for_obj_ptr_proj: true
115+
# Compilation flag
116+
compile_image_encoder: False
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# @package _global_
2+
3+
# Model
4+
model:
5+
_target_: sam2.modeling.sam2_base.SAM2Base
6+
image_encoder:
7+
_target_: sam2.modeling.backbones.image_encoder.ImageEncoder
8+
scalp: 1
9+
trunk:
10+
_target_: sam2.modeling.backbones.hieradet.Hiera
11+
embed_dim: 144
12+
num_heads: 2
13+
stages: [2, 6, 36, 4]
14+
global_att_blocks: [23, 33, 43]
15+
window_pos_embed_bkg_spatial_size: [7, 7]
16+
window_spec: [8, 4, 16, 8]
17+
neck:
18+
_target_: sam2.modeling.backbones.image_encoder.FpnNeck
19+
position_encoding:
20+
_target_: sam2.modeling.position_encoding.PositionEmbeddingSine
21+
num_pos_feats: 256
22+
normalize: true
23+
scale: null
24+
temperature: 10000
25+
d_model: 256
26+
backbone_channel_list: [1152, 576, 288, 144]
27+
fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features
28+
fpn_interp_model: nearest
29+
30+
memory_attention:
31+
_target_: sam2.modeling.memory_attention.MemoryAttention
32+
d_model: 256
33+
pos_enc_at_input: true
34+
layer:
35+
_target_: sam2.modeling.memory_attention.MemoryAttentionLayer
36+
activation: relu
37+
dim_feedforward: 2048
38+
dropout: 0.1
39+
pos_enc_at_attn: false
40+
self_attention:
41+
_target_: sam2.modeling.sam.transformer.RoPEAttention
42+
rope_theta: 10000.0
43+
feat_sizes: [64, 64]
44+
embedding_dim: 256
45+
num_heads: 1
46+
downsample_rate: 1
47+
dropout: 0.1
48+
d_model: 256
49+
pos_enc_at_cross_attn_keys: true
50+
pos_enc_at_cross_attn_queries: false
51+
cross_attention:
52+
_target_: sam2.modeling.sam.transformer.RoPEAttention
53+
rope_theta: 10000.0
54+
feat_sizes: [64, 64]
55+
rope_k_repeat: True
56+
embedding_dim: 256
57+
num_heads: 1
58+
downsample_rate: 1
59+
dropout: 0.1
60+
kv_in_dim: 64
61+
num_layers: 4
62+
63+
memory_encoder:
64+
_target_: sam2.modeling.memory_encoder.MemoryEncoder
65+
out_dim: 64
66+
position_encoding:
67+
_target_: sam2.modeling.position_encoding.PositionEmbeddingSine
68+
num_pos_feats: 64
69+
normalize: true
70+
scale: null
71+
temperature: 10000
72+
mask_downsampler:
73+
_target_: sam2.modeling.memory_encoder.MaskDownSampler
74+
kernel_size: 3
75+
stride: 2
76+
padding: 1
77+
fuser:
78+
_target_: sam2.modeling.memory_encoder.Fuser
79+
layer:
80+
_target_: sam2.modeling.memory_encoder.CXBlock
81+
dim: 256
82+
kernel_size: 7
83+
padding: 3
84+
layer_scale_init_value: 1e-6
85+
use_dwconv: True # depth-wise convs
86+
num_layers: 2
87+
88+
num_maskmem: 7
89+
image_size: 1024
90+
# apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask
91+
sigmoid_scale_for_mem_enc: 20.0
92+
sigmoid_bias_for_mem_enc: -10.0
93+
use_mask_input_as_output_without_sam: true
94+
# Memory
95+
directly_add_no_mem_embed: true
96+
no_obj_embed_spatial: true
97+
# use high-resolution feature map in the SAM mask decoder
98+
use_high_res_features_in_sam: true
99+
# output 3 masks on the first click on initial conditioning frames
100+
multimask_output_in_sam: true
101+
# SAM heads
102+
iou_prediction_use_sigmoid: True
103+
# cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder
104+
use_obj_ptrs_in_encoder: true
105+
add_tpos_enc_to_obj_ptrs: true
106+
proj_tpos_enc_in_obj_ptrs: true
107+
use_signed_tpos_enc_to_obj_ptrs: true
108+
only_obj_ptrs_in_the_past_for_eval: true
109+
# object occlusion prediction
110+
pred_obj_scores: true
111+
pred_obj_scores_mlp: true
112+
fixed_no_obj_ptr: true
113+
# multimask tracking settings
114+
multimask_output_for_tracking: true
115+
use_multimask_token_for_obj_ptr: true
116+
multimask_min_pt_num: 0
117+
multimask_max_pt_num: 1
118+
use_mlp_for_obj_ptr_proj: true
119+
# Compilation flag
120+
compile_image_encoder: False

0 commit comments

Comments (0)