Skip to content

Commit ce4bbda

Browse files
Add files via upload
1 parent e1e8408 commit ce4bbda

1 file changed

Lines changed: 87 additions & 103 deletions

File tree

pyarchivefile.py

Lines changed: 87 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,38 @@
125125
except Exception:
126126
PATH_TYPES = (basestring,)
127127

128+
def _ensure_text(s, encoding="utf-8", errors="replace", allow_none=False):
    """
    Normalize any input to text_type (unicode on Py2, str on Py3).

    Conversion rules, applied in order:
    - None                      -> "" (or None when allow_none=True)
    - text_type                 -> returned unchanged
    - bytes/bytearray/memoryview -> decoded with `encoding` / `errors`
    - path-like (os.fspath)     -> fspath result, decoded if it is bytes
    - everything else           -> text_type(s)

    Parameters:
        s: value to normalize.
        encoding: codec used when decoding byte-like input.
        errors: decode error handler passed to .decode().
        allow_none: when True, None is passed through instead of
            becoming an empty string.

    Returns:
        A text_type instance, or None when s is None and allow_none is set.
    """
    if s is None:
        return None if allow_none else text_type("")

    if isinstance(s, text_type):
        return s

    if isinstance(s, memoryview):
        # bytes(memoryview) is str(memoryview) on Python 2, which yields the
        # object's repr instead of its contents; tobytes() is correct on both.
        return s.tobytes().decode(encoding, errors)

    if isinstance(s, (bytes_type, bytearray)):
        return bytes(s).decode(encoding, errors)

    # Handle pathlib.Path & other path-like objects (os.fspath is Py3.6+).
    import os
    fspath = getattr(os, "fspath", None)
    if fspath is not None:
        try:
            fs = fspath(s)
        except Exception:
            # Best effort: s is not path-like (TypeError) or its __fspath__
            # misbehaves; fall through to the generic conversion below.
            fs = None
        if isinstance(fs, text_type):
            return fs
        if isinstance(fs, bytes_type):
            return fs.decode(encoding, errors)

    return text_type(s)
159+
128160
def to_text(s, encoding="utf-8", errors="ignore"):
129161
if s is None:
130162
return u""
@@ -884,36 +916,64 @@ def VerbosePrintOutReturn(dbgtxt, outtype="log", dbgenable=True, dgblevel=20, **
884916
return dbgtxt
885917

886918

919+
def _split_posix(name):
920+
"""
921+
Return a list of path parts without collapsing '..'.
922+
- Normalize backslashes to '/'
923+
- Strip leading './' (repeated)
924+
- Remove '' and '.' parts; keep '..' for traversal detection
925+
"""
926+
if not name:
927+
return []
928+
n = name.replace(u"\\", u"/")
929+
while n.startswith(u"./"):
930+
n = n[2:]
931+
return [p for p in n.split(u"/") if p not in (u"", u".")]
887932

888-
def _split_posix(path_text):
889-
"""Split POSIX paths regardless of OS; return list of components."""
890-
# Normalize leading './'
891-
if path_text.startswith(u'./'):
892-
path_text = path_text[2:]
893-
# Strip redundant slashes
894-
path_text = re.sub(u'/+', u'/', path_text)
895-
# Drop trailing '/' so 'dir/' -> ['dir']
896-
if path_text.endswith(u'/'):
897-
path_text = path_text[:-1]
898-
return path_text.split(u'/') if path_text else []
933+
def _is_abs_like(name):
934+
"""Detect absolute-like paths across platforms (/, \\, drive letters, UNC)."""
935+
if not name:
936+
return False
937+
n = name.replace(u"\\", u"/")
938+
939+
# POSIX absolute
940+
if n.startswith(u"/"):
941+
return True
899942

900-
def _is_abs_like(s):
901-
"""Absolute targets (POSIX or Windows-drive style)."""
902-
return s.startswith(u'/') or s.startswith(u'\\') or re.match(u'^[A-Za-z]:[/\\\\]', s)
943+
# Windows UNC (\\server\share\...) -> after replace: startswith '//'
944+
if n.startswith(u"//"):
945+
return True
946+
947+
# Windows drive: 'C:/', 'C:\', or bare 'C:' (treat as absolute-like conservatively)
948+
if len(n) >= 2 and n[1] == u":":
949+
if len(n) == 2:
950+
return True
951+
if n[2:3] in (u"/", u"\\"):
952+
return True
953+
return False
903954

904-
def _resolves_outside(base_rel, target_rel):
955+
def _resolves_outside(parent, target):
    """
    Does a symlink located in 'parent' and pointing at 'target' escape parent?

    - An absolute-like target always escapes.
    - Otherwise the target is resolved component by component starting from
      'parent'; the result must be 'parent' itself or lie beneath it.
    - A '..' that would climb above the archive root is reported as an
      escape. Anchoring under '/' and calling normpath would silently clamp
      it ('/../../etc' normalizes to '/etc'), hiding traversal when parent
      is the archive root.

    Parameters:
        parent: POSIX-style directory of the link ('' means archive root).
        target: link target as stored in the archive.

    Returns:
        True when the resolved target is outside 'parent', else False.
    """
    parent = _ensure_text(parent or u"")
    target = _ensure_text(target or u"")

    # Absolute target is unsafe by definition
    if _is_abs_like(target):
        return True

    # Normalize the parent relative to the archive root; '..' in the parent
    # itself is clamped at the root, as root-anchored normpath would do.
    base_parts = []
    for part in _split_posix(parent):
        if part != u"..":
            base_parts.append(part)
        elif base_parts:
            base_parts.pop()

    # Walk the target from the parent directory without clamping.
    resolved = list(base_parts)
    for part in _split_posix(target):
        if part != u"..":
            resolved.append(part)
        elif resolved:
            resolved.pop()
        else:
            # Climbed above the archive root.
            return True

    # Inside means: parent itself, or parent is a component-wise prefix.
    return resolved[:len(base_parts)] != base_parts
917977

918978
def _to_bytes(data, encoding="utf-8", errors="strict"):
919979
"""
@@ -1017,9 +1077,6 @@ def _to_text(s, encoding="utf-8", errors="replace", normalize=None, prefer_surro
10171077

10181078
return out
10191079

1020-
def ensure_text(s, **kw):
1021-
return _to_text(s, **kw)
1022-
10231080
def _quote_path_for_wire(path_text):
10241081
# Percent-encode as UTF-8; return ASCII bytes text
10251082
try:
@@ -1385,7 +1442,7 @@ def _guess_filename(url, filename):
13851442
return filename
13861443
path = urlparse(url).path or ''
13871444
base = os.path.basename(path)
1388-
return base or 'OutFile.'+__file_format_extension__
1445+
return base or 'ArchiveFile'+__file_format_extension__
13891446

13901447
# ---- progress + rate limiting helpers ----
13911448
try:
@@ -1680,79 +1737,6 @@ def _pace_rate(last_ts, sent_bytes_since_ts, rate_limit_bps, add_bytes):
16801737
sent_bytes_since_ts = 0
16811738
return (sleep_s, last_ts, sent_bytes_since_ts)
16821739

1683-
1684-
def _split_posix(name):
1685-
"""
1686-
Return a list of path parts without collapsing '..'.
1687-
- Normalize backslashes to '/'
1688-
- Strip leading './' and redundant slashes
1689-
- Keep '..' parts for traversal detection
1690-
"""
1691-
if not name:
1692-
return []
1693-
n = name.replace(u"\\", u"/")
1694-
# drop leading ./ repeatedly
1695-
while n.startswith(u"./"):
1696-
n = n[2:]
1697-
# split and filter empty and '.'
1698-
parts = [p for p in n.split(u"/") if p not in (u"", u".")]
1699-
return parts
1700-
1701-
def _is_abs_like(name):
1702-
"""Detect absolute-like paths across platforms (/, \, drive letters)."""
1703-
if not name:
1704-
return False
1705-
n = name.replace(u"\\", u"/")
1706-
if n.startswith(u"/"):
1707-
return True
1708-
# Windows drive: C:/ or C:\ (allow lowercase too)
1709-
if len(n) >= 3 and n[1] == u":" and n[2] in (u"/", u"\\"):
1710-
return True
1711-
return False
1712-
1713-
def _resolves_outside(parent, target):
1714-
"""
1715-
Does a symlink from 'parent' to 'target' escape parent?
1716-
- Treat absolute target as escaping.
1717-
- For relative target, join parent + target, normpath, then check if it starts with parent.
1718-
- Parent is POSIX-style path ('' means root of archive).
1719-
"""
1720-
parent = _ensure_text(parent or u"")
1721-
target = _ensure_text(target or u"")
1722-
# absolute target is unsafe by definition
1723-
if _is_abs_like(target):
1724-
return True
1725-
1726-
# Build a virtual root '/' so we can compare safely
1727-
# e.g., parent='dir/sub', target='../../etc' -> '/dir/sub/../../etc' -> '/etc' (escapes)
1728-
import posixpath
1729-
root = u"/"
1730-
base = posixpath.normpath(posixpath.join(root, parent)) # '/dir/sub'
1731-
candidate = posixpath.normpath(posixpath.join(base, target)) # resolved path under '/'
1732-
1733-
# Ensure base always ends with a slash for prefix test
1734-
base_slash = base if base.endswith(u"/") else (base + u"/")
1735-
# candidate must be base itself or inside base
1736-
if candidate == base or candidate.startswith(base_slash):
1737-
return False
1738-
return True
1739-
1740-
def _symlink_type(ftype):
1741-
"""
1742-
Return True if ftype denotes a symlink.
1743-
Accepts: 2 (int), '2', 'symlink', 'link' (case-insensitive).
1744-
"""
1745-
if ftype is None:
1746-
return False
1747-
# numeric or numeric string
1748-
try:
1749-
if int(ftype) == 2:
1750-
return True
1751-
except Exception:
1752-
pass
1753-
s = ensure_text(ftype).strip().lower()
1754-
return s in (u"2", u"symlink", u"link", u"symbolic_link", u"symbolic-link")
1755-
17561740
def DetectTarBombFoxFileArray(listarrayfiles,
17571741
top_file_ratio_threshold=0.6,
17581742
min_members_for_ratio=4,
@@ -1768,7 +1752,7 @@ def DetectTarBombFoxFileArray(listarrayfiles,
17681752
has_symlinks (bool)
17691753
"""
17701754
if to_text is None:
1771-
to_text = ensure_text
1755+
to_text = _ensure_text
17721756

17731757
files = listarrayfiles or {}
17741758
members = files.get('ffilelist') or []

0 commit comments

Comments
 (0)