Skip to content

Commit 3dd8489

Browse files
bgmello and kdeden authored
Add CLI support for listing and downloading exports (#60)
* adding support for aleph exports * fixing tests * fixing formatting * change name of exportdir to exports * use normal exception pattern --------- Co-authored-by: Klil Eden <keden@protonmail.com>
1 parent 3cf049d commit 3dd8489

3 files changed

Lines changed: 231 additions & 0 deletions

File tree

alephclient/cli.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from alephclient.errors import AlephException
99
from alephclient.crawldir import crawl_dir
1010
from alephclient.fetchdir import fetch_collection, fetch_entity
11+
from alephclient.exports import list_exports, format_exports_table, download_export
1112

1213
log = logging.getLogger(__name__)
1314

@@ -432,5 +433,38 @@ def make_list(ctx, foreign_id, outfile, label, summary):
432433
raise click.Abort()
433434

434435

436+
@cli.group()
@click.pass_context
def export(ctx):
    """Manage exports."""
    # Container group only: the subcommands are registered on it with
    # @export.command(...), so there is nothing to execute here.
441+
442+
443+
@export.command("list")
@click.pass_context
def export_list(ctx):
    """List all exports."""
    client = ctx.obj["api"]
    try:
        table = format_exports_table(list_exports(client))
    except AlephException as err:
        # Surface API failures as a click error message.
        raise click.ClickException(str(err))
    click.echo(table)
453+
454+
455+
@export.command("download")
@click.argument("export_id", required=True)
@click.argument("destination", required=True, type=click.Path())
@click.pass_context
def export_download(ctx, export_id, destination):
    """Download an export by ID to a destination path."""
    client = ctx.obj["api"]
    try:
        saved_to = download_export(client, export_id, destination)
    except AlephException as err:
        # Surface API failures as a click error message.
        raise click.ClickException(str(err))
    click.echo(f"Export downloaded to {saved_to}")
467+
468+
435469
if __name__ == "__main__":
436470
cli()

alephclient/exports.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
from pathlib import Path
2+
from typing import List, Dict
3+
4+
from requests import RequestException
5+
from requests.exceptions import HTTPError
6+
7+
from alephclient.api import AlephAPI, APIResultSet
8+
from alephclient.errors import AlephException
9+
10+
11+
def list_exports(api: AlephAPI) -> List[Dict]:
    """Return every export from the API's ``exports`` endpoint as a list.

    The ``APIResultSet`` built for the endpoint is iterated to exhaustion, so
    the returned list holds all results it yields (pagination is left to
    ``APIResultSet``).
    """
    result_set = APIResultSet(api, api._make_url("exports"))
    collected: List[Dict] = []
    collected.extend(result_set)
    return collected
15+
16+
17+
def format_exports_table(exports: List[Dict]) -> str:
    """Format a list of exports as a plain-text table.

    The table has the columns ID, Label, Status, Created At and Content Hash,
    followed by a dashed separator row and one row per export. Every cell is
    left-justified and padded to the width of the longest cell in its column.

    Fields that are missing or ``None`` (e.g. a null value in the API
    response) are rendered as empty cells rather than the text ``None``.

    Args:
        exports: Export records as dictionaries.

    Returns:
        The rendered table, or ``"No exports found."`` when *exports* is
        empty.
    """
    if not exports:
        return "No exports found."

    headers = ["ID", "Label", "Status", "Created At", "Content Hash"]
    keys = ["id", "label", "status", "created_at", "content_hash"]

    def cell(value) -> str:
        # A key that is present but null must not show up as "None".
        return "" if value is None else str(value)

    rows = [[cell(export.get(key)) for key in keys] for export in exports]

    # Each column is as wide as its header or its longest value.
    col_widths = [
        max([len(header)] + [len(row[i]) for row in rows])
        for i, header in enumerate(headers)
    ]

    def format_row(values):
        return " ".join(v.ljust(w) for v, w in zip(values, col_widths))

    lines = [format_row(headers), format_row(["-" * w for w in col_widths])]
    lines.extend(format_row(row) for row in rows)
    return "\n".join(lines)
41+
42+
43+
def _get_export(api: AlephAPI, export_id: str) -> Dict:
    """Look up a single export by scanning the full exports listing.

    IDs are compared as strings. Raises ``AlephException`` when no export in
    the listing has a matching ID.
    """
    wanted = str(export_id)
    found = next(
        (item for item in list_exports(api) if str(item.get("id")) == wanted),
        None,
    )
    if found is None:
        raise AlephException(f"Export {export_id} not found")
    return found
49+
50+
51+
def download_export(api: AlephAPI, export_id: str, destination: str) -> Path:
    """Download an export archive to the given destination path.

    Args:
        api: Client whose ``session`` is used to fetch the archive.
        export_id: ID of the export to download.
        destination: Target path. If it is an existing directory, the file is
            stored inside it under the export's ``file_name`` (or the export
            ID when no usable file name is available); otherwise it is used
            as the file path itself. Missing parent directories are created.

    Returns:
        The path the archive was written to.

    Raises:
        AlephException: If the export does not exist, has no download link,
            or the HTTP request / streaming of the body fails.
    """
    export = _get_export(api, export_id)
    # "links" may be present but null; treat that like a missing link.
    download_url = (export.get("links") or {}).get("download")
    if not download_url:
        raise AlephException(f"No download link for export {export_id}")

    # Fall back to the export ID when the file name is missing or null, and
    # keep only the final path component so a server-supplied name cannot
    # point outside the chosen directory.
    file_name = Path(str(export.get("file_name") or export_id)).name
    if file_name in ("", ".."):
        file_name = str(export_id)

    dest = Path(destination)
    if dest.is_dir():
        dest = dest / file_name
    dest.parent.mkdir(parents=True, exist_ok=True)

    # HTTPError is a subclass of RequestException, so one clause covers both.
    try:
        response = api.session.get(download_url, stream=True)
    except RequestException as exc:
        raise AlephException(exc) from exc

    try:
        # Check the status before opening the file so a failed request does
        # not create or truncate the destination.
        response.raise_for_status()
        with open(dest, "wb") as fh:
            for chunk in response.iter_content(chunk_size=512 * 1024):
                if chunk:
                    fh.write(chunk)
    except RequestException as exc:
        # Also covers connection errors raised while streaming the body.
        raise AlephException(exc) from exc
    finally:
        # A streamed response holds its connection until it is closed.
        response.close()

    return dest

alephclient/tests/test_export.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
from unittest.mock import MagicMock
2+
3+
from alephclient.api import AlephAPI
4+
from alephclient.exports import list_exports, format_exports_table, download_export
5+
6+
7+
# Export record shared by the download tests; its "links" entry carries the
# URL that download_export is expected to fetch.
FAKE_EXPORT = {
    "id": "123",
    "label": "My Export",
    "status": "complete",
    "file_name": "export.zip",
    "links": {"download": "http://aleph.test/api/2/archive?token=abc"},
}
14+
15+
16+
class TestListExports:
    """list_exports against a client whose ``_request`` method is mocked."""

    fake_url = "http://aleph.test/api/2/"

    def setup_method(self):
        self.api = AlephAPI(host=self.fake_url, api_key="fake_key")

    def test_single_page(self, mocker):
        expected = [{"id": "1", "label": "Export 1"}]
        only_page = {"results": expected, "next": None, "offset": 0, "limit": 20}
        mocker.patch.object(self.api, "_request", return_value=only_page)
        assert list_exports(self.api) == expected

    def test_pagination(self, mocker):
        # The first page points at a second one; the second ends the chain.
        first_page = {
            "results": [{"id": "1"}],
            "next": self.fake_url + "exports?page=2",
            "offset": 0,
            "limit": 1,
        }
        second_page = {
            "results": [{"id": "2"}],
            "next": None,
            "offset": 1,
            "limit": 1,
        }
        mocker.patch.object(
            self.api,
            "_request",
            side_effect=[first_page, second_page],
        )
        fetched = list_exports(self.api)
        assert len(fetched) == 2
        assert fetched[0]["id"] == "1"
        assert fetched[1]["id"] == "2"

    def test_empty(self, mocker):
        empty_page = {"results": [], "next": None, "offset": 0, "limit": 20}
        mocker.patch.object(self.api, "_request", return_value=empty_page)
        assert list_exports(self.api) == []
63+
64+
65+
class TestFormatExportsTable:
    """Rendering checks for format_exports_table."""

    def test_empty_list(self):
        assert format_exports_table([]) == "No exports found."

    def test_with_data(self):
        record = {
            "id": "abc",
            "label": "My Export",
            "status": "completed",
            "created_at": "2025-01-01",
            "content_hash": "sha1:deadbeef",
        }
        rendered = format_exports_table([record]).split("\n")
        # Header row, separator row, one data row.
        assert len(rendered) == 3
        header_line, data_line = rendered[0], rendered[2]
        for title in ("ID", "Label", "Status"):
            assert title in header_line
        for value in ("abc", "My Export", "completed"):
            assert value in data_line
88+
89+
90+
class TestDownloadExport:
    """download_export with the HTTP session and the exports listing mocked."""

    fake_url = "http://aleph.test/api/2/"

    def setup_method(self):
        self.api = AlephAPI(host=self.fake_url, api_key="fake_key")

    def _mock_download(self, mocker, content=b"file content"):
        # Fake streamed response that yields `content` as a single chunk.
        fake_response = MagicMock()
        fake_response.iter_content.return_value = [content]
        fake_response.raise_for_status = MagicMock()
        mocker.patch.object(self.api.session, "get", return_value=fake_response)
        # The listing is patched so the lookup finds FAKE_EXPORT.
        mocker.patch("alephclient.exports.list_exports", return_value=[FAKE_EXPORT])

    def test_download_to_file(self, mocker, tmp_path):
        target = tmp_path / "output.zip"
        self._mock_download(mocker)
        written = download_export(self.api, "123", str(target))
        assert written == target
        assert target.read_bytes() == b"file content"

    def test_download_to_directory(self, mocker, tmp_path):
        self._mock_download(mocker, content=b"data")
        written = download_export(self.api, "123", str(tmp_path))
        # A directory destination takes the export's file_name.
        target = tmp_path / "export.zip"
        assert written == target
        assert target.read_bytes() == b"data"

0 commit comments

Comments
 (0)