Skip to content

Commit 01335f0

Browse files
committed
Rename crawler_id to just id. Polish.
1 parent 415299f commit 01335f0

2 files changed

Lines changed: 18 additions & 15 deletions

File tree

src/crawlee/crawlers/_basic/_basic_crawler.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,10 @@ class _BasicCrawlerOptions(TypedDict):
213213
"""Allows overriding the default status message. The default status message is provided in the parameters.
214214
Returning `None` suppresses the status message."""
215215

216+
id: NotRequired[int]
217+
"""Id of the crawler used for state tracking. You can use same explicit id to share state between two crawlers.
218+
By default, each crawler will use its own state."""
219+
216220

217221
class _BasicCrawlerOptionsGeneric(TypedDict, Generic[TCrawlingContext, TStatisticsState]):
218222
"""Generic options the `BasicCrawler` constructor."""
@@ -298,7 +302,7 @@ def __init__(
298302
status_message_logging_interval: timedelta = timedelta(seconds=10),
299303
status_message_callback: Callable[[StatisticsState, StatisticsState | None, str], Awaitable[str | None]]
300304
| None = None,
301-
crawler_id: int | None = None,
305+
id: int | None = None,
302306
_context_pipeline: ContextPipeline[TCrawlingContext] | None = None,
303307
_additional_context_managers: Sequence[AbstractAsyncContextManager] | None = None,
304308
_logger: logging.Logger | None = None,
@@ -351,21 +355,20 @@ def __init__(
351355
status_message_logging_interval: Interval for logging the crawler status messages.
352356
status_message_callback: Allows overriding the default status message. The default status message is
353357
provided in the parameters. Returning `None` suppresses the status message.
354-
crawler_id: Id of the crawler used for state and statistics tracking. You can use same explicit id to share
355-
state and statistics between two crawlers. By default, each crawler will use own state and statistics.
358+
id: Id of the crawler used for state tracking. You can use the same explicit id to share state between two
359+
crawlers. By default, each crawler will use its own state.
356360
_context_pipeline: Enables extending the request lifecycle and modifying the crawling context.
357361
Intended for use by subclasses rather than direct instantiation of `BasicCrawler`.
358362
_additional_context_managers: Additional context managers used throughout the crawler lifecycle.
359363
Intended for use by subclasses rather than direct instantiation of `BasicCrawler`.
360364
_logger: A logger instance, typically provided by a subclass, for consistent logging labels.
361365
Intended for use by subclasses rather than direct instantiation of `BasicCrawler`.
362366
"""
363-
if crawler_id is None:
364-
# This could look into set of already used ids, but lets not overengineer this.
365-
self.id = BasicCrawler.__next_id
367+
if id is None:
368+
self._id = BasicCrawler.__next_id
366369
BasicCrawler.__next_id += 1
367370
else:
368-
self.id = crawler_id
371+
self._id = id
369372

370373
implicit_event_manager_with_explicit_config = False
371374
if not configuration:
@@ -842,7 +845,7 @@ async def _use_state(
842845
default_value: dict[str, JsonSerializable] | None = None,
843846
) -> dict[str, JsonSerializable]:
844847
kvs = await self.get_key_value_store()
845-
return await kvs.get_auto_saved_value(f'{self._CRAWLEE_STATE_KEY}_{self.id}', default_value)
848+
return await kvs.get_auto_saved_value(f'{self._CRAWLEE_STATE_KEY}_{self._id}', default_value)
846849

847850
async def _save_crawler_state(self) -> None:
848851
store = await self.get_key_value_store()

tests/unit/crawlers/_basic/test_basic_crawler.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -821,15 +821,15 @@ async def handler(context: BasicCrawlingContext) -> None:
821821
assert isinstance(state['urls'], list)
822822
state['urls'].append(context.request.url)
823823

824-
crawler_1 = BasicCrawler(crawler_id=0, request_handler=handler)
825-
crawler_2 = BasicCrawler(crawler_id=0, request_handler=handler)
824+
crawler_1 = BasicCrawler(id=0, request_handler=handler)
825+
crawler_2 = BasicCrawler(id=0, request_handler=handler)
826826

827827
await crawler_1.run(['https://a.com'])
828828
await crawler_2.run(['https://b.com'])
829829

830830
kvs = await KeyValueStore.open()
831-
assert crawler_1.id == crawler_2.id == 0
832-
assert await kvs.get_value(f'{BasicCrawler._CRAWLEE_STATE_KEY}_{crawler_1.id}') == {
831+
assert crawler_1._id == crawler_2._id == 0
832+
assert await kvs.get_value(f'{BasicCrawler._CRAWLEE_STATE_KEY}_{crawler_1._id}') == {
833833
'urls': ['https://a.com', 'https://b.com']
834834
}
835835

@@ -838,8 +838,8 @@ async def test_crawlers_share_stats() -> None:
838838
async def handler(context: BasicCrawlingContext) -> None:
839839
await context.use_state({'urls': []})
840840

841-
crawler_1 = BasicCrawler(crawler_id=0, request_handler=handler)
842-
crawler_2 = BasicCrawler(crawler_id=0, request_handler=handler, statistics=crawler_1.statistics)
841+
crawler_1 = BasicCrawler(id=0, request_handler=handler)
842+
crawler_2 = BasicCrawler(id=0, request_handler=handler, statistics=crawler_1.statistics)
843843

844844
result1 = await crawler_1.run(['https://a.com'])
845845
result2 = await crawler_2.run(['https://b.com'])
@@ -1722,7 +1722,7 @@ async def test_add_requests_with_rq_param(queue_name: str | None, queue_alias: s
17221722
crawler = BasicCrawler()
17231723
rq = await RequestQueue.open(name=queue_name, alias=queue_alias)
17241724
if by_id:
1725-
queue_id = rq.id
1725+
queue_id = rq._id
17261726
queue_name = None
17271727
else:
17281728
queue_id = None

0 commit comments

Comments
 (0)