Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions crawl4ai/async_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,18 @@ class BrowserConfig:
light_mode (bool): Disables certain background features for performance gains. Default: False.
extra_args (list): Additional command-line arguments passed to the browser.
Default: [].
executable_path (str or None): Path to a custom browser executable (e.g., ungoogled-chromium,
Brave, or a stealth-patched binary). If None, uses the default
Playwright-managed browser. Default: None.
ignore_default_args (list or None): List of default Chromium flags to exclude from launch.
Forwarded to Playwright's chromium.launch() only when non-empty
(None or an empty list leaves the launch options untouched). Default: None.
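skip_default_browser_args (bool): If True, skips the hardcoded browser args in both
ManagedBrowser.build_browser_flags() and _build_browser_args(),
and uses extra_args as the starting argument list. Note that the
skipped defaults in _build_browser_args() include the --window-size
flag derived from viewport_width/viewport_height. Useful when a
custom browser binary manages its own flags and the defaults cause
conflicts. Default: False.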
skip_default_headers (bool): If True, skips the forced User-Agent and sec-ch-ua header overrides
in setup_context(). Useful when a custom browser binary manages its own
fingerprint and Crawl4AI's overrides create detectable mismatches.
Default: False.
enable_stealth (bool): If True, applies playwright-stealth to bypass basic bot detection.
Cannot be used with use_undetected browser mode. Default: False.
memory_saving_mode (bool): If True, adds aggressive cache discard and V8 heap cap flags
Expand Down Expand Up @@ -644,6 +656,10 @@ def __init__(
text_mode: bool = False,
light_mode: bool = False,
extra_args: list = None,
executable_path: str = None,
ignore_default_args: list = None,
skip_default_browser_args: bool = False,
skip_default_headers: bool = False,
debugging_port: int = 9222,
host: str = "localhost",
enable_stealth: bool = False,
Expand Down Expand Up @@ -709,6 +725,10 @@ def __init__(
self.text_mode = text_mode
self.light_mode = light_mode
self.extra_args = extra_args if extra_args is not None else []
self.executable_path = executable_path
self.ignore_default_args = ignore_default_args
self.skip_default_browser_args = skip_default_browser_args
self.skip_default_headers = skip_default_headers
self.sleep_on_close = sleep_on_close
self.verbose = verbose
self.debugging_port = debugging_port
Expand Down Expand Up @@ -804,6 +824,10 @@ def to_dict(self):
"text_mode": self.text_mode,
"light_mode": self.light_mode,
"extra_args": self.extra_args,
"executable_path": self.executable_path,
"ignore_default_args": self.ignore_default_args,
"skip_default_browser_args": self.skip_default_browser_args,
"skip_default_headers": self.skip_default_headers,
"sleep_on_close": self.sleep_on_close,
"verbose": self.verbose,
"debugging_port": self.debugging_port,
Expand Down
201 changes: 108 additions & 93 deletions crawl4ai/browser_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,51 +69,56 @@ class ManagedBrowser:
@staticmethod
def build_browser_flags(config: BrowserConfig) -> List[str]:
"""Common CLI flags for launching Chromium"""
flags = [
"--no-sandbox",
"--disable-dev-shm-usage",
"--no-first-run",
"--no-default-browser-check",
"--disable-infobars",
"--window-position=0,0",
"--ignore-certificate-errors",
"--ignore-certificate-errors-spki-list",
"--disable-blink-features=AutomationControlled",
"--window-position=400,0",
"--disable-renderer-backgrounding",
"--disable-ipc-flooding-protection",
"--force-color-profile=srgb",
"--mute-audio",
"--disable-background-timer-throttling",
# Memory-saving flags: disable unused Chrome features
"--disable-features=OptimizationHints,MediaRouter,DialMediaRouteProvider",
"--disable-component-update",
"--disable-domain-reliability",
]
# GPU flags disable WebGL which anti-bot sensors detect as headless.
# Keep WebGL working (via SwiftShader) when stealth mode is active.
if not config.enable_stealth:
flags.extend([
"--disable-gpu",
"--disable-gpu-compositing",
"--disable-software-rasterizer",
])
if config.memory_saving_mode:
flags.extend([
"--aggressive-cache-discard",
'--js-flags=--max-old-space-size=512',
])
if config.light_mode:
flags.extend(BROWSER_DISABLE_OPTIONS)
if config.text_mode:
flags.extend([
"--blink-settings=imagesEnabled=false",
"--disable-remote-fonts",
"--disable-images",
"--disable-javascript",
"--disable-software-rasterizer",
if config.skip_default_browser_args:
flags = list(config.extra_args) if config.extra_args else []
else:
flags = [
"--no-sandbox",
"--disable-dev-shm-usage",
])
"--no-first-run",
"--no-default-browser-check",
"--disable-infobars",
"--window-position=0,0",
"--ignore-certificate-errors",
"--ignore-certificate-errors-spki-list",
"--disable-blink-features=AutomationControlled",
"--window-position=400,0",
"--disable-renderer-backgrounding",
"--disable-ipc-flooding-protection",
"--force-color-profile=srgb",
"--mute-audio",
"--disable-background-timer-throttling",
# Memory-saving flags: disable unused Chrome features
"--disable-features=OptimizationHints,MediaRouter,DialMediaRouteProvider",
"--disable-component-update",
"--disable-domain-reliability",
]
# GPU flags disable WebGL which anti-bot sensors detect as headless.
# Keep WebGL working (via SwiftShader) when stealth mode is active.
if not config.enable_stealth:
flags.extend([
"--disable-gpu",
"--disable-gpu-compositing",
"--disable-software-rasterizer",
])
if config.memory_saving_mode:
flags.extend([
"--aggressive-cache-discard",
'--js-flags=--max-old-space-size=512',
])
if config.light_mode:
flags.extend(BROWSER_DISABLE_OPTIONS)
if config.text_mode:
flags.extend([
"--blink-settings=imagesEnabled=false",
"--disable-remote-fonts",
"--disable-images",
"--disable-javascript",
"--disable-software-rasterizer",
"--disable-dev-shm-usage",
])
if config.extra_args:
flags.extend(config.extra_args)
# proxy support — only pass server URL, never credentials.
# Chromium's --proxy-server flag silently ignores inline user:pass@.
# Auth credentials are handled at the Playwright context level instead.
Expand Down Expand Up @@ -1056,62 +1061,72 @@ async def _verify_cdp_ready(self, cdp_url: str) -> bool:

def _build_browser_args(self) -> dict:
"""Build browser launch arguments from config."""
args = [
"--disable-gpu",
"--disable-gpu-compositing",
"--disable-software-rasterizer",
"--no-sandbox",
"--disable-dev-shm-usage",
"--no-first-run",
"--no-default-browser-check",
"--disable-infobars",
"--window-position=0,0",
"--ignore-certificate-errors",
"--ignore-certificate-errors-spki-list",
"--disable-blink-features=AutomationControlled",
"--window-position=400,0",
"--disable-renderer-backgrounding",
"--disable-ipc-flooding-protection",
"--force-color-profile=srgb",
"--mute-audio",
"--disable-background-timer-throttling",
# Memory-saving flags: disable unused Chrome features
"--disable-features=OptimizationHints,MediaRouter,DialMediaRouteProvider",
"--disable-component-update",
"--disable-domain-reliability",
# "--single-process",
f"--window-size={self.config.viewport_width},{self.config.viewport_height}",
]

if self.config.memory_saving_mode:
args.extend([
"--aggressive-cache-discard",
'--js-flags=--max-old-space-size=512',
])

if self.config.light_mode:
args.extend(BROWSER_DISABLE_OPTIONS)
if self.config.skip_default_browser_args:
# Skip all hardcoded args — only use extra_args
args = list(self.config.extra_args) if self.config.extra_args else []
else:
args = [
"--disable-gpu",
"--disable-gpu-compositing",
"--disable-software-rasterizer",
"--no-sandbox",
"--disable-dev-shm-usage",
"--no-first-run",
"--no-default-browser-check",
"--disable-infobars",
"--window-position=0,0",
"--ignore-certificate-errors",
"--ignore-certificate-errors-spki-list",
"--disable-blink-features=AutomationControlled",
"--window-position=400,0",
"--disable-renderer-backgrounding",
"--disable-ipc-flooding-protection",
"--force-color-profile=srgb",
"--mute-audio",
"--disable-background-timer-throttling",
# Memory-saving flags: disable unused Chrome features
"--disable-features=OptimizationHints,MediaRouter,DialMediaRouteProvider",
"--disable-component-update",
"--disable-domain-reliability",
# "--single-process",
f"--window-size={self.config.viewport_width},{self.config.viewport_height}",
]

if self.config.text_mode:
args.extend(
[
"--blink-settings=imagesEnabled=false",
"--disable-remote-fonts",
"--disable-images",
"--disable-javascript",
"--disable-software-rasterizer",
"--disable-dev-shm-usage",
]
)
if self.config.memory_saving_mode:
args.extend([
"--aggressive-cache-discard",
'--js-flags=--max-old-space-size=512',
])

if self.config.light_mode:
args.extend(BROWSER_DISABLE_OPTIONS)

if self.config.text_mode:
args.extend(
[
"--blink-settings=imagesEnabled=false",
"--disable-remote-fonts",
"--disable-images",
"--disable-javascript",
"--disable-software-rasterizer",
"--disable-dev-shm-usage",
]
)

if self.config.extra_args:
args.extend(self.config.extra_args)
if self.config.extra_args:
args.extend(self.config.extra_args)

# Deduplicate args
args = list(dict.fromkeys(args))

browser_args = {"headless": self.config.headless, "args": args}

if self.config.executable_path:
browser_args["executable_path"] = self.config.executable_path

if self.config.ignore_default_args:
browser_args["ignore_default_args"] = self.config.ignore_default_args

if self.config.chrome_channel:
browser_args["channel"] = self.config.chrome_channel

Expand Down Expand Up @@ -1191,7 +1206,7 @@ async def setup_context(
] = self.config.downloads_path

# Handle user agent and browser hints
if self.config.user_agent:
if self.config.user_agent and not self.config.skip_default_headers:
combined_headers = {
"User-Agent": self.config.user_agent,
"sec-ch-ua": self.config.browser_hint,
Expand Down