archivebox.services.runner

Module Contents

Classes

CrawlRunner

Functions

_bus_name

_count_selected_hooks

_normalize_runtime_config

_runner_task_context

_is_external_task_cancelled

_emit_machine_config

_run_event_now

ensure_background_runner

run_crawl

_run_binary

run_binary

_run_install

run_install

recover_orphaned_crawls

recover_orphaned_snapshots

run_pending_crawls

API

archivebox.services.runner._bus_name(prefix: str, identifier: str) -> str [source]
archivebox.services.runner._count_selected_hooks(plugins: dict[str, abx_dl.models.Plugin], selected_plugins: list[str] | None) -> int [source]
archivebox.services.runner._normalize_runtime_config(config: archivebox.config.configset.BaseConfigSet | collections.abc.Mapping[str, Any] | str | None) -> dict[str, Any] [source]
archivebox.services.runner._runner_task_context() -> contextvars.Context [source]
archivebox.services.runner._is_external_task_cancelled(error: asyncio.CancelledError) -> bool [source]
async archivebox.services.runner._emit_machine_config(bus, *, config: dict[str, Any], derived_config: dict[str, Any], parent_event=None) -> None [source]
async archivebox.services.runner._run_event_now(event, timeout: float | None = None) [source]
archivebox.services.runner.ensure_background_runner(*, allow_under_pytest: bool = False) -> bool [source]
class archivebox.services.runner.CrawlRunner(crawl, *, snapshot_ids: list[str] | None = None, selected_plugins: list[str] | None = None, process_discovered_snapshots_inline: bool = True) [source]

Initialization

MAX_CONCURRENT_SNAPSHOTS = 8 [source]

async crawl_is_cancelled() -> bool [source]
async watch_for_cancelled_crawl(parent_event: abxbus.BaseEvent, *, poll_interval: float = 1.0) -> None [source]
runtime_plugins() -> dict[str, abx_dl.models.Plugin] [source]
async run() -> None [source]
async enqueue_snapshot(snapshot_id: str) -> None [source]
async wait_for_snapshot_tasks() -> None [source]
async enqueue_pending_snapshots_from_projection() -> None [source]
load_run_state() -> list[str] [source]
finalize_run_state() -> None [source]
_create_live_ui() -> archivebox.services.live_ui.LiveBusUI | None [source]
load_snapshot_payload(snapshot_id: str) -> dict[str, Any] [source]
async enqueue_discovered_snapshots_from_outputs(snapshot_payload: dict[str, Any]) -> None [source]
async run_crawl(root_snapshot_id: str, snapshot_ids: list[str]) -> None [source]
async run_snapshot(snapshot_id: str) -> None [source]
seal_snapshot_due_to_limit(snapshot_id: str) -> None [source]
archivebox.services.runner.run_crawl(crawl_id: str, *, snapshot_ids: list[str] | None = None, selected_plugins: list[str] | None = None, process_discovered_snapshots_inline: bool = True) -> None [source]
async archivebox.services.runner._run_binary(binary_id: str) -> None [source]
archivebox.services.runner.run_binary(binary_id: str) -> None [source]
async archivebox.services.runner._run_install(plugin_names: list[str] | None = None) -> None [source]
archivebox.services.runner.run_install(*, plugin_names: list[str] | None = None) -> None [source]
archivebox.services.runner.recover_orphaned_crawls() -> int [source]
archivebox.services.runner.recover_orphaned_snapshots() -> int [source]
archivebox.services.runner.run_pending_crawls(*, daemon: bool = False, crawl_id: str | None = None) -> int [source]