archivebox.workers.models

Module Contents

Classes

DefaultStatusChoices

BaseModelWithStateMachine

ModelWithStateMachine

EventDict

EventManager

EventQuerySet

Event

Data

default_status_field

default_retry_at_field

ObjectState

ObjectStateList

API

class archivebox.workers.models.DefaultStatusChoices[source]

Bases: django.db.models.TextChoices

QUEUED[source]

(‘queued’, ‘Queued’)

STARTED[source]

(‘started’, ‘Started’)

SEALED[source]

(‘sealed’, ‘Sealed’)

archivebox.workers.models.default_status_field: django.db.models.CharField[source]

‘CharField(…)’

archivebox.workers.models.default_retry_at_field: django.db.models.DateTimeField[source]

‘DateTimeField(…)’

archivebox.workers.models.ObjectState[source]

None

archivebox.workers.models.ObjectStateList[source]

None

class archivebox.workers.models.BaseModelWithStateMachine[source]

Bases: django.db.models.Model, statemachine.mixins.MachineMixin

id: django.db.models.UUIDField[source]

None

StatusChoices: ClassVar[Type[django.db.models.TextChoices]][source]

None

state_machine_name: ClassVar[str][source]

None

state_field_name: ClassVar[str][source]

None

state_machine_attr: ClassVar[str][source]

‘sm’

bind_events_as_methods: ClassVar[bool][source]

True

active_state: ClassVar[archivebox.workers.models.ObjectState][source]

None

retry_at_field_name: ClassVar[str][source]

None

class Meta[source]
abstract[source]

True

classmethod check(sender=None, **kwargs)[source]
static _state_to_str(state: archivebox.workers.models.ObjectState) -> str[source]

Convert a statemachine.State, models.TextChoices.choices value, or Enum value to a str

property RETRY_AT: datetime.datetime[source]
property STATE: str[source]
bump_retry_at(seconds: int = 10)[source]
ACTIVE_STATE() -> str[source]
INITIAL_STATE() -> str[source]
FINAL_STATES() -> list[str][source]
FINAL_OR_ACTIVE_STATES() -> list[str][source]
classmethod extend_choices(base_choices: Type[django.db.models.TextChoices])[source]

Decorator to extend the base choices with extra choices, e.g.:

class MyModel(ModelWithStateMachine):

    @ModelWithStateMachine.extend_choices(ModelWithStateMachine.StatusChoices)
    class StatusChoices(models.TextChoices):
        SUCCEEDED = 'succeeded'
        FAILED = 'failed'
        SKIPPED = 'skipped'
classmethod StatusField(**kwargs) -> django.db.models.CharField[source]

Used on subclasses to extend/modify the status field with updated kwargs. e.g.:

class MyModel(ModelWithStateMachine):
    class StatusChoices(ModelWithStateMachine.StatusChoices):
        QUEUED = 'queued', 'Queued'
        STARTED = 'started', 'Started'
        SEALED = 'sealed', 'Sealed'
        BACKOFF = 'backoff', 'Backoff'
        FAILED = 'failed', 'Failed'
        SKIPPED = 'skipped', 'Skipped'

    status = ModelWithStateMachine.StatusField(choices=StatusChoices.choices, default=StatusChoices.QUEUED)
classmethod RetryAtField(**kwargs) -> django.db.models.DateTimeField[source]

Used on subclasses to extend/modify the retry_at field with updated kwargs. e.g.:

class MyModel(ModelWithStateMachine):
    retry_at = ModelWithStateMachine.RetryAtField(editable=False)

StateMachineClass() -> Type[statemachine.StateMachine][source]

Get the StateMachine class for the given django Model that inherits from MachineMixin

class archivebox.workers.models.ModelWithStateMachine[source]

Bases: archivebox.workers.models.BaseModelWithStateMachine

StatusChoices: ClassVar[Type[archivebox.workers.models.DefaultStatusChoices]][source]

None

status: django.db.models.CharField[source]

‘StatusField(…)’

retry_at: django.db.models.DateTimeField[source]

‘RetryAtField(…)’

state_machine_name: ClassVar[str][source]

None

state_field_name: ClassVar[str][source]

‘status’

state_machine_attr: ClassVar[str][source]

‘sm’

bind_events_as_methods: ClassVar[bool][source]

True

active_state: ClassVar[str][source]

None

retry_at_field_name: ClassVar[str][source]

‘retry_at’

class Meta[source]
abstract[source]

True

class archivebox.workers.models.EventDict[source]

Bases: typing.TypedDict

name: str[source]

None

id: str | uuid.UUID[source]

None

path: str[source]

None

content: str[source]

None

status: str[source]

None

retry_at: datetime.datetime | None[source]

None

url: str[source]

None

seed_id: str | uuid.UUID[source]

None

crawl_id: str | uuid.UUID[source]

None

snapshot_id: str | uuid.UUID[source]

None

process_id: str | uuid.UUID[source]

None

extractor: str[source]

None

error: str[source]

None

on_success: dict | None[source]

None

on_failure: dict | None[source]

None

class archivebox.workers.models.EventManager[source]

Bases: django.db.models.Manager

class archivebox.workers.models.EventQuerySet[source]

Bases: django.db.models.QuerySet

get_next_unclaimed() -> Event | None[source]
expired(older_than: int = 60 * 10) -> django.db.models.QuerySet[archivebox.workers.models.Event][source]
class archivebox.workers.models.Event(*args: Any, **kwargs: Any)[source]

Bases: base_models.models.ABIDModel

abid_prefix[source]

‘evn_’

abid_ts_src[source]

‘self.deliver_at’

abid_uri_src[source]

self.name

abid_subtype_src[source]

‘self.emitted_by’

abid_rand_src[source]

self.id

abid_drift_allowed: bool[source]

False

read_only_fields[source]

(‘id’, ‘deliver_at’, ‘name’, ‘kwargs’, ‘timeout’, ‘parent’, ‘emitted_by’, ‘on_success’, ‘on_failure’…

id[source]

‘UUIDField(…)’

abid[source]

None

created_at[source]

None

created_by[source]

None

created_by_id[source]

None

deliver_at[source]

‘DateTimeField(…)’

name[source]

‘CharField(…)’

kwargs[source]

‘JSONField(…)’

timeout[source]

‘IntegerField(…)’

parent[source]

‘ForeignKey(…)’

emitted_by[source]

‘ForeignKey(…)’

on_success[source]

‘JSONField(…)’

on_failure[source]

‘JSONField(…)’

modified_at[source]

‘DateTimeField(…)’

claimed_proc[source]

‘ForeignKey(…)’

claimed_at[source]

‘DateTimeField(…)’

finished_at[source]

‘DateTimeField(…)’

error[source]

‘TextField(…)’

objects: archivebox.workers.models.EventManager[source]

‘(…)’

child_events: django.db.models.RelatedManager[archivebox.workers.models.Event][source]

None

classmethod get_next_timestamp()[source]

Get the next monotonically increasing timestamp to use as the next event's deliver_at

classmethod dispatch(name: str | archivebox.workers.models.EventDict | None = None, event: archivebox.workers.models.EventDict | None = None, **event_init_kwargs) -> archivebox.workers.models.Event[source]

Create a new Event and save it to the database.

Can be called as either:

>>> Event.dispatch(name, {**kwargs}, **event_init_kwargs)
# OR
>>> Event.dispatch({name, **kwargs}, **event_init_kwargs)

clean(*args, **kwargs) -> None[source]

Fill and validate all the event fields

save(*args, **kwargs)[source]
reset()[source]

Force-update an event to a pending/unclaimed state (without running any of its handlers or callbacks)

abort()[source]

Force-update an event to a completed/failed state (without running any of its handlers or callbacks)

__repr__() -> str[source]
__str__() -> str[source]
property type: str[source]
property is_queued[source]
property is_claimed[source]
property is_expired[source]
property is_processing[source]
property is_finished[source]
property is_failed[source]
property is_succeeded[source]
__getattr__(key: str)[source]

Allow access to the event kwargs as attributes, e.g. Event(name='CRAWL_CREATE', kwargs={'some_key': 'some_val'}).some_key -> 'some_val'