archivebox.index.schema

WARNING: THIS FILE IS ALL LEGACY CODE TO BE REMOVED.

DO NOT ADD ANY NEW FEATURES TO THIS FILE, NEW CODE GOES HERE: core/models.py

These are the old types we used to use before ArchiveBox v0.4 (before we switched to Django).

Module Contents

Classes

ArchiveResult

Link

Data

LinkDict

ArchiveOutput

API

exception archivebox.index.schema.ArchiveError(message, hints=None)[source]

Bases: Exception

archivebox.index.schema.LinkDict[source]

None

archivebox.index.schema.ArchiveOutput[source]

None

class archivebox.index.schema.ArchiveResult[source]

Bases: pydantic.BaseModel

model_config[source]

‘ConfigDict(…)’

TYPE: str[source]

‘index.schema.ArchiveResult’

cmd: list[str][source]

None

pwd: str | None[source]

None

cmd_version: str | None[source]

None

output: archivebox.index.schema.ArchiveOutput | None[source]

None

status: str[source]

None

start_ts: datetime.datetime[source]

None

end_ts: datetime.datetime[source]

None

index_texts: list[str] | None[source]

None

_field_names: ClassVar[list[str] | None][source]

None

classmethod validate_status(v: str) -> str[source]
classmethod validate_cmd(v: List[str]) -> List[str][source]
classmethod validate_pwd(v: Optional[str]) -> Optional[str][source]
classmethod validate_cmd_version(v: Optional[str]) -> Optional[str][source]
model_dump(**kwargs) -> dict[source]

Backwards compatible with _asdict()

classmethod field_names() -> List[str][source]

Get all field names of the model

classmethod guess_ts(dict_info: dict) -> tuple[datetime.datetime, datetime.datetime][source]

Guess timestamps from dictionary info

classmethod from_json(json_info: dict, guess: bool = False) -> archivebox.index.schema.ArchiveResult[source]

Create instance from JSON data

to_dict(*keys: str) -> dict[source]

Convert to dictionary, optionally filtering by keys

to_json(indent: int = 4, sort_keys: bool = True) -> str[source]

Convert to JSON string

to_csv(cols: Optional[List[str]] = None, separator: str = ',', ljust: int = 0) -> str[source]

Convert to CSV string

duration() -> int[source]

Calculate duration in seconds between start and end timestamps

class archivebox.index.schema.Link[source]

Bases: pydantic.BaseModel

model_config[source]

‘ConfigDict(…)’

TYPE: str[source]

‘index.schema.Link’

timestamp: str[source]

None

url: str[source]

None

title: str | None[source]

None

tags: str | None[source]

None

sources: list[str][source]

‘Field(…)’

history: dict[str, list[archivebox.index.schema.ArchiveResult]][source]

‘Field(…)’

downloaded_at: datetime.datetime | None[source]

None

_field_names: ClassVar[list[str] | None][source]

None

__str__() -> str[source]
__eq__(other: Any) -> bool[source]
__gt__(other: Any) -> bool[source]
classmethod validate_timestamp(v: str) -> str[source]
classmethod validate_url(v: str) -> str[source]
classmethod validate_title(v: Optional[str]) -> Optional[str][source]
classmethod validate_sources(v: List[str]) -> List[str][source]
_asdict(extended: bool = False) -> dict[source]
overwrite(**kwargs) -> archivebox.index.schema.Link[source]

Pure functional version of dict.update that returns a new instance

classmethod field_names() -> list[str][source]
classmethod from_json(json_info: dict, guess: bool = False) -> archivebox.index.schema.Link[source]
to_json(indent: int = 4, sort_keys: bool = True) -> str[source]
to_csv(cols: Optional[List[str]] = None, separator: str = ',', ljust: int = 0) -> str[source]
property archive_path: str[source]
bookmarked_date() -> Optional[str][source]
downloaded_datestr() -> Optional[str][source]
property archive_dates: list[datetime.datetime][source]
property oldest_archive_date: Optional[datetime.datetime][source]
property newest_archive_date: Optional[datetime.datetime][source]
property num_outputs: int[source]
property num_failures: int[source]
latest_outputs(status: Optional[str] = None) -> dict[str, Any][source]

Get the latest output that each archive method produced for link

canonical_outputs() -> Dict[str, Optional[str]][source]

Predict the expected output paths that should be present after archiving

property url_hash: str[source]
property scheme: str[source]
property domain: str[source]
property path: str[source]
property basename: str[source]
property extension: str[source]
property base_url: str[source]
property is_static: bool[source]
property is_archived: bool[source]
as_snapshot()[source]

Implement this based on your Django model requirements

static _ts_to_date_str(dt: Optional[datetime.datetime]) -> Optional[str][source]
static _parse_date(date_str: Optional[str]) -> Optional[datetime.datetime][source]