archivebox.crawls.admin

Module Contents

Classes

MaxDepthListFilter

URLFiltersField

CrawlAdminForm

Custom form for the Crawl admin that renders the urls field as a textarea.

CrawlAdmin

CrawlScheduleAdmin

Functions

render_snapshots_list

Render a nice inline list view of snapshots with status, title, URL, and progress.

register_admin

API

class archivebox.crawls.admin.MaxDepthListFilter[source]

Bases: django.contrib.admin.SimpleListFilter

title[source]

‘max depth’

parameter_name[source]

‘max_depth’

lookups(request, model_admin)[source]
queryset(request, queryset)[source]
archivebox.crawls.admin.render_snapshots_list(snapshots_qs, request=None, crawl=None, page_size=50, prefix='snapshots')[source]

Render a nice inline list view of snapshots with status, title, URL, and progress.

class archivebox.crawls.admin.URLFiltersField[source]

Bases: django.forms.Field

widget[source]

‘URLFiltersWidget(…)’

to_python(value)[source]
class archivebox.crawls.admin.CrawlAdminForm(*args, **kwargs)[source]

Bases: django.forms.ModelForm

Custom form for the Crawl admin that renders the urls field as a textarea.

Initialization

tags_editor[source]

‘CharField(…)’

url_filters[source]

‘URLFiltersField(…)’

class Meta[source]
model[source]

None

fields[source]

‘__all__’

widgets[source]

None

static extract_url_line(line)[source]
static regex_escape(text)[source]
classmethod generated_host_allowlist(urls)[source]
static subpath_prefix(pathname)[source]
static parsed_host_and_port(parsed)[source]
classmethod generated_subpath_allowlist(urls)[source]
classmethod derive_filter_toggles(urls, allowlist)[source]
static effective_only_new(crawl=None)[source]
static inherited_only_new(crawl)[source]
clean_tags_editor()[source]
clean_url_filters()[source]
save(commit=True)[source]
class archivebox.crawls.admin.CrawlAdmin(model, admin_site)[source]

Bases: archivebox.base_models.admin.ConfigEditorMixin, archivebox.base_models.admin.BaseModelAdmin

form[source]

None

change_form_template[source]

‘admin/crawls/crawl/change_form.html’

()

paginator[source]

None

show_full_result_count[source]

False

list_display[source]

(‘short_id’, ‘permissions_badge’, ‘created_at’, ‘owner’, ‘depth’, ‘status_with_stop_reason’, ‘pause_…

sort_fields[source]

(‘id’, ‘created_at’, ‘created_by’, ‘max_depth’, ‘label’, ‘notes’, ‘schedule_str’, ‘status’, ‘retry_a…

search_fields[source]

(‘id’, ‘created_by__username’, ‘max_depth’, ‘label’, ‘notes’, ‘schedule_id’, ‘status’, ‘urls’)

readonly_fields[source]

(‘created_at’, ‘modified_at’, ‘stop_reason_display’)

fieldsets[source]

((‘URLs’,), (‘Overview’,), (‘Config’,))

add_fieldsets[source]

((‘URLs’,), (‘Overview’,), (‘Config’,))

list_filter[source]

()

ordering[source]

[‘-created_at’, ‘-retry_at’]

list_per_page[source]

50

actions[source]

[‘pause_selected_crawls’, ‘resume_selected_crawls’, ‘seal_selected_crawls’, ‘delete_selected_batched…

change_actions[source]

[‘recrawl’]

class Media[source]
css[source]

None

js[source]

(‘admin/crawls/crawl_admin.js’,)

changelist_view(request, extra_context=None)[source]
should_annotate_snapshot_counts(request)[source]
hydrate_visible_snapshot_counts(crawls)[source]
get_queryset(request)[source]

Keep joins page-local while computing per-row snapshot counts in the page query.

change_view(request, object_id, form_url='', extra_context=None)[source]
add_view(request, form_url='', extra_context=None)[source]
get_fieldsets(request, obj=None)[source]
get_urls()[source]
get_actions(request)[source]
delete_selected_batched(request, queryset)[source]

Delete crawls in a single transaction to avoid SQLite concurrency issues.

pause_selected_crawls(request, queryset)[source]
resume_selected_crawls(request, queryset)[source]
seal_selected_crawls(request, queryset)[source]
set_crawl_permissions(request, queryset)[source]
update_crawl_permissions(queryset, permissions)[source]
recrawl(request, obj)[source]

Duplicate this crawl as a new crawl with the same URLs and settings.

stop_reason_display(obj)[source]
stop_reason_for_crawl(obj)[source]
limit_config_for_crawl(obj, output_dir)[source]
status_with_stop_reason(obj)[source]
short_id(obj)[source]
owner(obj)[source]
depth(obj)[source]
permissions_badge(obj)[source]
pause_resume_control(obj)[source]
num_archived_snapshots(obj)[source]
num_total_snapshots(obj)[source]
snapshots_changelist(obj)[source]
delete_snapshot_view(request: django.http.HttpRequest, object_id: str, snapshot_id: str)[source]
exclude_domain_view(request: django.http.HttpRequest, object_id: str, snapshot_id: str)[source]
set_permissions_view(request: django.http.HttpRequest, object_id: str)[source]
schedule_str(obj)[source]
urls_preview(obj)[source]
urls_editor(obj)[source]

Editor for crawl URLs.

class archivebox.crawls.admin.CrawlScheduleAdmin[source]

Bases: archivebox.base_models.admin.BaseModelAdmin

list_display[source]

(‘id’, ‘created_at’, ‘created_by’, ‘label’, ‘notes’, ‘template_str’, ‘crawls’, ‘num_crawls’, ‘num_sn…

sort_fields[source]

(‘id’, ‘created_at’, ‘created_by’, ‘label’, ‘notes’, ‘template_str’)

search_fields[source]

(‘id’, ‘created_by__username’, ‘label’, ‘notes’, ‘schedule_id’, ‘template_id’, ‘template__urls’)

readonly_fields[source]

(‘created_at’, ‘modified_at’, ‘crawls’, ‘snapshots’)

fieldsets[source]

((‘Schedule Info’,), (‘Configuration’,), (‘Metadata’,), (‘Crawls’,), (‘Snapshots’,))

list_filter[source]

(‘created_by’,)

ordering[source]

[‘-created_at’]

list_per_page[source]

100

actions[source]

[‘delete_selected’]

get_queryset(request)[source]
change_view(request, object_id, form_url='', extra_context=None)[source]
add_view(request, form_url='', extra_context=None)[source]
get_fieldsets(request, obj=None)[source]
save_model(request, obj, form, change)[source]
template_str(obj)[source]
num_crawls(obj)[source]
num_snapshots(obj)[source]
crawls(obj)[source]
snapshots(obj)[source]
archivebox.crawls.admin.register_admin(admin_site)[source]