pip
archivebox setup
ONLY_NEW
OVERWRITE
TIMEOUT
MAX_URL_ATTEMPTS
RESOLUTION
CHECK_SSL_VALIDITY
USER_AGENT
COOKIES_FILE
DEFAULT_PERSONA
URL_DENYLIST
URL_ALLOWLIST
SAVE_ALLOWLIST
SAVE_DENYLIST
TAG_SEPARATOR_PATTERN
ADMIN_USERNAME
ADMIN_PASSWORD
PUBLIC_INDEX
PUBLIC_SNAPSHOTS
PUBLIC_ADD_VIEW
SECRET_KEY
BIND_ADDR
LISTEN_HOST
ALLOWED_HOSTS
CSRF_TRUSTED_ORIGINS
ADMIN_BASE_URL
ARCHIVE_BASE_URL
SNAPSHOTS_PER_PAGE
PREVIEW_ORIGINALS
FOOTER_INFO
CUSTOM_TEMPLATES_DIR
REVERSE_PROXY_USER_HEADER
REVERSE_PROXY_WHITELIST
LOGOUT_REDIRECT_URL
LDAP_ENABLED
LDAP_SERVER_URI
LDAP_BIND_DN
LDAP_BIND_PASSWORD
LDAP_USER_BASE
LDAP_USER_FILTER
LDAP_USERNAME_ATTR
LDAP_FIRSTNAME_ATTR
LDAP_LASTNAME_ATTR
LDAP_EMAIL_ATTR
LDAP_CREATE_SUPERUSER
OUTPUT_PERMISSIONS
PUID
PGID
RESTRICT_FILE_NAMES
ENFORCE_ATOMIC_WRITES
TMP_DIR
LIB_DIR
LIB_BIN_DIR
USE_INDEXING_BACKEND
USE_SEARCHING_BACKEND
SEARCH_BACKEND_ENGINE
SEARCH_PROCESS_HTML
DEBUG
IS_TTY
USE_COLOR
SHOW_PROGRESS
IN_DOCKER
IN_QEMU
TITLE_ENABLED
TITLE_TIMEOUT
FAVICON_ENABLED
FAVICON_TIMEOUT
FAVICON_USER_AGENT
WGET_ARGS
WGET_ARGS_EXTRA
WGET_BINARY
WGET_CHECK_SSL_VALIDITY
WGET_COOKIES_FILE
WGET_ENABLED
WGET_TIMEOUT
WGET_USER_AGENT
WGET_WARC_ENABLED
SCREENSHOT_ENABLED
SCREENSHOT_RESOLUTION
SCREENSHOT_TIMEOUT
PDF_ENABLED
PDF_RESOLUTION
PDF_TIMEOUT
DOM_ENABLED
DOM_TIMEOUT
SINGLEFILE_ARGS
SINGLEFILE_ARGS_EXTRA
SINGLEFILE_BINARY
SINGLEFILE_CHECK_SSL_VALIDITY
SINGLEFILE_CHROME_ARGS
SINGLEFILE_COOKIES_FILE
SINGLEFILE_ENABLED
SINGLEFILE_TIMEOUT
SINGLEFILE_USER_AGENT
READABILITY_ARGS
READABILITY_ARGS_EXTRA
READABILITY_BINARY
READABILITY_ENABLED
READABILITY_TIMEOUT
MERCURY_ARGS
MERCURY_ARGS_EXTRA
MERCURY_BINARY
MERCURY_ENABLED
MERCURY_TIMEOUT
DEFUDDLE_ARGS
DEFUDDLE_ARGS_EXTRA
DEFUDDLE_BINARY
DEFUDDLE_ENABLED
DEFUDDLE_TIMEOUT
HTMLTOTEXT_ENABLED
HTMLTOTEXT_TIMEOUT
TRAFILATURA_BINARY
TRAFILATURA_ENABLED
TRAFILATURA_OUTPUT_CSV
TRAFILATURA_OUTPUT_HTML
TRAFILATURA_OUTPUT_JSON
TRAFILATURA_OUTPUT_MARKDOWN
TRAFILATURA_OUTPUT_TXT
TRAFILATURA_OUTPUT_XML
TRAFILATURA_OUTPUT_XMLTEI
TRAFILATURA_TIMEOUT
GIT_ARGS
GIT_ARGS_EXTRA
GIT_BINARY
GIT_DOMAINS
GIT_ENABLED
GIT_TIMEOUT
YTDLP_ARGS
YTDLP_ARGS_EXTRA
YTDLP_BINARY
YTDLP_CHECK_SSL_VALIDITY
YTDLP_COOKIES_FILE
YTDLP_ENABLED
YTDLP_MAX_SIZE
YTDLP_TIMEOUT
GALLERYDL_ARGS
GALLERYDL_ARGS_EXTRA
GALLERYDL_BINARY
GALLERYDL_CHECK_SSL_VALIDITY
GALLERYDL_COOKIES_FILE
GALLERYDL_ENABLED
GALLERYDL_TIMEOUT
FORUMDL_ARGS
FORUMDL_ARGS_EXTRA
FORUMDL_BINARY
FORUMDL_ENABLED
FORUMDL_OUTPUT_FORMAT
FORUMDL_TIMEOUT
PAPERSDL_ARGS
PAPERSDL_ARGS_EXTRA
PAPERSDL_BINARY
PAPERSDL_ENABLED
PAPERSDL_TIMEOUT
ARCHIVEDOTORG_ENABLED
ARCHIVEDOTORG_TIMEOUT
ARCHIVEDOTORG_USER_AGENT
CHROME_ARGS
CHROME_ARGS_EXTRA
CHROME_BINARY
CHROME_CHECK_SSL_VALIDITY
CHROME_DELAY_AFTER_LOAD
CHROME_ENABLED
CHROME_HEADLESS
CHROME_PAGELOAD_TIMEOUT
CHROME_RESOLUTION
CHROME_SANDBOX
CHROME_TIMEOUT
CHROME_USER_AGENT
CHROME_USER_DATA_DIR
CHROME_WAIT_FOR
DNS_ENABLED
DNS_TIMEOUT
SSL_ENABLED
SSL_TIMEOUT
HEADERS_ENABLED
HEADERS_TIMEOUT
REDIRECTS_ENABLED
REDIRECTS_TIMEOUT
RESPONSES_ENABLED
RESPONSES_TIMEOUT
CONSOLELOG_ENABLED
CONSOLELOG_TIMEOUT
ACCESSIBILITY_ENABLED
ACCESSIBILITY_TIMEOUT
SEO_ENABLED
SEO_TIMEOUT
HASHES_ENABLED
HASHES_TIMEOUT
STATICFILE_ENABLED
STATICFILE_TIMEOUT
UBLOCK_ENABLED
ISTILLDONTCAREABOUTCOOKIES_ENABLED
TWOCAPTCHA_API_KEY
TWOCAPTCHA_AUTO_SUBMIT
TWOCAPTCHA_ENABLED
TWOCAPTCHA_RETRY_COUNT
TWOCAPTCHA_RETRY_DELAY
TWOCAPTCHA_TIMEOUT
MODALCLOSER_ENABLED
MODALCLOSER_POLL_INTERVAL
MODALCLOSER_TIMEOUT
INFINISCROLL_ENABLED
INFINISCROLL_EXPAND_DETAILS
INFINISCROLL_MIN_HEIGHT
INFINISCROLL_SCROLL_DELAY
INFINISCROLL_SCROLL_DISTANCE
INFINISCROLL_SCROLL_LIMIT
INFINISCROLL_TIMEOUT
PARSE_DOM_OUTLINKS_ENABLED
PARSE_DOM_OUTLINKS_TIMEOUT
PARSE_HTML_URLS_ENABLED
PARSE_JSONL_URLS_ENABLED
PARSE_NETSCAPE_URLS_ENABLED
PARSE_TXT_URLS_ENABLED
PARSE_RSS_URLS_ENABLED
ANTHROPIC_API_KEY
CLAUDECODE_BINARY
CLAUDECODE_ENABLED
CLAUDECODE_MAX_TURNS
CLAUDECODE_MODEL
CLAUDECODE_TIMEOUT
CLAUDECHROME_ENABLED
CLAUDECHROME_MAX_ACTIONS
CLAUDECHROME_MODEL
CLAUDECHROME_PROMPT
CLAUDECHROME_TIMEOUT
CLAUDECODEEXTRACT_ENABLED
CLAUDECODEEXTRACT_MAX_TURNS
CLAUDECODEEXTRACT_MODEL
CLAUDECODEEXTRACT_PROMPT
CLAUDECODEEXTRACT_TIMEOUT
CLAUDECODECLEANUP_ENABLED
CLAUDECODECLEANUP_MAX_TURNS
CLAUDECODECLEANUP_MODEL
CLAUDECODECLEANUP_PROMPT
CLAUDECODECLEANUP_TIMEOUT
RIPGREP_ARGS
RIPGREP_ARGS_EXTRA
RIPGREP_BINARY
RIPGREP_TIMEOUT
SEARCH_BACKEND_SONIC_BUCKET
SEARCH_BACKEND_SONIC_COLLECTION
SEARCH_BACKEND_SONIC_HOST_NAME
SEARCH_BACKEND_SONIC_PASSWORD
SEARCH_BACKEND_SONIC_PORT
SEARCH_BACKEND_SQLITE_DB
SEARCH_BACKEND_SQLITE_SEPARATE_DATABASE
SEARCH_BACKEND_SQLITE_TOKENIZERS
[Default, recommended for most people]
[Advanced users only]
EXT4
APFS
ZFS
NTFS
HFS+
BTRFS
EXT2
EXT3
FAT32
exFAT
NFS
SMB
Ceph
ripgrep
ripgrep-all
rga
ugrep
sonic
SQLite FTS5
Crawl
Snapshot
ArchiveResult
archivebox
archivebox.config
archivebox.misc
archivebox.search
archivebox.cli
archivebox.api
archivebox.base_models
archivebox.services
archivebox.ldap
archivebox.mcp
archivebox.crawls
archivebox.personas
archivebox.core
archivebox.ideas
archivebox.workers
archivebox.machine
archivebox.manage
archivebox.__main__
archivebox.hooks
archivebox.uuid_compat
v0.7: Schema improvements
v0.8: Security
v0.9: Performance
v1.0: Full headless browser control
v2.0: Federated or distributed archiving + paid hosted service offering
archivebox.core.admin
register_admin