Source code for archivebox.search.backends.ripgrep

import re
from subprocess import run, PIPE
from typing import List, Generator

from archivebox.config import ARCHIVE_DIR, RIPGREP_VERSION, SEARCH_BACKEND_TIMEOUT
from archivebox.util import enforce_types

# File extensions that get grouped under the custom "ignore" type defined below.
RG_IGNORE_EXTENSIONS = ('css', 'js', 'orig', 'svg')

# Command-line fragments; presumably handed to the ripgrep binary by the search
# code elsewhere in this module -- confirm against the caller.
RG_ADD_TYPE = '--type-add'
# Evaluates to "ignore:*.{css,js,orig,svg}": the type name "ignore" followed by
# a brace glob covering every extension listed above.
RG_IGNORE_ARGUMENTS = 'ignore:*.{' + ','.join(RG_IGNORE_EXTENSIONS) + '}'
# Case insensitive (i), matching files results (l). The trailing "Tignore"
# presumably excludes the "ignore" type -- TODO confirm against the rg manual.
RG_DEFAULT_ARGUMENTS = '-ilTignore'
RG_REGEX_ARGUMENT = '-e'

# Matches a "/<digits>.<digits>/" path segment and captures the text between
# the two slashes (by its name, a snapshot timestamp).
TIMESTAMP_REGEX = r'\/([\d]+\.[\d]+)\/'
ts_regex = re.compile(TIMESTAMP_REGEX)

@enforce_types
def index(snapshot_id: str, texts: List[str]):
    """Accept a snapshot's texts for indexing; this backend does nothing.

    The function returns immediately without reading either argument.

    NOTE(review): presumably no index is kept because ripgrep scans the
    archived files at query time -- confirm against the search function.

    Args:
        snapshot_id: Identifier of the snapshot (unused).
        texts: Text contents belonging to the snapshot (unused).

    Returns:
        None.
    """
    return None
@enforce_types
def flush(snapshot_ids: Generator[str, None, None]):
    """Accept snapshot ids to remove from the index; this backend does nothing.

    The function returns immediately and never iterates the generator.

    NOTE(review): presumably there is no stored index to clean up for this
    backend -- confirm against the search function.

    Args:
        snapshot_ids: Generator of snapshot identifiers (unused, not consumed).

    Returns:
        None.
    """
    return None