diff --git a/cyberdrop_dl/appdata.py b/cyberdrop_dl/appdata.py
new file mode 100644
index 000000000..462f8c1e0
--- /dev/null
+++ b/cyberdrop_dl/appdata.py
@@ -0,0 +1,39 @@
+from __future__ import annotations
+
+import dataclasses
+from contextvars import ContextVar
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+_appdata: ContextVar[AppData] = ContextVar("_appdata")
+
+
+@dataclasses.dataclass(slots=True)
+class AppData:
+    path: Path
+    cookies_dir: Path = dataclasses.field(init=False)
+    cache_file: Path = dataclasses.field(init=False)
+    default_config: Path = dataclasses.field(init=False)
+    db_file: Path = dataclasses.field(init=False)
+
+    def __post_init__(self) -> None:
+        self.cookies_dir = self.path / "cookies"
+        self.cache_file = self.path / "cache.yaml"
+        self.default_config = self.path / "config.yaml"
+        self.db_file = self.path / "cyberdrop.db"
+
+    def __fspath__(self) -> str:
+        return str(self)
+
+    def __str__(self) -> str:
+        return str(self.path)
+
+    def mkdirs(self) -> None:
+        for dir in (self.cookies_dir,):
+            dir.mkdir(parents=True, exist_ok=True)
+
+
+def get() -> AppData:
+    return _appdata.get()
diff --git a/cyberdrop_dl/cache.py b/cyberdrop_dl/cache.py
new file mode 100644
index 000000000..d3b1b50b6
--- /dev/null
+++ b/cyberdrop_dl/cache.py
@@ -0,0 +1,65 @@
+from __future__ import annotations
+
+import dataclasses
+from collections.abc import Iterator, MutableMapping
+from contextvars import ContextVar, Token
+from typing import TYPE_CHECKING, Any, Self
+
+from cyberdrop_dl import __version__
+from cyberdrop_dl.utils import yaml
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+_cache: ContextVar[Cache] = ContextVar("_cache")
+
+
+@dataclasses.dataclass(slots=True)
+class Cache(MutableMapping[str, Any]):
+    file: Path
+    _cache: dict[str, Any] = dataclasses.field(init=False)
+    _token: Token[Cache] | None = None
+
+    def __post_init__(self) -> None:
+        self._cache = yaml.load(self.file)
+
+    def __getitem__(self, key: str) -> Any:
+        return self._cache.get(key)
+
+    def __iter__(self) -> Iterator[str]:
+        return iter(self._cache)
+
+    def __len__(self) -> int:
+        return len(self._cache)
+
+    def __delitem__(self, key: str) -> None:
+        try:
+            _ = self._cache.pop(key)
+        except KeyError:
+            pass
+        else:
+            self._save()
+
+    def __setitem__(self, key: str, value: Any, /) -> None:
+        self._cache[key] = value
+        self._save()
+
+    def __enter__(self) -> Self:
+        self._token = _cache.set(self)
+        return self
+
+    def __exit__(self, *_) -> None:
+        assert self._token is not None
+        self._token = _cache.reset(self._token)
+        self.close()
+
+    def _save(self) -> None:
+        if self._token is None:
+            yaml.save(self.file, self._cache)
+
+    def close(self) -> None:
+        self["version"] = __version__
+
+
+def get() -> Cache:
+    return _cache.get()
diff --git a/cyberdrop_dl/cli/__init__.py b/cyberdrop_dl/cli/__init__.py
index d7d1af189..6455b6d3b 100644
--- a/cyberdrop_dl/cli/__init__.py
+++ b/cyberdrop_dl/cli/__init__.py
@@ -1,21 +1,14 @@
-from __future__ import annotations
+import shutil
+from typing import Annotated, Literal
-
-import dataclasses
-import sys
-from argparse import SUPPRESS, ArgumentParser, RawDescriptionHelpFormatter
-from shutil import get_terminal_size
-from typing import TYPE_CHECKING, Any, Final, NoReturn
+import cyclopts
+import pydantic
+from cyclopts import Parameter
-from pydantic import BaseModel, ValidationError
-
-from cyberdrop_dl import __version__, env
-from cyberdrop_dl.cli import arguments
-from cyberdrop_dl.cli.model import CLIargs, ParsedArgs
-from cyberdrop_dl.config import ConfigSettings, GlobalSettings
-
-if TYPE_CHECKING:
-    from argparse import _ArgumentGroup as ArgGroup  # pyright: ignore[reportPrivateUsage]
-    from collections.abc import Sequence
+from cyberdrop_dl import __version__, env, signature
+from cyberdrop_dl.cli.model import CLIargs, ParsedArgs, RetryArgs
+from cyberdrop_dl.models.types import HttpURL
+from cyberdrop_dl.utils.yaml import format_validation_error
 
 
 def is_terminal_in_portrait() -> bool:
@@ -24,7 +17,7 @@ def is_terminal_in_portrait() -> bool:
     if env.PORTRAIT_MODE:
         return True
 
-    terminal_size = get_terminal_size()
+    terminal_size = shutil.get_terminal_size()
     width, height = terminal_size.columns, terminal_size.lines
     aspect_ratio = width / height
 
@@ -33,139 +26,65 @@ def is_terminal_in_portrait() -> bool:
         return False
 
     # Check for mobile device in portrait mode
-    if (aspect_ratio < 1.5 and height >= 40) or (width <= 85 and aspect_ratio < 2.3):
+    if (aspect_ratio < 1.5 and height >= 40) or (aspect_ratio < 2.3 and width <= 85):
         return True
 
     # Assume landscape mode for other cases
     return False
 
 
-class CustomHelpFormatter(RawDescriptionHelpFormatter):
-    MAX_HELP_POS: Final = 80
-    INDENT_INCREMENT: Final = 2
-
-    def __init__(self, prog: str, width: int | None = None) -> None:
-        super().__init__(prog, self.INDENT_INCREMENT, self.MAX_HELP_POS, width)
-
-    def _get_help_string(self, action) -> str | None:
-        if action.help:
-            return action.help.replace("program's", "CDL")  # The ' messes up the markdown formatting
-        return action.help
-
-
-@dataclasses.dataclass(slots=True)
-class CLIParser:
-    parser: ArgumentParser
-    groups: dict[str, list[ArgGroup]]
-
-    def parse_args(self, args: Sequence[str] | None = None) -> dict[str, dict[str, Any]]:
-        return self._unflatten(self._parse_args(args))
-
-    def _parse_args(self, args: Sequence[str] | None = None) -> dict[str, Any]:
-        return dict(sorted(vars(self.parser.parse_intermixed_args(args)).items()))
-
-    def _unflatten(self, namespace: dict[str, Any]) -> dict[str, dict[str, Any]]:
-        parsed_args: dict[str, dict[str, Any]] = {}
-
-        for name, groups in self.groups.items():
-            parsed_args[name] = {}
-            for group in groups:
-                group_dict = {arg.dest: v for arg in group._group_actions if (v := namespace.get(arg.dest)) is not None}
-                if group_dict:
-                    assert group.title
-                    parsed_args[name][group.title] = _unflatten_nested_args(group_dict)
-
-        parsed_args["cli_only_args"] = parsed_args["cli_only_args"]["CLI-only options"]
-        return parsed_args
-
-
-def make_parser() -> CLIParser:
-    kwargs: dict[str, Any] = {"color": True} if sys.version_info > (3, 14) else {}
-    parser = ArgumentParser(
-        description="Bulk asynchronous downloader for multiple file hosts",
-        usage="cyberdrop-dl [OPTIONS] URL [URL...]",
-        allow_abbrev=False,
-        formatter_class=CustomHelpFormatter,
-        **kwargs,
-    )
-    _ = parser.add_argument("-V", "--version", action="version", version=f"%(prog)s {__version__}")
-
-    cli_only = parser.add_argument_group("CLI-only options")
-    _add_args_from_model(cli_only, CLIargs)
-
-    groups = {
-        "config_settings": _create_groups_from_nested_models(parser, ConfigSettings),
-        "global_settings": _create_groups_from_nested_models(parser, GlobalSettings),
-        "cli_only_args": [cli_only],
-    }
-
-    return CLIParser(parser, groups)
-
-
-def parse_args(args: Sequence[str] | None = None) -> ParsedArgs:
-    """Parses the command line arguments passed into the program."""
-    from cyberdrop_dl.utils.yaml import handle_validation_error
-
-    parsed_args = make_parser().parse_args(args)
-    try:
-        model = ParsedArgs.model_validate(parsed_args, extra="forbid")
-
-    except ValidationError as e:
-        handle_validation_error(e, title="CLI arguments")
-        sys.exit(1)
-
-    if model.cli_only_args.show_supported_sites:
-        show_supported_sites()
-
-    return model
-
-
-def show_supported_sites() -> NoReturn:
-    from rich import print
-
+class App(cyclopts.App):
+    @signature.copy(cyclopts.App._parse_known_args)
+    def _parse_known_args(self, *args, **kwargs):
+        try:
+            return super()._parse_known_args(*args, **kwargs)
+        except cyclopts.ValidationError as e:
+            if isinstance(e.__cause__, pydantic.ValidationError):
+                e.exception_message = format_validation_error(e.__cause__, title="CLI arguments")
+            raise
+
+
+app = App(
+    name="cyberdrop-dl",
+    help="Bulk asynchronous downloader for multiple file hosts",
+    version=__version__,
+    default_parameter=Parameter(negative_iterable=[]),
+)
+
+
+@app.command()
+def download(
+    links: Annotated[
+        list[HttpURL] | None,
+        Parameter(
+            name="links",
+            negative=[],
+            help="link(s) to content to download",
+        ),
+    ] = None,
+    /,
+    *,
+    cli_args: CLIargs = CLIargs(),  # noqa: B008 # pyright: ignore[reportCallInDefaultInitializer]
+    parsed_settings: ParsedArgs = ParsedArgs(),  # pyright: ignore[reportCallInDefaultInitializer] # noqa: B008
+):
+    """Scrape and download files from a list of URLs (from a file or stdin)"""
+    return links, cli_args, parsed_settings
+
+
+@app.command()
+def show_supported_sites() -> None:
+    """Show a list of all supported sites"""
     from cyberdrop_dl.utils.markdown import get_crawlers_info_as_rich_table
 
     table = get_crawlers_info_as_rich_table()
-    print(table)
-    sys.exit(0)
-
-
-def _unflatten_nested_args(data: dict[str, Any]) -> dict[str, Any]:
-    result: dict[str, Any] = {}
-
-    for command_name, value in data.items():
-        inner_names = command_name.split(".")
-        current_level = result
-        for index, key in enumerate(inner_names):
-            if index < len(inner_names) - 1:
-                if key not in current_level:
-                    current_level[key] = {}
-                current_level = current_level[key]
-            else:
-                current_level[key] = value
-    return result
-
-
-def _add_args_from_model(parser: ArgumentParser | ArgGroup, model: type[BaseModel]) -> None:
-    cli_args = model is CLIargs
-
-    for arg in arguments.parse(model):
-        options = arg.compose_options()
-
-        if cli_args and arg.arg_type is bool and not (arg.cli_name == "portrait" and env.RUNNING_IN_TERMUX):
-            default = arg.default if cli_args else SUPPRESS
-            options["action"] = "store_false" if default else "store_true"
+    app.console.print(table)
 
-        _ = parser.add_argument(*arg.name_or_flags, **options)
 
+@app.command()
+def retry(choice: Literal["all", "failed", "maintenance"], /, *, retry: RetryArgs | None = None):
+    """Retry all, failed, or maintenance downloads"""
+    return choice, retry or RetryArgs()
 
-def _create_groups_from_nested_models(parser: ArgumentParser, model: type[BaseModel]) -> list[ArgGroup]:
-    groups: list[ArgGroup] = []
-    for name, field in model.model_fields.items():
-        submodel = field.annotation
-        assert submodel and issubclass(submodel, BaseModel)
-        submodel_group = parser.add_argument_group(name)
-        _add_args_from_model(submodel_group, submodel)
-        groups.append(submodel_group)
-    return groups
+
+if __name__ == "__main__":
+    app()
diff --git a/cyberdrop_dl/cli/arguments.py b/cyberdrop_dl/cli/arguments.py
deleted file mode 100644
index 85536a4ee..000000000
--- a/cyberdrop_dl/cli/arguments.py
+++ /dev/null
@@ -1,122 +0,0 @@
-import dataclasses
-from argparse import BooleanOptionalAction
-from collections.abc import Generator, Iterable
-from typing import Any, Literal, TypedDict
-
-from pydantic import BaseModel
-
-_NOT_SET: Any = object()
-
-
-class _ArgumentParams(TypedDict, total=False):
-    action: str
-    nargs: int | str | None
-    const: Any
-    default: Any
-    choices: Iterable[Any] | None
-    required: bool
-    help: str | None
-    metavar: str | tuple[str, ...] | None
-    dest: str | None
-
-
-@dataclasses.dataclass(slots=True, frozen=True, kw_only=True)
-class ArgumentParams:
-    positional_only: bool = dataclasses.field(default=False, metadata={"exclude": True})
-    nargs: Literal["?", "*", "+"] | None = _NOT_SET
-    const: Any = _NOT_SET
-    dest: str = _NOT_SET
-    choices: Iterable[Any] | None = _NOT_SET
-    metavar: str | tuple[str, ...] | None = _NOT_SET
-
-    def as_dict(self) -> _ArgumentParams:
-        return {name: v for name in _params if (v := getattr(self, name)) is not _NOT_SET}  # pyright: ignore[reportReturnType]
-
-
-_params = tuple(f.name for f in dataclasses.fields(ArgumentParams) if not f.metadata.get("exclude"))
-
-
-@dataclasses.dataclass(slots=True, kw_only=True)
-class Argument:
-    name_or_flags: list[str] = dataclasses.field(init=False)
-    python_name: str
-    cli_name: str = dataclasses.field(init=False)
-    aliases: tuple[str, ...]
-    required: bool
-    default: Any
-    annotation: Any
-    help: str | None
-    metadata: list[Any]
-    positional_only: bool = dataclasses.field(init=False)
-    arg_type: type = dataclasses.field(init=False)
-
-    def __post_init__(self) -> None:
-        self.cli_name = self.python_name.replace("_", "-")
-        self.arg_type = type(self.default)
-
-        if self.arg_type not in (list, set, bool):
-            self.arg_type = str
-
-        self.positional_only = override.positional_only if (override := self._overrides()) else False
-        cli_command = f"{'' if self.positional_only else '--'}{self.cli_name}"
-        self.name_or_flags = [cli_command]
-
-        for alias in self.aliases:
-            if alias and len(alias) == 1:
-                self.name_or_flags.insert(0, f"-{alias}")
-            else:
-                self.name_or_flags.append(alias)
-
-    def compose_options(self) -> _ArgumentParams:
-        options = self._options()
-        if override := self._overrides():
-            return options | override.as_dict()
-
-        return options
-
-    def _overrides(self) -> ArgumentParams | None:
-        for meta in self.metadata:
-            if isinstance(meta, ArgumentParams):
-                return meta
-
-    def _options(self) -> _ArgumentParams:
-        options = dict(  # noqa: C408
-            default=self.default,
-            help=self.help,
-            action="store",
-        )
-        if not self.positional_only:
-            options["dest"] = self.python_name
-
-        if self.arg_type is bool:
-            options["action"] = BooleanOptionalAction
-
-        elif self.arg_type in (list, set):
-            options.update(nargs="*", action="extend")
-
-        else:
-            options["type"] = self.arg_type
-
-        return options  # pyright: ignore[reportReturnType]
-
-
-def parse(model: type[BaseModel]) -> Generator[Argument]:
-    for python_name, field in model.model_fields.items():
-        aliases = filter(
-            None,
-            (
-                field.alias,
-                field.validation_alias,
-                field.serialization_alias,
-            ),
-        )
-
-        yield Argument(
-            python_name=python_name,
-            aliases=tuple(map(str, aliases)),
-            annotation=field.annotation,
-            default=field.default,
-            required=field.is_required(),
-            metadata=field.metadata,
-            help=field.description or None,
-        )
diff --git a/cyberdrop_dl/cli/model.py b/cyberdrop_dl/cli/model.py
index 59a1ad8b5..22d70cfb5 100644
--- a/cyberdrop_dl/cli/model.py
+++ b/cyberdrop_dl/cli/model.py
@@ -1,14 +1,13 @@
 import datetime
-from collections.abc import Iterable
 from enum import StrEnum, auto
 from pathlib import Path
-from typing import Annotated, Any, Literal, Self
+from typing import Literal
 
-from pydantic import BaseModel, Field, computed_field, field_validator, model_validator
+from cyclopts import Parameter
+from pydantic import Field
 
-from cyberdrop_dl.cli.arguments import ArgumentParams
-from cyberdrop_dl.config import ConfigSettings, GlobalSettings
-from cyberdrop_dl.models.types import HttpURL
+from cyberdrop_dl.config import Config
+from cyberdrop_dl.models import Settings
 
 
 class UIOptions(StrEnum):
@@ -18,45 +17,19 @@ class UIOptions(StrEnum):
     FULLSCREEN = auto()
 
 
-class CLIargs(BaseModel):
-    links: Annotated[
-        list[HttpURL],
-        ArgumentParams(positional_only=True, metavar="LINK(s)"),
-    ] = Field(
-        default=[],
-        description="link(s) to content to download (passing multiple links is supported)",
-    )
+@Parameter(name="*", negative_bool=[])
+class CLIargs(Settings):
     appdata_folder: Path | None = Field(
         default=None,
         description="AppData folder path",
    )
-    completed_after: datetime.date | None = Field(
-        default=None,
-        description="only retry downloads that were completed on or after this date",
-    )
-    completed_before: datetime.date | None = Field(
-        default=None,
-        description="only retry downloads that were completed on or before this date",
-    )
     config_file: Path | None = Field(
         default=None,
         description="path to the CDL settings.yaml file to load",
     )
-    download: bool = Field(
-        default=False,
-        description="skips UI, start download immediately",
-    )
-    download_tiktok_audios: bool = Field(
-        default=False,
-        description="download TikTok audios from posts and save them as separate files",
-    )
-    download_tiktok_src_quality_videos: bool = Field(
-        default=False,
-        description="download TikTok videos in source quality",
-    )
-    impersonate: Annotated[
+    impersonate: (
         Literal[
             "chrome",
             "edge",
             "safari",
             "safari_ios",
@@ -65,17 +38,12 @@ class CLIargs(BaseModel):
             "chrome_android",
             "firefox",
         ]
-        | bool
-        | None,
-        ArgumentParams(nargs="?", const=True),
-    ] = Field(
+        | None
+    ) = Field(
        default=None,
        description="Use this target as impersonation for all scrape requests",
     )
-    max_items_retry: int = Field(
-        default=0,
-        description="max number of links to retry",
-    )
+
     portrait: bool = Field(
         default=False,
         description="force CDL to run with a vertical layout",
@@ -84,70 +52,25 @@ class CLIargs(BaseModel):
         default=True,
         description="show stats report at the end of a run",
     )
-    retry_all: bool = Field(
-        default=False,
-        description="retry all downloads",
-    )
-    retry_failed: bool = Field(
-        default=False,
-        description="retry failed downloads",
-    )
-    retry_maintenance: bool = Field(
-        default=False,
-        description="retry download of maintenance files (bunkr). Requires files to be hashed",
-    )
-    show_supported_sites: bool = Field(
-        default=False,
-        description="shows a list of supported sites and exits",
-    )
-    ui: UIOptions = Field(
-        default=UIOptions.FULLSCREEN,
-        description="DISABLED, ACTIVITY, SIMPLE or FULLSCREEN",
-    )
-
-    @property
-    def retry_any(self) -> bool:
-        return any((self.retry_all, self.retry_failed, self.retry_maintenance))
 
-    @property
-    def fullscreen_ui(self) -> bool:
-        return self.ui == UIOptions.FULLSCREEN
-
-    @computed_field
-    def __computed__(self) -> dict[str, bool]:
-        return {"retry_any": self.retry_any, "fullscreen_ui": self.fullscreen_ui}
-
-    @model_validator(mode="after")
-    def mutually_exclusive(self) -> Self:
-        group1 = [self.links, self.retry_all, self.retry_failed, self.retry_maintenance]
-        msg1 = "`--links`, '--retry-all', '--retry-maintenace' and '--retry-failed' are mutually exclusive"
-        _check_mutually_exclusive(group1, msg1)
-        return self
-
-    @field_validator("ui", mode="before")
-    @classmethod
-    def lower(cls, value: str) -> str:
-        return value.lower()
+@Parameter(name="*", negative_bool="")
+class RetryArgs(Settings):
+    completed_after: datetime.date | None = Field(
+        default=None,
+        description="only retry downloads that were completed on or after this date",
+    )
+    completed_before: datetime.date | None = Field(
+        default=None,
+        description="only retry downloads that were completed on or before this date",
+    )
 
-def _check_mutually_exclusive(group: Iterable[Any], msg: str) -> None:
-    if sum(1 for value in group if value) >= 2:
-        raise ValueError(msg)
+    max_items_retry: int = Field(
+        default=0,
+        description="max number of links to retry",
+    )
 
 
-class ParsedArgs(BaseModel):
+class ParsedArgs(Settings):
     cli_only_args: CLIargs = CLIargs()
-    config_settings: ConfigSettings = ConfigSettings()
-    global_settings: GlobalSettings = GlobalSettings()
-
-    def model_post_init(self, *_) -> None:
-        if self.cli_only_args.retry_all or self.cli_only_args.retry_maintenance:
-            self.config_settings.runtime_options.ignore_history = True
-
-        if (
-            not self.cli_only_args.fullscreen_ui
-            or self.cli_only_args.retry_any
-            or self.cli_only_args.config_file
-            or self.config_settings.sorting.sort_downloads
-        ):
-            self.cli_only_args.download = True
+    config: Config = Config()
diff --git a/cyberdrop_dl/clients/download_client.py b/cyberdrop_dl/clients/download_client.py
index 738cea3ec..ab8b1746e 100644
--- a/cyberdrop_dl/clients/download_client.py
+++ b/cyberdrop_dl/clients/download_client.py
@@ -2,41 +2,36 @@
 
 import asyncio
 import contextlib
-import itertools
 import time
-from collections.abc import Generator
 from http import HTTPStatus
 from typing import TYPE_CHECKING, Any
 
 import aiofiles
 
-from cyberdrop_dl import constants
+from cyberdrop_dl import config, constants
 from cyberdrop_dl.clients.response import AbstractResponse
-from cyberdrop_dl.constants import FILE_FORMATS
-from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL
-from cyberdrop_dl.exceptions import DDOSGuardError, DownloadError, InvalidContentTypeError, SlowDownloadError
+from cyberdrop_dl.exceptions import DownloadError, InvalidContentTypeError, SlowDownloadError
 from cyberdrop_dl.utils import aio, dates
 from cyberdrop_dl.utils.aio import WeakAsyncLocks
-from cyberdrop_dl.utils.logger import log, log_debug
+from cyberdrop_dl.utils.logger import log
 from cyberdrop_dl.utils.utilities import get_size_or_none
 
 if TYPE_CHECKING:
-    from collections.abc import AsyncGenerator, Callable, Coroutine, Generator, Mapping
+    from collections.abc import AsyncGenerator, Callable, Coroutine, Mapping
     from pathlib import Path
     from typing import Any
 
     import aiohttp
 
-    from cyberdrop_dl.data_structures.url_objects import MediaItem
+    from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL, MediaItem
+    from cyberdrop_dl.managers import Manager
     from cyberdrop_dl.managers.client_manager import ClientManager
-    from cyberdrop_dl.managers.manager import Manager
+    from cyberdrop_dl.progress._common import ProgressHook
 
 _CONTENT_TYPES_OVERRIDES: dict[str, str] = {"text/vnd.trolltech.linguist": "video/MP2T"}
 _SLOW_DOWNLOAD_PERIOD: int = 10  # seconds
-_CHROME_ANDROID_USER_AGENT: str = (
-    "Mozilla/5.0 (Linux; Android 16) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.7204.180 Mobile Safari/537.36"
-)
+
 _FREE_SPACE_CHECK_PERIOD: int = 5  # Check every 5 chunks
 _NULL_CONTEXT: contextlib.nullcontext[None] = contextlib.nullcontext()
 _USE_IMPERSONATION: set[str] = {"vsco", "celebforum"}
@@ -48,7 +43,7 @@ class DownloadClient:
     def __init__(self, manager: Manager, client_manager: ClientManager) -> None:
         self.manager = manager
         self.client_manager = client_manager
-        self.download_speed_threshold = self.manager.config_manager.settings_data.runtime_options.slow_download_speed
+        self.download_speed_threshold = config.get().runtime_options.slow_download_speed
         self._server_locks = WeakAsyncLocks[str]()
         self.server_locked_domains: set[str] = set()
         self._supports_ranges: bool = True
@@ -65,58 +60,20 @@ async def _track_errors(self, domain: str):
             await self.client_manager.manager.states.RUNNING.wait()
             yield
 
-    def _get_download_headers(self, domain: str, referer: AbsoluteHttpURL) -> dict[str, str]:
-        download_headers = {
-            "User-Agent": self.manager.global_config.general.user_agent,
-            "Referer": str(referer),
-        }
-        auth_data = self.manager.config_manager.authentication_data
-        if domain == "pixeldrain" and auth_data.pixeldrain.api_key:
-            download_headers["Authorization"] = self.manager.client_manager.basic_auth(
-                "Cyberdrop-DL", auth_data.pixeldrain.api_key
-            )
-        elif domain == "gofile":
-            gofile_cookies = self.client_manager.cookies.filter_cookies(AbsoluteHttpURL("https://gofile.io"))
-            api_key = gofile_cookies.get("accountToken", "")
-            if api_key:
-                download_headers["Authorization"] = f"Bearer {api_key.value}"  # type: ignore
-        elif domain == "odnoklassniki":
-            # TODO: Add "headers" attribute to MediaItem to use custom headers for downloads
-            download_headers |= {
-                "Accept-Language": "en-gb, en;q=0.8",
-                "User-Agent": _CHROME_ANDROID_USER_AGENT,
-                "Referer": "https://m.ok.ru/",
-                "Origin": "https://m.ok.ru",
-            }
-        elif domain == "megacloud":
-            download_headers["Referer"] = "https://megacloud.blog/"
-        return download_headers
-
     async def _download(self, domain: str, media_item: MediaItem) -> bool:
-        """Downloads a file."""
-        download_headers = self._get_download_headers(domain, media_item.referer)
-        downloaded_filename = await self.manager.db_manager.history_table.get_downloaded_filename(domain, media_item)
-        download_dir = self.get_download_dir(media_item)
-        if media_item.is_segment:
-            media_item.partial_file = media_item.complete_file = download_dir / media_item.filename
-        else:
-            media_item.partial_file = download_dir / f"{downloaded_filename}{constants.TempExt.PART}"
-
         resume_point = 0
-        if (
-            self._supports_ranges
-            and media_item.partial_file
-            and (size := await asyncio.to_thread(get_size_or_none, media_item.partial_file))
-        ):
+        if self._supports_ranges and (size := await asyncio.to_thread(get_size_or_none, media_item.partial_file)):
             resume_point = size
-            download_headers["Range"] = f"bytes={size}-"
+            media_item.headers["Range"] = f"bytes={size}-"
 
-        await asyncio.sleep(self.manager.config_manager.global_settings_data.rate_limiting_options.total_delay)
+        await asyncio.sleep(config.get().rate_limiting_options.total_delay)
 
         def process_response(resp: aiohttp.ClientResponse | AbstractResponse):
             return self._process_response(media_item, domain, resume_point, resp)
 
-        return await self._request_download(media_item, download_headers, process_response)
+        download_url = media_item.debrid_link or media_item.url
+        async with self.__request_context(download_url, media_item.domain, media_item.headers) as resp:
+            return await process_response(resp)
 
     async def _process_response(
         self,
@@ -128,23 +85,23 @@
         if resp.status == HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE:
             await asyncio.to_thread(media_item.partial_file.unlink)
 
-        await self.client_manager.check_http_status(resp, download=True)
+        _ = await self.client_manager.check_http_status(resp, download=True)
         if not media_item.is_segment:
-            _ = get_content_type(media_item.ext, resp.headers)
+            _ = _get_content_type(media_item.ext, resp.headers)
             media_item.filesize = int(resp.headers.get("Content-Length", "0")) or None
         if not media_item.complete_file:
             proceed, skip = await self.get_final_file_info(media_item, domain)
             self.client_manager.check_content_length(resp.headers)
             if skip:
-                self.manager.progress_manager.download_progress.add_skipped()
+                self.manager.progress_manager.files.add_skipped()
                 return False
             if not proceed:
                 if media_item.is_segment:
                     return True
                 log(f"Skipping {media_item.url} as it has already been downloaded", 10)
-                self.manager.progress_manager.download_progress.add_previously_completed(False)
+                self.manager.progress_manager.files.add_previously_completed(False)
                 await self.process_completed(media_item, domain)
                 await self.handle_media_item_completion(media_item, downloaded=False)
 
@@ -153,20 +110,19 @@
                 return False
 
         if resp.status != HTTPStatus.PARTIAL_CONTENT:
             await asyncio.to_thread(media_item.partial_file.unlink, missing_ok=True)
 
-        if not media_item.is_segment and not media_item.datetime and (last_modified := get_last_modified(resp.headers)):
+        if (
+            not media_item.is_segment
+            and not media_item.timestamp
+            and (last_modified := _get_last_modified(resp.headers))
+        ):
             msg = f"Unable to parse upload date for {media_item.url}, using `Last-Modified` header as file datetime"
             log(msg, 30)
-            media_item.datetime = last_modified
+            media_item.timestamp = last_modified
 
-        task_id = media_item.task_id
-        if task_id is None:
-            size = (media_item.filesize + resume_point) if media_item.filesize is not None else None
-            task_id = self.manager.progress_manager.file_progress.add_task(
-                domain=domain, filename=media_item.filename, expected_size=size
-            )
-            media_item.set_task_id(task_id)
+        size = (media_item.filesize + resume_point) if media_item.filesize is not None else None
 
-        self.manager.progress_manager.file_progress.advance_file(task_id, resume_point)
+        if not media_item.is_segment:
+            self.manager.progress_manager.downloads.new_hook(media_item.filename, size)
 
         await self._append_content(media_item, self._get_resp_reader(resp))
         return True
@@ -193,62 +149,24 @@
         async with self.client_manager._download_session.get(url, headers=headers) as resp:
             yield resp
 
-    async def _request_download(
-        self,
-        media_item: MediaItem,
-        download_headers: dict[str, str],
-        process_response: Callable[[aiohttp.ClientResponse | AbstractResponse], Coroutine[None, None, bool]],
-    ) -> bool:
-        download_url = media_item.debrid_link or media_item.url
-        await self.manager.states.RUNNING.wait()
-        fallback_url_generator = _fallback_generator(media_item)
-        fallback_count = 0
-
-        while True:
-            resp = None
-            try:
-                async with self.__request_context(download_url, media_item.domain, download_headers) as resp:
-                    return await process_response(resp)
-            except (DownloadError, DDOSGuardError):
-                if resp is None:
-                    raise
-                try:
-                    next_download_url = fallback_url_generator.send(resp)
-                except StopIteration:
-                    pass
-                else:
-                    if not next_download_url:
-                        raise
-                    if media_item.debrid_link and media_item.debrid_link == download_url:
-                        msg = f" with debrid URL {download_url} failed, retrying with fallback URL: "
-                    elif media_item.url == download_url:
-                        msg = " failed, retrying with fallback URL: "
-                    else:
-                        fallback_count += 1
-                        msg = f" with fallback URL #{fallback_count} {download_url} failed, retrying with new fallback URL: "
-                    log(f"Download of {media_item.url}{msg}{next_download_url}", 40)
-                    download_url = next_download_url
-                    continue
-                raise
-
     async def _append_content(self, media_item: MediaItem, content: aiohttp.StreamReader | AbstractResponse) -> None:
         """Appends content to a file."""
-        assert media_item.task_id is not None
         check_free_space = self.make_free_space_checker(media_item)
-        check_download_speed = self.make_speed_checker(media_item)
         await check_free_space()
         await self._pre_download_check(media_item)
 
-        async with aiofiles.open(media_item.partial_file, mode="ab") as f:
-            async for chunk in content.iter_chunked(self.client_manager.speed_limiter.chunk_size):
-                await self.manager.states.RUNNING.wait()
-                await check_free_space()
-                chunk_size = len(chunk)
-                await self.client_manager.speed_limiter.acquire(chunk_size)
-                await f.write(chunk)
-                self.manager.progress_manager.file_progress.advance_file(media_item.task_id, chunk_size)
-                check_download_speed()
+        with self.manager.progress_manager.downloads.current_hook as hook:
+            check_download_speed = self.make_speed_checker(hook)
+
+            async with aiofiles.open(media_item.partial_file, mode="ab") as f:
+                async for chunk in content.iter_chunked(self.client_manager.speed_limiter.chunk_size):
+                    await check_free_space()
+                    chunk_size = len(chunk)
+                    await self.client_manager.speed_limiter.acquire(chunk_size)
+                    await f.write(chunk)
+                    hook.advance(chunk_size)
+                    check_download_speed()
 
         await self._post_download_check(media_item)
 
@@ -276,29 +194,28 @@
         async def check_free_space() -> None:
             return
 
         return check_free_space
 
-    def make_speed_checker(self, media_item: MediaItem) -> Callable[[], None]:
+    def make_speed_checker(self, hook: ProgressHook) -> Callable[[], None]:
         last_slow_speed_read = None
 
         def check_download_speed() -> None:
             nonlocal last_slow_speed_read
             if not self.download_speed_threshold:
                 return
-            assert media_item.task_id is not None
-            speed = self.manager.progress_manager.file_progress.get_speed(media_item.task_id)
-            if speed > self.download_speed_threshold:
+
+            if hook.speed() > self.download_speed_threshold:
                 last_slow_speed_read = None
             elif not last_slow_speed_read:
                 last_slow_speed_read = time.perf_counter()
             elif time.perf_counter() - last_slow_speed_read > _SLOW_DOWNLOAD_PERIOD:
-                raise SlowDownloadError(origin=media_item)
+                raise SlowDownloadError
 
         return check_download_speed
 
     async def download_file(self, domain: str, media_item: MediaItem) -> bool:
         """Starts a file."""
-        if self.manager.config.download_options.skip_download_mark_completed and not media_item.is_segment:
+        if config.get().download_options.skip_download_mark_completed and not media_item.is_segment:
             log(f"Download Removed {media_item.url} due to mark completed option", 10)
-            self.manager.progress_manager.download_progress.add_skipped()
+            self.manager.progress_manager.files.add_skipped()
             # set completed path
             await self.process_completed(media_item, domain)
             return False
@@ -307,15 +224,15 @@ async def download_file(self, domain: str, media_item: MediaItem) -> bool:
         downloaded = await self._download(domain, media_item)
         if downloaded:
-            await asyncio.to_thread(media_item.partial_file.rename, media_item.complete_file)
+            _ = await asyncio.to_thread(media_item.partial_file.rename, media_item.complete_file)
             if not media_item.is_segment:
                 proceed = await self.client_manager.check_file_duration(media_item)
                 await self.manager.db_manager.history_table.add_duration(domain, media_item)
                 if not proceed:
                     log(f"Download Skip {media_item.url} due to runtime restrictions", 10)
                     await asyncio.to_thread(media_item.complete_file.unlink)
-                    await self.mark_incomplete(media_item, domain)
-                    self.manager.progress_manager.download_progress.add_skipped()
+                    await self.mark_incomplete(media_item)
+                    self.manager.progress_manager.files.add_skipped()
                     return False
                 await self.process_completed(media_item, domain)
                 await self.handle_media_item_completion(media_item, downloaded=True)
@@ -323,14 +240,13 @@ async def download_file(self, domain: str, media_item: MediaItem) -> bool:
 
     """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"""
 
-    async def mark_incomplete(self, media_item: MediaItem, domain: str) -> None:
+    async def mark_incomplete(self, media_item: MediaItem) -> None:
         """Marks the media item as incomplete in the database."""
         if media_item.is_segment:
             return
-        await self.manager.db_manager.history_table.insert_incompleted(domain, media_item)
+        await self.manager.db_manager.history_table.insert_incompleted(media_item.domain, media_item)
 
     async def process_completed(self, media_item: MediaItem, domain: str) -> None:
-        """Marks the media item as completed in the database and adds to the completed list."""
         await self.mark_completed(domain, media_item)
         await self.add_file_size(domain, media_item)
 
@@ -338,8 +254,6 @@ async def mark_completed(self, domain: str, media_item: MediaItem) -> None:
         await self.manager.db_manager.history_table.mark_complete(domain, media_item)
 
     async def add_file_size(self, domain: str, media_item: MediaItem) -> None:
-        if not media_item.complete_file:
-            media_item.complete_file = self.get_file_location(media_item)
         if await asyncio.to_thread(media_item.complete_file.is_file):
             await self.manager.db_manager.history_table.add_filesize(domain, media_item)
 
@@ -348,158 +262,37 @@ async def handle_media_item_completion(self, media_item: MediaItem, downloaded:
         try:
             media_item.downloaded = downloaded
             await self.manager.hash_manager.hash_client.hash_item_during_download(media_item)
-            self.manager.path_manager.add_completed(media_item)
+            self.manager.add_completed(media_item)
         except Exception:
             log(f"Error handling media item completion of: {media_item.complete_file}", 10, exc_info=True)
 
-    """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"""
-
-    def get_download_dir(self, media_item: MediaItem) -> Path:
-        """Returns the download directory for the media item."""
-        download_folder = media_item.download_folder
-        if self.manager.parsed_args.cli_only_args.retry_any:
-            return download_folder
-
-        if self.manager.config_manager.settings_data.download_options.block_download_sub_folders:
-            while download_folder.parent != self.manager.path_manager.download_folder:
-                download_folder = download_folder.parent
-            media_item.download_folder = download_folder
-        return download_folder
-
-    def get_file_location(self, media_item: MediaItem) -> Path:
-        download_dir = self.get_download_dir(media_item)
-        return download_dir / media_item.filename
-
-    async def get_final_file_info(self, media_item: MediaItem, domain: str) -> tuple[bool, bool]:
-        """Complicated checker for if a file already exists, and was already downloaded."""
-        media_item.complete_file = self.get_file_location(media_item)
-        part_suffix = media_item.complete_file.suffix + constants.TempExt.PART
-        media_item.partial_file = media_item.complete_file.with_suffix(part_suffix)
-
-        expected_size = media_item.filesize
-        proceed = True
-        skip = False
-
-        if not TYPE_CHECKING:
-            log = log_debug if media_item.is_segment else globals()["log"]
-
-        while True:
-            if expected_size and not media_item.is_segment:
-                file_size_check = self.check_filesize_limits(media_item)
-                if not file_size_check:
-                    log(f"Download Skip {media_item.url} due to filesize restrictions", 10)
-                    proceed = False
-                    skip = True
-                    return proceed, skip
-
-            if not media_item.complete_file.exists() and not media_item.partial_file.exists():
-                break
-
-            if media_item.complete_file.exists() and media_item.complete_file.stat().st_size == media_item.filesize:
-                log(f"Found {media_item.complete_file.name} locally, skipping download")
-                proceed = False
-                break
-
-            downloaded_filename = await self.manager.db_manager.history_table.get_downloaded_filename(
-                domain,
-                media_item,
-            )
-            if not downloaded_filename:
-                media_item.complete_file, media_item.partial_file = await self.iterate_filename(
-                    media_item.complete_file,
-                    media_item,
-                )
-                break
-
-            if media_item.filename == downloaded_filename:
-                if media_item.partial_file.exists():
-                    log(f"Found {downloaded_filename} locally, trying to resume")
-                    assert media_item.filesize
-                    size = media_item.partial_file.stat().st_size
-                    if size >= media_item.filesize != 0:
-                        log(f"Deleting partial file {media_item.partial_file}")
-                        media_item.partial_file.unlink()
-
-                    elif size == media_item.filesize:
-                        if media_item.complete_file.exists():
-                            log(
-                                f"Found conflicting complete file '{media_item.complete_file}' locally, iterating filename",
-                                30,
-                            )
-                            new_complete_filename, new_partial_file = await self.iterate_filename(
-                                media_item.complete_file,
-                                media_item,
-                            )
-                            media_item.partial_file.rename(new_complete_filename)
-                            proceed = False
-
-                            media_item.complete_file = new_complete_filename
-                            media_item.partial_file = new_partial_file
-                        else:
-                            proceed = False
-                            media_item.partial_file.rename(media_item.complete_file)
-                            log(
-                                f"Renaming found partial file '{media_item.partial_file}' to complete file {media_item.complete_file}"
-                            )
-            elif media_item.complete_file.exists():
-                if media_item.complete_file.stat().st_size == media_item.filesize:
-                    log(f"Found complete file '{media_item.complete_file}' locally, skipping download")
-                    proceed = False
-                else:
-                    log(
-                        f"Found conflicting complete file '{media_item.complete_file}' locally, iterating filename",
-                        30,
-                    )
-                    media_item.complete_file, media_item.partial_file = await self.iterate_filename(
-                        media_item.complete_file,
-                        media_item,
-                    )
-                break
-
-            media_item.filename = downloaded_filename
-        media_item.download_filename = media_item.complete_file.name
-        await self.manager.db_manager.history_table.add_download_filename(domain, media_item)
-        return proceed, skip
-
-    async def iterate_filename(self, complete_file: Path, media_item: MediaItem) -> tuple[Path, Path]:
-        """Iterates the filename until it is unique."""
-        part_suffix = complete_file.suffix + constants.TempExt.PART
-        partial_file = complete_file.with_suffix(part_suffix)
-        for iteration in itertools.count(1):
-            filename = f"{complete_file.stem} ({iteration}){complete_file.suffix}"
-            temp_complete_file = media_item.download_folder / filename
-            if (
-                not temp_complete_file.exists()
-                and not await self.manager.db_manager.history_table.check_filename_exists(filename)
-            ):
-                media_item.filename = filename
-                complete_file = media_item.download_folder / media_item.filename
-                partial_file = complete_file.with_suffix(part_suffix)
-                break
-        return complete_file, partial_file
-
-    def check_filesize_limits(self, media: MediaItem) -> bool:
-        """Checks if the file size is within the limits."""
-        file_size_limits = self.manager.config_manager.settings_data.file_size_limits
-        max_video_filesize = file_size_limits.maximum_video_size or float("inf")
-        min_video_filesize = file_size_limits.minimum_video_size
-        max_image_filesize = file_size_limits.maximum_image_size or float("inf")
-        min_image_filesize = file_size_limits.minimum_image_size
-        max_other_filesize = file_size_limits.maximum_other_size or float("inf")
-        min_other_filesize = file_size_limits.minimum_other_size
-
-        assert media.filesize is not None
-        if media.ext in FILE_FORMATS["Images"]:
-            proceed = min_image_filesize < media.filesize < max_image_filesize
-        elif media.ext in FILE_FORMATS["Videos"]:
-            proceed = min_video_filesize < media.filesize < max_video_filesize
-        else:
-            proceed = min_other_filesize < media.filesize < max_other_filesize
-
-        return proceed
-
-
-def get_content_type(ext: str, headers: Mapping[str, str]) -> str | None:
+def get_file_location(media_item: MediaItem) -> Path:
+    return media_item.download_folder / media_item.filename
+
+
+def _check_filesize_limits(media: MediaItem) -> bool:
+    """Checks if the file size is within the limits."""
+    file_size_limits = config.get().file_size_limits
+    max_video_filesize = file_size_limits.maximum_video_size or float("inf")
+    min_video_filesize = file_size_limits.minimum_video_size
+    max_image_filesize = file_size_limits.maximum_image_size or float("inf")
+    min_image_filesize = file_size_limits.minimum_image_size
+    max_other_filesize = file_size_limits.maximum_other_size or float("inf")
+    min_other_filesize = file_size_limits.minimum_other_size
+
+    assert media.filesize is not None
+    if media.ext in constants.FileFormats.IMAGE:
+        proceed = min_image_filesize < media.filesize < max_image_filesize
+    elif media.ext in constants.FileFormats.VIDEO:
+        proceed = min_video_filesize < media.filesize < max_video_filesize
+    else:
+        proceed = min_other_filesize < media.filesize < max_other_filesize
+
+    return proceed
+
+
+def _get_content_type(ext: str, headers: Mapping[str, str]) -> str | None:
     content_type: str = headers.get("Content-Type", "")
     content_length = headers.get("Content-Length")
     if not content_type and not content_length:
@@ -514,43 +307,17 @@
         content_type = override or content_type
 
     content_type = content_type.lower()
-    if is_html_or_text(content_type) and ext.lower() not in FILE_FORMATS["Text"]:
+    if _is_html_or_text(content_type) and ext.lower() not in constants.FileFormats.TEXT:
         msg = f"Received '{content_type}', was expecting other"
         raise InvalidContentTypeError(message=msg)
 
     return content_type
 
 
-def get_last_modified(headers: Mapping[str, str]) -> int | None:
+def _get_last_modified(headers: Mapping[str, str]) -> int | None:
     if date_str := headers.get("Last-Modified"):
         return dates.parse_http(date_str)
 
 
-def is_html_or_text(content_type: str) -> bool:
+def _is_html_or_text(content_type: str) -> bool:
     return any(s in content_type for s in ("html", "text"))
-
-
-def _fallback_generator(media_item: MediaItem):
-    fallbacks = media_item.fallbacks
-
-    def gen_fallback() -> Generator[AbsoluteHttpURL | None, aiohttp.ClientResponse, None]:
-        response = yield
-        if fallbacks is None:
-            return
-
-        if callable(fallbacks):
-            for retry in itertools.count(1):
-                if not response:
-                    return
-                url = fallbacks(response, retry)
-                if not url:
-                    return
-                response = yield url
-
-        else:
-            for fall in fallbacks:  # noqa: UP028
-                yield fall
-
-    gen = gen_fallback()
-    _ = next(gen)
-    return gen
diff --git a/cyberdrop_dl/clients/flaresolverr.py b/cyberdrop_dl/clients/flaresolverr.py
index 487134afe..8677b6690 100644
--- a/cyberdrop_dl/clients/flaresolverr.py
+++ b/cyberdrop_dl/clients/flaresolverr.py
@@ -10,7 +10,7 @@
 import aiohttp
 from multidict import CIMultiDict, CIMultiDictProxy
 
-from cyberdrop_dl import ddos_guard
+from cyberdrop_dl import config, ddos_guard
 from cyberdrop_dl.compat import StrEnum
 from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL
 from cyberdrop_dl.exceptions import DDOSGuardError
@@ -19,7 +19,7 @@
 if TYPE_CHECKING:
     from collections.abc import Callable
 
-    from cyberdrop_dl.managers.manager import Manager
+    from cyberdrop_dl.managers import Manager
 
 
 class _Command(StrEnum):
@@ -76,8 +76,8 @@ def __init__(self, manager: Manager) -> None:
         self._session_id: str = ""
         self._session_lock, self._request_lock = asyncio.Lock(), asyncio.Lock()
         self._next_request_id: Callable[[], int] = itertools.count(1).__next__
-        if manager.global_config.general.flaresolverr:
-            self.url = manager.global_config.general.flaresolverr / "v1"
+        if flare := config.get().general.flaresolverr:
+            self.url = flare / "v1"
         else:
             self.url = None
 
@@ -116,7 +116,7 @@ async def request(self, url: AbsoluteHttpURL, data: Any = None) -> FlareSolverrS
         return resp.solution
 
     async def _check_user_agent(self, solution: FlareSolverrSolution) -> None:
-        cdl_user_agent = self.manager.global_config.general.user_agent
+        cdl_user_agent = config.get().general.user_agent
         mismatch_ua_msg = (
             "Config user_agent and flaresolverr user_agent do not match:"
             f"\n  Cyberdrop-DL: '{cdl_user_agent}'"
@@ -137,12 +137,12 @@ async def _request(self, command: _Command, /, data: Any = None, **kwargs: Any)
         if not self.url:
             raise DDOSGuardError("Found DDoS challenge, but FlareSolverr is not configured")
 
-        timeout = self.manager.global_config.rate_limiting_options._aiohttp_timeout
+        timeout = config.get().rate_limiting_options._aiohttp_timeout
         if command is _Command.CREATE_SESSION:
             timeout = aiohttp.ClientTimeout(total=5 * 60, connect=60)  # 5 minutes to create session
 
         # timeout in milliseconds (60s)
-        playload = {"cmd": command, "maxTimeout": 60_000} | kwargs
+        playload: dict[str, Any] = {"cmd": command, "maxTimeout": 60_000} | kwargs
 
         if data:
             assert command is _Command.POST_REQUEST
@@ -150,9 +150,7 @@ async def _request(self, command: _Command, /, data: Any = None, **kwargs: Any)
 
         async with (
             self._request_lock,
-            self.manager.progress_manager.show_status_msg(
-                f"Waiting For Flaresolverr Response [{self._next_request_id()}]"
-            ),
+            self.manager.progress_manager.status.show(f"Waiting For Flaresolverr Response [{self._next_request_id()}]"),
         ):
             async with self.manager.client_manager._session.post(
                 self.url,
@@ -164,7 +162,7 @@ async def _request(self, command: _Command, /, data: Any = None, **kwargs: Any)
     async def _create_session(self) -> None:
         session_id = "cyberdrop-dl"
         kwargs = {}
-        if proxy := self.manager.global_config.general.proxy:
+        if proxy := config.get().general.proxy:
             kwargs["proxy"] = {"url": str(proxy)}
 
         resp = await self._request(_Command.CREATE_SESSION, session=session_id, **kwargs)
diff --git a/cyberdrop_dl/clients/hash_client.py b/cyberdrop_dl/clients/hash_client.py
index 689a660d0..e4f477dee 100644
--- a/cyberdrop_dl/clients/hash_client.py
+++ b/cyberdrop_dl/clients/hash_client.py
@@ -7,7 +7,7 @@
 
 from send2trash import send2trash
 
-from cyberdrop_dl import constants
+from cyberdrop_dl import config, constants
 from cyberdrop_dl.constants import Hashing
 from cyberdrop_dl.ui.prompts.basic_prompts import enter_to_continue
 from cyberdrop_dl.utils.logger import log
@@ -16,9 +16,9 @@
 if TYPE_CHECKING:
     from yarl import URL
 
-    from cyberdrop_dl.config.config_model import DupeCleanup
+    from cyberdrop_dl.config.settings import Dedupe
     from cyberdrop_dl.data_structures.url_objects import MediaItem
-    from cyberdrop_dl.managers.manager import Manager
+    from cyberdrop_dl.managers import Manager
 
 
 def hash_directory_scanner(manager: Manager, path: Path) -> None:
@@ -54,14 +54,14 @@ def _deleted_file_suffix(self) -> Literal["Sent to trash", "Permanently deleted"
         return "Sent to trash" if self._to_trash else "Permanently deleted"
 
     @property
-    def dupe_cleanup_options(self) -> DupeCleanup:
-        return self.manager.config.dupe_cleanup_options
+    def dupe_cleanup_options(self) -> Dedupe:
+        return config.get().dupe_cleanup_options
 
     async def hash_directory(self, path: Path) -> None:
         path = Path(path)
         with (
             self.manager.live_manager.get_hash_live(stop=True),
-            self.manager.progress_manager.hash_progress.currently_hashing_dir(path),
+            self.manager.progress_manager.hashing.currently_hashing_dir(path),
         ):
             if not await asyncio.to_thread(path.is_dir):
                 raise NotADirectoryError
@@ -79,9 +79,9 @@ async def hash_item(self, media_item: MediaItem) -> None:
     async def hash_item_during_download(self, media_item: MediaItem) -> None:
         if media_item.is_segment:
             return
-        if self.manager.config_manager.settings_data.dupe_cleanup_options.hashing != Hashing.IN_PLACE:
+        if config.get().dupe_cleanup_options.hashing != Hashing.IN_PLACE:
             return
-        await self.manager.states.RUNNING.wait()
+
         try:
             assert media_item.original_filename
             hash = await self.update_db_and_retrive_hash(
@@ -103,9 +103,9 @@ async def update_db_and_retrive_hash(
             return
 
         hash = await self._update_db_and_retrive_hash_helper(file, original_filename, referer, hash_type=self.xxhash)
-        if self.manager.config_manager.settings_data.dupe_cleanup_options.add_md5_hash:
+        if config.get().dupe_cleanup_options.add_md5_hash:
             await self._update_db_and_retrive_hash_helper(file, original_filename, referer, hash_type=self.md5)
-        if self.manager.config_manager.settings_data.dupe_cleanup_options.add_sha256_hash:
+        if config.get().dupe_cleanup_options.add_sha256_hash:
             await self._update_db_and_retrive_hash_helper(file, original_filename, referer, hash_type=self.sha256)
         return hash
 
@@ -117,7 +117,7 @@ async def _update_db_and_retrive_hash_helper(
         hash_type: str,
     ) -> str | None:
         """Generates hash of a file."""
-        self.manager.progress_manager.hash_progress.update_currently_hashing(file)
+        await self.manager.progress_manager.hashing.update_currently_hashing(file)
         hash = await self.manager.db_manager.hash_table.get_file_hash_exists(file, hash_type)
         try:
             if not hash:
@@ -129,9 +129,9 @@ async def _update_db_and_retrive_hash_helper(
                     original_filename,
                     referer,
                 )
-                self.manager.progress_manager.hash_progress.add_new_completed_hash(hash_type)
+                self.manager.progress_manager.hashing.add_new_completed_hash(hash_type)
             else:
-                self.manager.progress_manager.hash_progress.add_prev_hash()
+                self.manager.progress_manager.hashing.add_prev_hash()
                 await self.manager.db_manager.hash_table.insert_or_update_hash_db(
                     hash,
                     hash_type,
@@ -156,11 +156,11 @@ async def save_hash_data(self, media_item: MediaItem, hash: str | None) -> None:
             self.hashes_dict[hash][size].add(absolute_path)
 
     async def cleanup_dupes_after_download(self) -> None:
-        if self.manager.config_manager.settings_data.dupe_cleanup_options.hashing == Hashing.OFF:
+        if config.get().dupe_cleanup_options.hashing == Hashing.OFF:
             return
-        if not self.manager.config_manager.settings_data.dupe_cleanup_options.auto_dedupe:
+        if not config.get().dupe_cleanup_options.auto_dedupe:
             return
-        if self.manager.config_manager.settings_data.runtime_options.ignore_history:
+        if config.get().runtime_options.ignore_history:
             return
         with self.manager.live_manager.get_hash_live(stop=True):
             file_hashes_dict = await self.get_file_hashes_dict()
@@ -199,28 +199,25 @@ async def _delete_and_log(self, file: Path, xxh128_value: str) -> None:
                 f"File hash matches with a previous download ({hash_string})"
             )
             log(msg, 10)
-            self.manager.progress_manager.hash_progress.add_removed_file()
-
+            self.manager.progress_manager.hashing.add_removed_file()
         finally:
             self._sem.release()
 
     async def get_file_hashes_dict(self) -> dict:
         """Get a dictionary of files based on matching file hashes and file size."""
-        downloads = self.manager.path_manager.completed_downloads - self.hashed_media_items
+        downloads = self.manager.completed_downloads - self.hashed_media_items
 
         async def exists(item: MediaItem) -> MediaItem | None:
             if await asyncio.to_thread(item.complete_file.is_file):
                 return item
 
-        results = await asyncio.gather(*(exists(item) for item in downloads))
-        for media_item in results:
-            if media_item is None:
-                continue
+        for media_item in filter(None, await asyncio.gather(*(exists(item) for item in downloads))):
             try:
                 await self.hash_item(media_item)
             except Exception as e:
                 msg = f"Unable to hash file = {media_item.complete_file}: {e}"
                 log(msg, 40)
+
         return self.hashes_dict
diff --git a/cyberdrop_dl/clients/jdownloader.py b/cyberdrop_dl/clients/jdownloader.py
new file mode 100644
index 000000000..4b01513bd
--- /dev/null
+++ b/cyberdrop_dl/clients/jdownloader.py
@@ -0,0 +1,112 @@
+from __future__ import annotations
+
+import asyncio
+import dataclasses
+import logging
+from typing import TYPE_CHECKING, Self
+
+from myjdapi import myjdapi
+
+from cyberdrop_dl.exceptions import JDownloaderError
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    from myjdapi.myjdapi import Jddevice
+
+    from cyberdrop_dl.config import Config
+    from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL
+
+
+logger = logging.getLogger(__name__)
+
+
+@dataclasses.dataclass(slots=True)
+class JDownloaderConfig:
+    enabled: bool
+    username: str
+    password: str
+    device: str
+    download_dir: Path
+    autostart: bool
+
+    @staticmethod
+    def from_config(config: Config) -> JDownloaderConfig:
+        download_dir = config.runtime_options.jdownloader_download_dir or config.files.download_folder
+        return JDownloaderConfig(
+ enabled=config.runtime_options.send_unsupported_to_jdownloader, + device=config.auth.jdownloader.device, + username=config.auth.jdownloader.username, + password=config.auth.jdownloader.password, + download_dir=download_dir.resolve(), + autostart=config.runtime_options.jdownloader_autostart, + ) + + +@dataclasses.dataclass(slots=True) +class JDownloader: + """Class that handles connecting and sending links to JDownloader.""" + + config: JDownloaderConfig + _enabled: bool = dataclasses.field(init=False) + _device: Jddevice | None = dataclasses.field(default=None, init=False) + + @classmethod + def new(cls, options: Config | JDownloaderConfig, /) -> Self: + if not isinstance(options, JDownloaderConfig): + options = JDownloaderConfig.from_config(options) + return cls(options) + + def __post_init__(self): + self._enabled = self.config.enabled + + async def _connect(self) -> None: + if not all((self.config.username, self.config.password, self.config.device)): + raise JDownloaderError("JDownloader credentials were not provided.") + + api = myjdapi.Myjdapi() + api.set_app_key("CYBERDROP-DL") + await asyncio.to_thread(api.connect, self.config.username, self.config.password) + self._device = api.get_device(self.config.device) + + async def connect(self) -> None: + if not self._enabled or self._device is not None: + return + try: + return await self._connect() + except JDownloaderError as e: + msg = e.message + except myjdapi.MYJDDeviceNotFoundException: + msg = f"Device not found ({self.config.device})" + except myjdapi.MYJDApiException as e: + msg = e + + logger.error(f"Failed to connect to jDownloader: {msg}") + self._enabled = False + + async def send( + self, + url: AbsoluteHttpURL, + title: str, + download_path: Path | None = None, + ) -> None: + assert self._device is not None + try: + download_folder = self.config.download_dir + if download_path: + download_folder = download_folder / download_path + + await asyncio.to_thread( + self._device.linkgrabber.add_links, + [ + { + "autostart": self.config.autostart, + "links": str(url), + "packageName": title if title else "Cyberdrop-DL", + "destinationFolder": str(download_folder), + "overwritePackagizerRules": True, + }, + ], + ) + except (AssertionError, myjdapi.MYJDException) as e: + raise JDownloaderError(str(e)) from e diff --git a/cyberdrop_dl/clients/scraper_client.py b/cyberdrop_dl/clients/scraper_client.py index 785a5531b..3774954fb 100644 --- a/cyberdrop_dl/clients/scraper_client.py +++ b/cyberdrop_dl/clients/scraper_client.py @@ -10,8 +10,8 @@ import cyberdrop_dl.constants as constants from cyberdrop_dl.clients.response import AbstractResponse +from cyberdrop_dl.cookies import make_simple_cookie from cyberdrop_dl.exceptions import DDOSGuardError -from cyberdrop_dl.utils.cookie_management import make_simple_cookie from cyberdrop_dl.utils.utilities import sanitize_filename if TYPE_CHECKING: @@ -29,10 +29,6 @@ class ScraperClient: def __init__(self, client_manager: ClientManager) -> None: self.client_manager = client_manager - self._save_pages_html = client_manager.manager.config_manager.settings_data.files.save_pages_html - self._pages_folder = self.client_manager.manager.path_manager.pages_folder - min_html_file_path_len = len(str(self._pages_folder)) + len(constants.STARTUP_TIME_STR) + 10 - self._max_html_stem_len = 245 - min_html_file_path_len @contextlib.asynccontextmanager async def _limiter(self, domain: str) -> AsyncGenerator[None]: @@ -71,7 +67,7 @@ async def _request( request_params["json"] = json if not impersonate: - 
headers.setdefault("user-agent", self.client_manager.manager.global_config.general.user_agent) + headers.setdefault("user-agent", self.client_manager.config.get().general.user_agent) async with self.__request_context(url, method, request_params, impersonate, cache_disabled) as resp: exc = None diff --git a/cyberdrop_dl/compat.py b/cyberdrop_dl/compat.py index ca3f4772c..bf44c00dc 100644 --- a/cyberdrop_dl/compat.py +++ b/cyberdrop_dl/compat.py @@ -44,8 +44,8 @@ class StrEnum(enum.StrEnum, metaclass=_ContainerEnumType): ... class MayBeUpperStrEnum(StrEnum): @classmethod - def _missing_(cls, value: object): + def _missing_(cls, value: object) -> MayBeUpperStrEnum | None: try: return cls[str(value).upper()] - except KeyError as e: - raise e + except KeyError: + return None diff --git a/cyberdrop_dl/config/__init__.py b/cyberdrop_dl/config/__init__.py index c1d711864..3a27ffe66 100755 --- a/cyberdrop_dl/config/__init__.py +++ b/cyberdrop_dl/config/__init__.py @@ -1,177 +1,99 @@ from __future__ import annotations -import dataclasses -import shutil +import datetime +from contextvars import ContextVar, Token from pathlib import Path -from time import sleep -from typing import TYPE_CHECKING - -from cyberdrop_dl import constants, env -from cyberdrop_dl.utils.apprise import get_apprise_urls - -from .auth_model import AuthSettings -from .config_model import ConfigSettings -from .global_model import GlobalSettings - -if TYPE_CHECKING: - from cyberdrop_dl.cli import ParsedArgs - from cyberdrop_dl.utils.apprise import AppriseURL - -__all__ = [ - "AuthSettings", - "ConfigSettings", - "GlobalSettings", -] - -deep_scrape: bool = False - -current_config: Config -cli: ParsedArgs -appdata: AppData - -# re-export current config values for easy access -auth: AuthSettings -settings: ConfigSettings -global_settings: GlobalSettings - - -def startup() -> None: - global appdata, cli - from cyberdrop_dl.cli import parse_args - - cli = parse_args() - - if env.RUNNING_IN_IDE and Path.cwd().name == "cyberdrop_dl": - """This is for testing purposes only""" - constants.DEFAULT_APP_STORAGE = Path("../AppData") - constants.DEFAULT_DOWNLOAD_STORAGE = Path("../Downloads") - - appdata_path = cli.cli_only_args.appdata_folder or constants.DEFAULT_APP_STORAGE - appdata = AppData(appdata_path.resolve()) - appdata.mkdirs() - # cache.startup(appdata.cache_file) - load_config(get_default_config()) - settings.logs._delete_old_logs_and_folders(constants.STARTUP_TIME) - - -class AppData(Path): - def __init__(self, app_data_path: Path) -> None: - self.configs_dir = app_data_path / "Configs" - self.cache_dir = app_data_path / "Cache" - self.cookies_dir = app_data_path / "Cookies" - self.cache_file = self.cache_dir / "cache.yaml" - self.default_auth_config_file = self.configs_dir / "authentication.yaml" - self.global_config_file = self.configs_dir / "global_settings.yaml" - self.cache_db = self.cache_dir / "request_cache.db" - self.history_db = self.cache_dir / "cyberdrop.db" - - def mkdirs(self) -> None: - for dir in (self.configs_dir, self.cache_dir, self.cookies_dir): - dir.mkdir(parents=True, exist_ok=True) - - -@dataclasses.dataclass(slots=True) -class Config: - """Helper class to group a single config, not necessarily the current config""" - - folder: Path - - apprise_file: Path - config_file: Path - - auth_config_file: Path - - auth: AuthSettings - settings: ConfigSettings - global_settings: GlobalSettings - apprise_urls: list[AppriseURL] - - def __init__(self, name: str) -> None: - self.apprise_urls = [] - self.folder = 
appdata.configs_dir / name - self.apprise_file = self.folder / "apprise.txt" - self.config_file = self.folder / "settings.yaml" - auth_override = self.folder / "authentication.yaml" - if auth_override.is_file(): - self.auth_config_file = auth_override - else: - self.auth_config_file = appdata.default_auth_config_file - - @staticmethod - def build(name: str, auth: AuthSettings, settings: ConfigSettings, global_settings: GlobalSettings) -> Config: - self = Config(name) - self.auth = auth - self.settings = settings - self.global_settings = global_settings - self.apprise_urls = get_apprise_urls(file=self.apprise_file) - return self +from typing import Annotated, Self + +from cyclopts import Parameter +from pydantic import BaseModel + +from cyberdrop_dl.config.auth import AuthSettings +from cyberdrop_dl.config.settings import ConfigSettings +from cyberdrop_dl.models import get_model_fields, merge_models + +_config: ContextVar[Config] = ContextVar("_config") - @staticmethod - def new_empty_config(name: str) -> Config: - assert name not in get_all_configs() - self = Config(name) - self._load() - return self - def _load(self) -> None: - """Read each config module from their respective files +class Config(ConfigSettings): + auth: Annotated[AuthSettings, Parameter(show=False)] = AuthSettings() + _source: Path | None = None + + _token: Token[Config] | None = None + _resolved: bool = False + + @property + def source(self) -> Path | None: + return self._source + + def __enter__(self) -> Self: + self._token = _config.set(self) + return self - If a files does not exists, uses the default config and creates it""" - self.auth = AuthSettings.load_file(self.auth_config_file, "socialmediagirls_username:") - self.settings = ConfigSettings.load_file(self.config_file, "download_error_urls_filename:") - self.global_settings = GlobalSettings.load_file(appdata.global_config_file, "Dupe_Cleanup_Options:") - self.apprise_urls = get_apprise_urls(file=self.apprise_file) + def __exit__(self, *_) -> None: + assert self._token is not None + _config.reset(self._token) - def _resolve_all_paths(self) -> None: - self.auth.resolve_paths() - self.settings.resolve_paths() - self.global_settings.resolve_paths() + def save(self, file: Path) -> None: + from cyberdrop_dl.utils import yaml - def _all_settings(self) -> tuple[ConfigSettings, AuthSettings, GlobalSettings]: - return self.settings, self.auth, self.global_settings + yaml.save(file, self) - def write_updated_config(self) -> None: - """Writes config to disk.""" - self.auth.save_to_file(self.auth_config_file) - self.settings.save_to_file(self.config_file) - self.global_settings.save_to_file(appdata.global_config_file) + def resolve_paths(self) -> None: + if self._resolved: + return + self._resolve_paths(self) + now = datetime.datetime.now() + self.logs.set_output_filenames(now) + self.logs.delete_old_logs_and_folders(now) + self._resolved = True + @classmethod + def _resolve_paths(cls, model: BaseModel) -> None: + for name, value in vars(model).items(): + if isinstance(value, Path): + setattr(model, name, value.resolve()) -def get_default_config() -> str: - ... 
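`Config` doubles as a context manager around a module-level `ContextVar`, so any code running inside the `with` block can fetch the active config via `config.get()` without threading a manager object through every call site. A condensed sketch of the mechanism:

```python
from contextvars import ContextVar
from dataclasses import dataclass, field

_config: ContextVar["Cfg"] = ContextVar("_config")


@dataclass
class Cfg:
    user_agent: str = "CDL"
    _token: object = field(default=None, repr=False)

    def __enter__(self) -> "Cfg":
        self._token = _config.set(self)  # make this instance the active config
        return self

    def __exit__(self, *_) -> None:
        _config.reset(self._token)  # restore whatever was active before


def get() -> "Cfg":
    return _config.get()


with Cfg(user_agent="test"):
    assert get().user_agent == "test"  # visible from arbitrarily deep call stacks
```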
- # return cache.get(cache.DEFAULT_CONFIG_KEY) or "Default" + elif isinstance(value, BaseModel): + cls._resolve_paths(value) + def update(self, other: Self) -> Self: + return merge_models(self, other) -def get_all_configs() -> list: - return sorted(config.name for config in appdata.configs_dir.iterdir() if config.is_dir()) +def load(file: Path) -> Config: + from cyberdrop_dl.utils import yaml -def set_default_config(config_name: str) -> None: - ... - # cache.save(cache.DEFAULT_CONFIG_KEY, config_name) + default = Config() + if not file.is_file(): + config = default + overwrite = True + else: + all_fields = get_model_fields(default, exclude_unset=False) + config = Config.model_validate(yaml.load(file)) + set_fields = get_model_fields(config) + overwrite = all_fields != set_fields -def delete_config(config_name: str) -> None: - all_configs = get_all_configs() - assert config_name in all_configs - assert len(all_configs) > 1 - assert config_name != current_config.folder.name - all_configs.remove(config_name) + if overwrite: + config.save(file) - # if cache.get(cache.DEFAULT_CONFIG_KEY) == config_name: - # set_default_config(all_configs[0]) + config._source = file # pyright: ignore[reportPrivateUsage] + return config - config_path = appdata.configs_dir / config_name - shutil.rmtree(config_path) +def get() -> Config: + return _config.get() -def load_config(config_name: str) -> None: - global current_config, auth, global_settings, settings - assert config_name - current_config = Config(config_name) - current_config._load() - current_config._resolve_all_paths() - settings, auth, global_settings = current_config._all_settings() - settings.logs._set_output_filenames(constants.STARTUP_TIME) - sleep(1) +def add_or_remove_lists(cli_values: list[str], config_values: list[str]) -> None: + exclude = {"+", "-"} + if cli_values: + if cli_values[0] == "+": + new_values_set = set(config_values + cli_values) + cli_values.clear() + cli_values.extend(sorted(new_values_set - exclude)) + elif cli_values[0] == "-": + new_values_set = set(config_values) - set(cli_values) + cli_values.clear() + cli_values.extend(sorted(new_values_set - exclude)) diff --git a/cyberdrop_dl/config/_common.py b/cyberdrop_dl/config/_common.py deleted file mode 100755 index 997bb9b7e..000000000 --- a/cyberdrop_dl/config/_common.py +++ /dev/null @@ -1,52 +0,0 @@ -from pathlib import Path -from typing import Self - -from pydantic import BaseModel - -from cyberdrop_dl.exceptions import InvalidYamlError -from cyberdrop_dl.models import AliasModel, get_model_fields -from cyberdrop_dl.utils import yaml - - -class ConfigModel(AliasModel): - @classmethod - def load_file(cls, file: Path, update_if_has_string: str) -> Self: - default = cls() - if not file.is_file(): - config = default - needs_update = True - - else: - all_fields = get_model_fields(default, exclude_unset=False) - config = cls.model_validate(yaml.load(file)) - set_fields = get_model_fields(config) - needs_update = all_fields != set_fields or _is_in_file(update_if_has_string, file) - - if needs_update: - config.save_to_file(file) - - return config - - def save_to_file(self, file: Path) -> None: - yaml.save(file, self) - - def resolve_paths(self) -> None: - self._resolve_paths(self) - - @classmethod - def _resolve_paths(cls, model: BaseModel) -> None: - for name, value in vars(model).items(): - if isinstance(value, Path): - setattr(model, name, value.resolve()) - - elif isinstance(value, BaseModel): - cls._resolve_paths(value) - - -def _is_in_file(search_value: str, file: Path) -> 
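`add_or_remove_lists` gives list-valued CLI options three modes: a leading `"+"` merges the CLI values into the config list, a leading `"-"` subtracts them, and no prefix leaves `cli_values` untouched so it simply overrides the config. Expected behavior, assuming the function is imported from `cyberdrop_dl.config`:

```python
from cyberdrop_dl.config import add_or_remove_lists

cli = ["+", "bunkr", "gofile"]
add_or_remove_lists(cli, ["pixeldrain"])
assert cli == ["bunkr", "gofile", "pixeldrain"]  # merged, sorted, "+" dropped

cli = ["-", "pixeldrain"]
add_or_remove_lists(cli, ["pixeldrain", "bunkr"])
assert cli == ["bunkr"]  # subtracted, "-" dropped

cli = ["bunkr"]
add_or_remove_lists(cli, ["pixeldrain"])
assert cli == ["bunkr"]  # no prefix: CLI value replaces the config list
```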
bool: - try: - return search_value.casefold() in file.read_text().casefold() - except FileNotFoundError: - return False - except Exception as e: - raise InvalidYamlError(file, e) from e diff --git a/cyberdrop_dl/config/auth.py b/cyberdrop_dl/config/auth.py new file mode 100755 index 000000000..8d49ee95f --- /dev/null +++ b/cyberdrop_dl/config/auth.py @@ -0,0 +1,47 @@ +from cyberdrop_dl.models import AliasModel, Settings + + +class Coomer(AliasModel): + session: str = "" + + +class Imgur(AliasModel): + client_id: str = "" + + +class MegaNz(AliasModel): + email: str = "" + password: str = "" + + +class JDownloader(AliasModel): + username: str = "" + password: str = "" + device: str = "" + + +class Kemono(AliasModel): + session: str = "" + + +class GoFile(AliasModel): + api_key: str = "" + + +class Pixeldrain(AliasModel): + api_key: str = "" + + +class RealDebrid(AliasModel): + api_key: str = "" + + +class AuthSettings(Settings): + coomer: Coomer = Coomer() + gofile: GoFile = GoFile() + imgur: Imgur = Imgur() + jdownloader: JDownloader = JDownloader() + kemono: Kemono = Kemono() + meganz: MegaNz = MegaNz() + pixeldrain: Pixeldrain = Pixeldrain() + realdebrid: RealDebrid = RealDebrid() diff --git a/cyberdrop_dl/config/auth_model.py b/cyberdrop_dl/config/auth_model.py deleted file mode 100755 index af9cb8d78..000000000 --- a/cyberdrop_dl/config/auth_model.py +++ /dev/null @@ -1,50 +0,0 @@ -from pydantic import BaseModel - -from cyberdrop_dl.config._common import ConfigModel -from cyberdrop_dl.models import AliasModel - - -class CoomerAuth(BaseModel): - session: str = "" - - -class ImgurAuth(AliasModel): - client_id: str = "" - - -class MegaNzAuth(AliasModel): - email: str = "" - password: str = "" - - -class JDownloaderAuth(AliasModel): - username: str = "" - password: str = "" - device: str = "" - - -class KemonoAuth(AliasModel): - session: str = "" - - -class GoFileAuth(AliasModel): - api_key: str = "" - - -class PixeldrainAuth(AliasModel): - api_key: str = "" - - -class RealDebridAuth(AliasModel): - api_key: str = "" - - -class AuthSettings(ConfigModel): - coomer: CoomerAuth = CoomerAuth() - gofile: GoFileAuth = GoFileAuth() - imgur: ImgurAuth = ImgurAuth() - jdownloader: JDownloaderAuth = JDownloaderAuth() - kemono: KemonoAuth = KemonoAuth() - meganz: MegaNzAuth = MegaNzAuth() - pixeldrain: PixeldrainAuth = PixeldrainAuth() - realdebrid: RealDebridAuth = RealDebridAuth() diff --git a/cyberdrop_dl/config/global_model.py b/cyberdrop_dl/config/global_model.py deleted file mode 100755 index a69fd25b0..000000000 --- a/cyberdrop_dl/config/global_model.py +++ /dev/null @@ -1,113 +0,0 @@ -import random -from typing import Literal - -import aiohttp -from pydantic import ( - BaseModel, - ByteSize, - NonNegativeFloat, - PositiveFloat, - PositiveInt, - field_serializer, - field_validator, -) -from yarl import URL - -from cyberdrop_dl.config._common import ConfigModel -from cyberdrop_dl.models.types import ByteSizeSerilized, HttpURL, ListNonEmptyStr, ListPydanticURL, NonEmptyStr -from cyberdrop_dl.models.validators import falsy_as, falsy_as_none, to_bytesize - -MIN_REQUIRED_FREE_SPACE = to_bytesize("512MB") -DEFAULT_REQUIRED_FREE_SPACE = to_bytesize("5GB") - - -class General(BaseModel): - ssl_context: Literal["truststore", "certifi", "truststore+certifi"] | None = "truststore+certifi" - disable_crawlers: ListNonEmptyStr = [] - flaresolverr: HttpURL | None = None - max_file_name_length: PositiveInt = 95 - max_folder_name_length: PositiveInt = 60 - proxy: HttpURL | None = None - required_free_space: 
ByteSizeSerilized = DEFAULT_REQUIRED_FREE_SPACE - user_agent: NonEmptyStr = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/142.0" - - @field_validator("ssl_context", mode="before") - @classmethod - def ssl(cls, value: str | None) -> str | None: - if isinstance(value, str): - value = value.lower().strip() - return falsy_as(value, None) - - @field_validator("disable_crawlers", mode="after") - @classmethod - def unique_list(cls, value: list[str]) -> list[str]: - return sorted(set(value)) - - @field_serializer("flaresolverr", "proxy") - def serialize(self, value: URL | str) -> str | None: - return falsy_as(value, None, str) - - @field_validator("flaresolverr", "proxy", mode="before") - @classmethod - def convert_to_str(cls, value: str) -> str | None: - return falsy_as(value, None, str) - - @field_validator("required_free_space", mode="after") - @classmethod - def override_min(cls, value: ByteSize) -> ByteSize: - return max(value, MIN_REQUIRED_FREE_SPACE) - - -class RateLimiting(BaseModel): - download_attempts: PositiveInt = 2 - download_delay: NonNegativeFloat = 0.0 - download_speed_limit: ByteSizeSerilized = ByteSize(0) - jitter: NonNegativeFloat = 0 - max_simultaneous_downloads_per_domain: PositiveInt = 5 - max_simultaneous_downloads: PositiveInt = 15 - rate_limit: PositiveFloat = 25 - - connection_timeout: PositiveFloat = 15 - read_timeout: PositiveFloat | None = 300 - - @field_validator("read_timeout", mode="before") - @classmethod - def parse_timeouts(cls, value: object) -> object | None: - return falsy_as_none(value) - - def model_post_init(self, *_) -> None: - self._curl_timeout = self.connection_timeout - if self.read_timeout is not None: - self._curl_timeout = self.connection_timeout, self.read_timeout - self._aiohttp_timeout: aiohttp.ClientTimeout = aiohttp.ClientTimeout( - total=None, - sock_connect=self.connection_timeout, - sock_read=self.read_timeout, - ) - - @property - def total_delay(self) -> NonNegativeFloat: - """download_delay + jitter""" - return self.download_delay + self.get_jitter() - - def get_jitter(self) -> NonNegativeFloat: - """Get a random number in the range [0, self.jitter]""" - return random.uniform(0, self.jitter) - - -class UIOptions(BaseModel): - refresh_rate: PositiveInt = 10 - - -class GenericCrawlerInstances(BaseModel): - wordpress_media: ListPydanticURL = [] - wordpress_html: ListPydanticURL = [] - discourse: ListPydanticURL = [] - chevereto: ListPydanticURL = [] - - -class GlobalSettings(ConfigModel): - general: General = General() - rate_limiting_options: RateLimiting = RateLimiting() - ui_options: UIOptions = UIOptions() - generic_crawlers_instances: GenericCrawlerInstances = GenericCrawlerInstances() diff --git a/cyberdrop_dl/config/config_model.py b/cyberdrop_dl/config/settings.py similarity index 61% rename from cyberdrop_dl/config/config_model.py rename to cyberdrop_dl/config/settings.py index 0b33aa67e..a5f8bd5a5 100755 --- a/cyberdrop_dl/config/config_model.py +++ b/cyberdrop_dl/config/settings.py @@ -1,32 +1,44 @@ -import itertools +# ruff: noqa: RUF012 +import random import re from datetime import date, datetime, timedelta +from functools import cached_property from logging import DEBUG from pathlib import Path - -from pydantic import BaseModel, ByteSize, Field, NonNegativeInt, field_serializer, field_validator +from typing import Literal + +import aiohttp +from pydantic import ( + ByteSize, + Field, + NonNegativeFloat, + NonNegativeInt, + PositiveFloat, + PositiveInt, + field_serializer, + field_validator, +) from 
cyberdrop_dl import constants from cyberdrop_dl.constants import BROWSERS, DEFAULT_APP_STORAGE, DEFAULT_DOWNLOAD_STORAGE, Hashing -from cyberdrop_dl.models import AliasModel, HttpAppriseURL +from cyberdrop_dl.models import HttpAppriseURL, Settings, SettingsGroup from cyberdrop_dl.models.types import ( ByteSizeSerilized, + HttpURL, ListNonEmptyStr, ListNonNegativeInt, + ListPydanticURL, LogPath, MainLogPath, NonEmptyStr, NonEmptyStrOrNone, PathOrNone, ) -from cyberdrop_dl.models.validators import falsy_as, to_timedelta -from cyberdrop_dl.supported_domains import SUPPORTED_SITES_DOMAINS -from cyberdrop_dl.utils.strings import validate_format_string -from cyberdrop_dl.utils.utilities import purge_dir_tree +from cyberdrop_dl.models.validators import falsy_as, falsy_as_none, to_bytesize, to_timedelta -from ._common import ConfigModel +MIN_REQUIRED_FREE_SPACE = to_bytesize("512MB") +DEFAULT_REQUIRED_FREE_SPACE = to_bytesize("5GB") -ALL_SUPPORTED_SITES = ["<>"] _SORTING_COMMON_FIELDS = { "base_dir", "ext", @@ -39,7 +51,15 @@ } -class DownloadOptions(BaseModel): +class FormatValidator: + @classmethod + def _validate_format(cls, value: str, valid_keys: set[str]) -> None: + from cyberdrop_dl.utils.strings import validate_format_string + + validate_format_string(value, valid_keys) + + +class DownloadOptions(FormatValidator, SettingsGroup): block_download_sub_folders: bool = False disable_download_attempt_limit: bool = False disable_file_timestamps: bool = False @@ -59,18 +79,18 @@ class DownloadOptions(BaseModel): @classmethod def valid_format(cls, value: str) -> str: valid_keys = {"default", "title", "id", "number", "date"} - validate_format_string(value, valid_keys) + cls._validate_format(value, valid_keys) return value -class Files(AliasModel): +class Files(SettingsGroup): download_folder: Path = Field(default=DEFAULT_DOWNLOAD_STORAGE, validation_alias="d") dump_json: bool = Field(default=False, validation_alias="j") input_file: Path = Field(default=DEFAULT_APP_STORAGE / "Configs/{config}/URLs.txt", validation_alias="i") save_pages_html: bool = False -class Logs(AliasModel): +class Logs(SettingsGroup): download_error_urls: LogPath = Path("Download_Error_URLs.csv") last_forum_post: LogPath = Path("Last_Scraped_Forum_Posts.csv") log_folder: Path = DEFAULT_APP_STORAGE / "Configs/{config}/Logs" @@ -81,6 +101,10 @@ class Logs(AliasModel): unsupported_urls: LogPath = Path("Unsupported_URLs.csv") webhook: HttpAppriseURL | None = None + @cached_property + def jsonl_file(self): + return self.main_log.with_suffix(".results.jsonl") + @field_validator("webhook", mode="before") @classmethod def handle_falsy(cls, value: str) -> str | None: @@ -92,7 +116,7 @@ def parse_logs_duration(input_date: timedelta | str | int | None) -> timedelta | if value := falsy_as(input_date, None): return to_timedelta(value) - def _set_output_filenames(self, now: datetime) -> None: + def set_output_filenames(self, now: datetime) -> None: self.log_folder.mkdir(exist_ok=True, parents=True) current_time_file_iso: str = now.strftime(constants.LOGS_DATETIME_FORMAT) current_time_folder_iso: str = now.strftime(constants.LOGS_DATE_FORMAT) @@ -107,17 +131,25 @@ def _set_output_filenames(self, now: datetime) -> None: log_file.parent.mkdir(exist_ok=True, parents=True) - def _delete_old_logs_and_folders(self, now: datetime | None = None) -> None: - if now and self.logs_expire_after: - for file in itertools.chain(self.log_folder.rglob("*.log"), self.log_folder.rglob("*.csv")): - file_date = file.stat().st_ctime - t_delta = now - 
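The `FormatValidator` mixin above defers to `cyberdrop_dl.utils.strings.validate_format_string`, which rejects templates that reference placeholder names outside the allowed set. One plausible implementation of such a check using only the standard library (the real helper may differ):

```python
import string


def validate_format_string(value: str, valid_keys: set[str]) -> None:
    """Raise if the template references a placeholder outside valid_keys."""
    used = {name for _, name, _, _ in string.Formatter().parse(value) if name}
    if unknown := used - valid_keys:
        raise ValueError(f"Invalid format keys: {sorted(unknown)}")


validate_format_string("{date} {title}", {"default", "title", "id", "number", "date"})  # ok
# validate_format_string("{user}", {"title", "id"})  # -> ValueError: ['user']
```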
datetime.fromtimestamp(file_date) - if t_delta > self.logs_expire_after: - file.unlink(missing_ok=True) - purge_dir_tree(self.log_folder) + def delete_old_logs_and_folders(self, now: datetime | None = None) -> None: + if not (now and self.logs_expire_after): + return + + from cyberdrop_dl.utils.utilities import purge_dir_tree + + for file in self.log_folder.rglob("*"): + if file.suffix not in (".log", ".csv"): + continue + file_date = file.stat().st_ctime + t_delta = now - datetime.fromtimestamp(file_date) + if t_delta > self.logs_expire_after: + file.unlink(missing_ok=True) -class FileSizeLimits(BaseModel): + _ = purge_dir_tree(self.log_folder) + + +class FileSizeLimits(SettingsGroup): maximum_image_size: ByteSizeSerilized = ByteSize(0) maximum_other_size: ByteSizeSerilized = ByteSize(0) maximum_video_size: ByteSizeSerilized = ByteSize(0) @@ -126,7 +158,7 @@ class FileSizeLimits(BaseModel): minimum_video_size: ByteSizeSerilized = ByteSize(0) -class MediaDurationLimits(BaseModel): +class MediaDurationLimits(SettingsGroup): maximum_video_duration: timedelta = timedelta(seconds=0) maximum_audio_duration: timedelta = timedelta(seconds=0) minimum_video_duration: timedelta = timedelta(seconds=0) @@ -146,7 +178,7 @@ def parse_runtime_duration(input_date: timedelta | str | int | None) -> timedelt return to_timedelta(input_date) -class IgnoreOptions(BaseModel): +class IgnoreOptions(SettingsGroup): exclude_audio: bool = False exclude_images: bool = False exclude_other: bool = False @@ -172,7 +204,7 @@ def is_valid_regex(cls, value: str | None) -> str | None: return value -class RuntimeOptions(BaseModel): +class Runtime(SettingsGroup): console_log_level: NonNegativeInt = 100 deep_scrape: bool = False delete_partial_files: bool = False @@ -188,7 +220,7 @@ class RuntimeOptions(BaseModel): update_last_forum_post: bool = True -class Sorting(BaseModel): +class Sorting(FormatValidator, SettingsGroup): scan_folder: PathOrNone = None sort_downloads: bool = False sort_folder: Path = DEFAULT_DOWNLOAD_STORAGE / "Cyberdrop-DL Sorted Downloads" @@ -203,7 +235,7 @@ class Sorting(BaseModel): def valid_sort_incrementer_format(cls, value: str | None) -> str | None: if value is not None: valid_keys = {"i"} - validate_format_string(value, valid_keys) + cls._validate_format(value, valid_keys) return value @field_validator("sorted_audio", mode="after") @@ -211,7 +243,7 @@ def valid_sort_incrementer_format(cls, value: str | None) -> str | None: def valid_sorted_audio(cls, value: str | None) -> str | None: if value is not None: valid_keys = _SORTING_COMMON_FIELDS | {"bitrate", "duration", "length", "sample_rate"} - validate_format_string(value, valid_keys) + cls._validate_format(value, valid_keys) return value @field_validator("sorted_image", mode="after") @@ -219,7 +251,7 @@ def valid_sorted_audio(cls, value: str | None) -> str | None: def valid_sorted_image(cls, value: str | None) -> str | None: if value is not None: valid_keys = _SORTING_COMMON_FIELDS | {"height", "resolution", "width"} - validate_format_string(value, valid_keys) + cls._validate_format(value, valid_keys) return value @field_validator("sorted_other", mode="after") @@ -227,7 +259,7 @@ def valid_sorted_image(cls, value: str | None) -> str | None: def valid_sorted_other(cls, value: str | None) -> str | None: if value is not None: valid_keys = _SORTING_COMMON_FIELDS | {"bitrate", "duration", "length", "sample_rate"} - validate_format_string(value, valid_keys) + cls._validate_format(value, valid_keys) return value @field_validator("sorted_video", 
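The rewritten cleanup walks the log folder once with `rglob("*")` and filters by suffix instead of chaining two globs. Note that it ages files by `st_ctime`, which is creation time on Windows but inode-change time on Unix, so expiry is approximate. A standalone sketch of the same logic:

```python
import datetime
from pathlib import Path


def delete_expired_logs(folder: Path, max_age: datetime.timedelta) -> None:
    now = datetime.datetime.now()
    for file in folder.rglob("*"):
        if file.suffix not in (".log", ".csv"):
            continue  # only CDL's own log artifacts are eligible
        age = now - datetime.datetime.fromtimestamp(file.stat().st_ctime)
        if age > max_age:
            file.unlink(missing_ok=True)  # empty parent dirs are purged afterwards
```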
mode="after") @@ -243,37 +275,20 @@ def valid_sorted_video(cls, value: str | None) -> str | None: "resolution", "width", } - validate_format_string(value, valid_keys) + cls._validate_format(value, valid_keys) return value -class BrowserCookies(BaseModel): +class Cookies(SettingsGroup): auto_import: bool = False browser: BROWSERS | None = BROWSERS.firefox - sites: list[NonEmptyStr] = SUPPORTED_SITES_DOMAINS def model_post_init(self, *_) -> None: if self.auto_import and not self.browser: raise ValueError("You need to provide a browser for auto_import to work") - @field_validator("sites", mode="before") - @classmethod - def handle_list(cls, values: list[str]) -> list[str]: - values = falsy_as(values, []) - if values == ALL_SUPPORTED_SITES: - return SUPPORTED_SITES_DOMAINS - if isinstance(values, list): - return sorted(str(value).lower() for value in values) - return values - - @field_serializer("sites", when_used="json-unless-none") - def use_placeholder(self, values: list[str]) -> list[str]: - if set(values) == set(SUPPORTED_SITES_DOMAINS): - return ALL_SUPPORTED_SITES - return values - - -class DupeCleanup(BaseModel): + +class Dedupe(SettingsGroup): add_md5_hash: bool = False add_sha256_hash: bool = False auto_dedupe: bool = True @@ -281,14 +296,107 @@ class DupeCleanup(BaseModel): send_deleted_to_trash: bool = True -class ConfigSettings(ConfigModel): - browser_cookies: BrowserCookies = BrowserCookies() +# ruff: noqa: RUF012 + + +class General(SettingsGroup): + ssl_context: Literal["truststore", "certifi", "truststore+certifi"] | None = "truststore+certifi" + disable_crawlers: ListNonEmptyStr = [] + flaresolverr: HttpURL | None = None + max_file_name_length: PositiveInt = 95 + max_folder_name_length: PositiveInt = 60 + proxy: HttpURL | None = None + required_free_space: ByteSizeSerilized = DEFAULT_REQUIRED_FREE_SPACE + user_agent: NonEmptyStr = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/142.0" + + @field_validator("ssl_context", mode="before") + @classmethod + def ssl(cls, value: str | None) -> str | None: + if isinstance(value, str): + value = value.lower().strip() + return falsy_as(value, None) + + @field_validator("disable_crawlers", mode="after") + @classmethod + def unique_list(cls, value: list[str]) -> list[str]: + return sorted(set(value)) + + @field_serializer("flaresolverr", "proxy") + def serialize(self, value: str) -> str | None: + return falsy_as(value, None, str) + + @field_validator("flaresolverr", "proxy", mode="before") + @classmethod + def convert_to_str(cls, value: str) -> str | None: + return falsy_as(value, None, str) + + @field_validator("required_free_space", mode="after") + @classmethod + def override_min(cls, value: ByteSize) -> ByteSize: + return max(value, MIN_REQUIRED_FREE_SPACE) + + +class RateLimiting(SettingsGroup): + download_attempts: PositiveInt = 2 + download_delay: NonNegativeFloat = 0.0 + download_speed_limit: ByteSizeSerilized = ByteSize(0) + jitter: NonNegativeFloat = 0 + max_simultaneous_downloads_per_domain: PositiveInt = 5 + max_simultaneous_downloads: PositiveInt = 15 + rate_limit: PositiveFloat = 25 + + connection_timeout: PositiveFloat = 15 + read_timeout: PositiveFloat | None = 300 + + @field_validator("read_timeout", mode="before") + @classmethod + def parse_timeouts(cls, value: object) -> object | None: + return falsy_as_none(value) + + def model_post_init(self, *_) -> None: + self._curl_timeout = self.connection_timeout + if self.read_timeout is not None: + self._curl_timeout = self.connection_timeout, 
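`Cookies.model_post_init` is where pydantic v2 puts cross-field rules that need the fully built model; a `ValueError` raised there aborts construction. A reduced example of the same pattern:

```python
import pydantic


class Cookies(pydantic.BaseModel):
    auto_import: bool = False
    browser: str | None = "firefox"

    def model_post_init(self, *_) -> None:
        # Cross-field rule: auto_import is meaningless without a browser.
        if self.auto_import and not self.browser:
            raise ValueError("You need to provide a browser for auto_import to work")


Cookies(auto_import=True)  # fine: browser defaults to "firefox"
# Cookies(auto_import=True, browser=None)  # -> raises at construction time
```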
self.read_timeout + + self._aiohttp_timeout: aiohttp.ClientTimeout = aiohttp.ClientTimeout( + total=None, + sock_connect=self.connection_timeout, + sock_read=self.read_timeout, + ) + + @property + def total_delay(self) -> NonNegativeFloat: + """download_delay + jitter""" + return self.download_delay + self.get_jitter() + + def get_jitter(self) -> NonNegativeFloat: + """Get a random number in the range [0, self.jitter]""" + return random.uniform(0, self.jitter) + + +class UIOptions(SettingsGroup): + refresh_rate: PositiveInt = 10 + + +class GenericCrawlerInstances(SettingsGroup): + wordpress_media: ListPydanticURL = [] + wordpress_html: ListPydanticURL = [] + discourse: ListPydanticURL = [] + chevereto: ListPydanticURL = [] + + +class ConfigSettings(Settings): + browser_cookies: Cookies = Cookies() download_options: DownloadOptions = DownloadOptions() - dupe_cleanup_options: DupeCleanup = DupeCleanup() + dupe_cleanup_options: Dedupe = Dedupe() file_size_limits: FileSizeLimits = FileSizeLimits() - media_duration_limits: MediaDurationLimits = MediaDurationLimits() files: Files = Files() + general: General = General() + generic_crawlers_instances: GenericCrawlerInstances = GenericCrawlerInstances() ignore_options: IgnoreOptions = IgnoreOptions() logs: Logs = Logs() - runtime_options: RuntimeOptions = RuntimeOptions() + media_duration_limits: MediaDurationLimits = MediaDurationLimits() + rate_limiting_options: RateLimiting = RateLimiting() + runtime_options: Runtime = Runtime() sorting: Sorting = Sorting() + ui_options: UIOptions = UIOptions() diff --git a/cyberdrop_dl/constants.py b/cyberdrop_dl/constants.py index 49c8159d9..953c5181b 100644 --- a/cyberdrop_dl/constants.py +++ b/cyberdrop_dl/constants.py @@ -1,26 +1,28 @@ +from __future__ import annotations + import re from dataclasses import field from datetime import UTC, datetime from enum import auto from pathlib import Path -from typing import TYPE_CHECKING, Any, Final +from typing import TYPE_CHECKING, Final -from aiohttp.resolver import AsyncResolver, ThreadedResolver from rich.text import Text from cyberdrop_dl import env -from cyberdrop_dl.compat import Enum, StrEnum +from cyberdrop_dl.compat import Enum, MayBeUpperStrEnum, StrEnum if TYPE_CHECKING: + from aiohttp.resolver import AsyncResolver, ThreadedResolver + from cyberdrop_dl.utils.logger import LogHandler # TIME STARTUP_TIME = datetime.now() -STARTUP_TIME_UTC = datetime.now(UTC) +STARTUP_TIME_UTC = STARTUP_TIME.astimezone(UTC) LOGS_DATETIME_FORMAT = "%Y%m%d_%H%M%S" LOGS_DATE_FORMAT = "%Y_%m_%d" STARTUP_TIME_STR = STARTUP_TIME.strftime(LOGS_DATETIME_FORMAT) -STARTUP_TIME_UTC_STR = STARTUP_TIME_UTC.strftime(LOGS_DATETIME_FORMAT) DNS_RESOLVER: type[AsyncResolver] | type[ThreadedResolver] | None = None MAX_REDIRECTS: Final[int] = 8 @@ -28,22 +30,10 @@ # logging CONSOLE_LEVEL = 100 MAX_NAME_LENGTHS = {"FILE": 95, "FOLDER": 60} -DEFAULT_CONSOLE_WIDTH = 240 -CSV_DELIMITER = "," LOG_OUTPUT_TEXT = Text("") -RICH_HANDLER_CONFIG: dict[str, Any] = {"rich_tracebacks": True, "tracebacks_show_locals": False} -RICH_HANDLER_DEBUG_CONFIG = RICH_HANDLER_CONFIG | { - "tracebacks_show_locals": True, - "locals_max_string": DEFAULT_CONSOLE_WIDTH, - "tracebacks_extra_lines": 2, - "locals_max_length": 20, -} -VALIDATION_ERROR_FOOTER = """Please delete the file or fix the errors. Read the documentation to learn what's the expected format and values: https://script-ware.gitbook.io/cyberdrop-dl/reference/configuration-options -\nThis is not a bug. 
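`RateLimiting.model_post_init` precomputes both timeout shapes once: `_curl_timeout` becomes a `(connect, read)` tuple when a read timeout is set, alongside a matching `aiohttp.ClientTimeout`. Meanwhile `total_delay` recomputes the jitter on every access, so back-to-back downloads sleep the base delay plus a fresh random offset in `[0, jitter]` rather than a fixed interval. Typical usage, assuming `rl` is a validated `RateLimiting` instance:

```python
import asyncio


async def throttle(rl: "RateLimiting") -> None:
    # Each access to total_delay draws a new jitter value,
    # de-synchronizing bursts of requests to the same host.
    await asyncio.sleep(rl.total_delay)
```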
Do not open issues related to this""" +VALIDATION_ERROR_FOOTER = """Please delete the file or fix the errors""" -CLI_VALIDATION_ERROR_FOOTER = """Please read the documentation to learn about the expected values: https://script-ware.gitbook.io/cyberdrop-dl/reference/configuration-options -\nThis is not a bug. Do not open issues related to this""" # regex RAR_MULTIPART_PATTERN = re.compile(r"^part\d+") @@ -52,7 +42,7 @@ HTTP_REGEX_LINKS = re.compile( r"https?://(www\.)?[-a-zA-Z0-9@:%._+~#=]{2,256}\.[a-z]{2,12}\b([-a-zA-Z0-9@:%_+.~#?&/=]*)" ) -console_handler: "LogHandler" +console_handler: LogHandler class TempExt(StrEnum): @@ -98,15 +88,11 @@ class HashType(StrEnum): xxh128 = "xxh128" -class Hashing(StrEnum): +class Hashing(MayBeUpperStrEnum): OFF = auto() IN_PLACE = auto() POST_DOWNLOAD = auto() - @classmethod - def _missing_(cls, value: object) -> "Hashing": - return cls[str(value).upper()] - class BROWSERS(StrEnum): chrome = auto() @@ -128,74 +114,81 @@ class NotificationResult(Enum): NONE = Text("No Notifications Sent", "yellow") -# file formats -FILE_FORMATS = { - "Images": { - ".gif", - ".gifv", - ".heic", - ".jfif", - ".jif", - ".jpe", - ".jpeg", - ".jpg", - ".jxl", - ".png", - ".svg", - ".tif", - ".tiff", - ".webp", - }, - "Videos": { - ".3gp", - ".avchd", - ".avi", - ".f4v", - ".flv", - ".m2ts", - ".m4p", - ".m4v", - ".mkv", - ".mov", - ".mp2", - ".mp4", - ".mpe", - ".mpeg", - ".mpg", - ".mpv", - ".mts", - ".ogg", - ".ogv", - ".qt", - ".swf", - ".ts", - ".webm", - ".wmv", - }, - "Audio": { - ".flac", - ".m4a", - ".mka", - ".mp3", - ".wav", - }, - "Text": { - ".htm", - ".html", - ".md", - ".nfo", - ".txt", - ".vtt", - ".sub", - }, - "7z": { - ".7z", - ".bz2", - ".gz", - ".tar", - ".zip", - }, -} - - -MEDIA_EXTENSIONS = FILE_FORMATS["Audio"] | FILE_FORMATS["Videos"] | FILE_FORMATS["Images"] +class FileFormats: + IMAGE = frozenset( + { + ".gif", + ".gifv", + ".heic", + ".jfif", + ".jif", + ".jpe", + ".jpeg", + ".jpg", + ".jxl", + ".png", + ".svg", + ".tif", + ".tiff", + ".webp", + } + ) + VIDEO = frozenset( + { + ".3gp", + ".avchd", + ".avi", + ".f4v", + ".flv", + ".m2ts", + ".m4p", + ".m4v", + ".mkv", + ".mov", + ".mp2", + ".mp4", + ".mpe", + ".mpeg", + ".mpg", + ".mpv", + ".mts", + ".ogg", + ".ogv", + ".qt", + ".swf", + ".ts", + ".webm", + ".wmv", + } + ) + AUDIO = frozenset( + { + ".flac", + ".m4a", + ".mka", + ".mp3", + ".wav", + } + ) + TEXT = frozenset( + { + ".htm", + ".html", + ".md", + ".nfo", + ".txt", + ".vtt", + ".sub", + } + ) + _7Z = frozenset( + { + ".7z", + ".bz2", + ".gz", + ".tar", + ".zip", + } + ) + VIDEO_OR_IMAGE = VIDEO | IMAGE + MEDIA = AUDIO | VIDEO_OR_IMAGE diff --git a/cyberdrop_dl/utils/cookie_management.py b/cyberdrop_dl/cookies.py similarity index 73% rename from cyberdrop_dl/utils/cookie_management.py rename to cyberdrop_dl/cookies.py index e1c936fd2..ecad96d29 100644 --- a/cyberdrop_dl/utils/cookie_management.py +++ b/cyberdrop_dl/cookies.py @@ -9,6 +9,7 @@ from textwrap import dedent from typing import TYPE_CHECKING, NamedTuple, ParamSpec, TypeVar +from cyberdrop_dl import appdata from cyberdrop_dl.dependencies import browser_cookie3 from cyberdrop_dl.utils.logger import log @@ -17,7 +18,7 @@ from pathlib import Path from cyberdrop_dl.constants import BROWSERS - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager P = ParamSpec("P") @@ -63,40 +64,23 @@ def wrapper(*args, **kwargs) -> R: @cookie_wrapper -def get_cookies_from_browsers(manager: Manager, *, browser: BROWSERS, domains: list[str] | None = None) -> 
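Grouping the extension sets into a `FileFormats` namespace of `frozenset`s keeps the unions (`VIDEO_OR_IMAGE`, `MEDIA`) immutable and precomputed at import time, replacing the old `FILE_FORMATS[...]` dict lookups. Membership checks stay O(1):

```python
from pathlib import Path

from cyberdrop_dl import constants

ext = Path("clip.mkv").suffix.lower()
assert ext in constants.FileFormats.VIDEO
assert ext in constants.FileFormats.MEDIA  # AUDIO | VIDEO | IMAGE
assert ext not in constants.FileFormats.IMAGE
```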
set[str]: - """Extract cookies from browsers. - - :param browsers: list of browsers to extract from. If `None`, config `browser_cookies.browsers` will be used - :param domains: list of domains to filter cookies. If `None`, config `browser_cookies.sites` will be used - :return: A set with all the domains that actually had cookies - :raises BrowserCookieError: If there's any error while extracting cookies""" - from cyberdrop_dl.supported_domains import SUPPORTED_FORUMS, SUPPORTED_SITES_DOMAINS, SUPPORTED_WEBSITES - +def get_cookies_from_browsers(manager: Manager, *, browser: BROWSERS, domains: list[str]) -> set[str]: if domains == []: msg = "No domains selected" raise ValueError(msg) extractor_name = browser.lower() - domains_to_extract: list[str] = domains or manager.config_manager.settings_data.browser_cookies.sites - if "all" in domains_to_extract: - domains_to_extract.remove("all") - domains_to_extract.extend(SUPPORTED_SITES_DOMAINS) - elif "all_forums" in domains_to_extract: - domains_to_extract.remove("all_forums") - domains_to_extract.extend(SUPPORTED_FORUMS.values()) - elif "all_file_hosts" in domains_to_extract: - domains_to_extract.remove("all_file_hosts") - domains_to_extract.extend(SUPPORTED_WEBSITES.values()) + domains_to_extract: list[str] = domains extracted_cookies = extract_cookies(extractor_name) if not extracted_cookies: msg = "None of the provided browsers is supported for extraction" raise ValueError(msg) - manager.path_manager.cookies_dir.mkdir(parents=True, exist_ok=True) + appdata.get().cookies_dir.mkdir(parents=True, exist_ok=True) domains_with_cookies: set[str] = set() for domain in domains_to_extract: - cookie_file_path = manager.path_manager.cookies_dir / f"{domain}.txt" + cookie_file_path = appdata.get().cookies_dir / f"{domain}.txt" cdl_cookie_jar = MozillaCookieJar(cookie_file_path) for cookie in extracted_cookies: if domain in cookie.domain: @@ -109,34 +93,27 @@ def get_cookies_from_browsers(manager: Manager, *, browser: BROWSERS, domains: l return domains_with_cookies -def clear_cookies(manager: Manager, domains: list[str]) -> None: - if not domains: - raise ValueError("No domains selected") - - manager.path_manager.cookies_dir.mkdir(parents=True, exist_ok=True) +def clear_cookies(*domains: str) -> None: + appdata.get().cookies_dir.mkdir(parents=True, exist_ok=True) for domain in domains: - cookie_file_path = manager.path_manager.cookies_dir / f"{domain}.txt" - cookie_jar = MozillaCookieJar(cookie_file_path) - cookie_jar.save(ignore_discard=True, ignore_expires=True) + cookie_file_path = appdata.get().cookies_dir / f"{domain}.txt" + cookie_file_path.unlink(missing_ok=True) def extract_cookies(extractor_name: str) -> CookieJar: - def is_decrypt_error(msg: str) -> bool: - return "Unable to get key for cookie decryption" in msg - extractor = next(extractor for extractor in COOKIE_EXTRACTORS if extractor.name == extractor_name) try: return extractor.extract() except browser_cookie3.BrowserCookieError as e: msg = str(e) - if is_decrypt_error(msg) and extractor.name in CHROMIUM_BROWSERS and os.name == "nt": + if "Unable to get key for cookie decryption" in msg and extractor.name in CHROMIUM_BROWSERS and os.name == "nt": msg = f"Cookie extraction from {extractor.name.capitalize()} is not supported on Windows - {msg}" raise UnsupportedBrowserError(msg) from None raise async def read_netscape_files(cookie_files: list[Path]) -> AsyncIterable[tuple[str, SimpleCookie]]: - now = time.time() + now = int(time.time()) domains_seen = set() cookie_jars = await 
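Each extracted domain ends up as its own Netscape-format file (`{domain}.txt`) under the appdata cookies dir, written with the standard library's `MozillaCookieJar`; `clear_cookies` now simply unlinks those files instead of overwriting them with empty jars. The round-trip, with `example.com.txt` as an illustrative filename:

```python
from http.cookiejar import MozillaCookieJar

jar = MozillaCookieJar("example.com.txt")
# jar.set_cookie(...) for every browser cookie whose domain matches
jar.save(ignore_discard=True, ignore_expires=True)  # keep session/expired entries on disk

restored = MozillaCookieJar()
restored.load("example.com.txt", ignore_discard=True, ignore_expires=True)
```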
asyncio.gather(*(_read_netscape_file(file) for file in cookie_files)) for file, cookie_jar in zip(cookie_files, cookie_jars, strict=True): @@ -154,7 +131,7 @@ async def read_netscape_files(cookie_files: list[Path]) -> AsyncIterable[tuple[s if simplified_domain in domains_seen: log(f"Previous cookies for domain {simplified_domain} detected. They will be overwritten", 30) - if (simplified_domain not in expired_cookies_domains) and cookie.is_expired(now): # type: ignore + if (simplified_domain not in expired_cookies_domains) and cookie.is_expired(now): expired_cookies_domains.add(simplified_domain) log(f"Cookies for {simplified_domain} are expired", 30) diff --git a/cyberdrop_dl/crawlers/__init__.py b/cyberdrop_dl/crawlers/__init__.py index 3839a0139..8d24d35b7 100644 --- a/cyberdrop_dl/crawlers/__init__.py +++ b/cyberdrop_dl/crawlers/__init__.py @@ -1,7 +1,7 @@ # ruff: noqa: F401 from __future__ import annotations -from cyberdrop_dl import env +from cyberdrop_dl import cache, config, env from ._chevereto import CheveretoCrawler from .anontransfer import AnonTransferCrawler diff --git a/cyberdrop_dl/crawlers/_forum.py b/cyberdrop_dl/crawlers/_forum.py index 9f28cc2a7..f595de7f2 100644 --- a/cyberdrop_dl/crawlers/_forum.py +++ b/cyberdrop_dl/crawlers/_forum.py @@ -19,6 +19,7 @@ from bs4 import BeautifulSoup, Tag +from cyberdrop_dl import config from cyberdrop_dl.constants import HTTP_REGEX_LINKS from cyberdrop_dl.crawlers.crawler import Crawler from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL @@ -208,17 +209,17 @@ async def login(self) -> None: @final @property def scrape_single_forum_post(self) -> bool: - return self.manager.config_manager.settings_data.download_options.scrape_single_forum_post + return config.get().download_options.scrape_single_forum_post @final @property def max_thread_depth(self) -> int: - return self.manager.config_manager.settings_data.download_options.maximum_thread_depth + return config.get().download_options.maximum_thread_depth @final @property def max_thread_folder_depth(self): - return self.manager.config.download_options.maximum_thread_folder_depth + return config.get().download_options.maximum_thread_folder_depth async def fetch(self, scrape_item: ScrapeItem) -> None: if not self.logged_in and self.login_required is True: @@ -326,7 +327,7 @@ async def handle_internal_link(self, scrape_item: ScrapeItem, link: AbsoluteHttp async def write_last_forum_post(self, thread_url: AbsoluteHttpURL, last_post_url: AbsoluteHttpURL | None) -> None: if not last_post_url or last_post_url == thread_url: return - self.manager.log_manager.write_last_post_log(last_post_url) + self.manager.logs.write_last_post_log(last_post_url) # TODO: Move this to the base crawler # TODO: Define an unified workflow for crawlers to perform and check login diff --git a/cyberdrop_dl/crawlers/archivebate.py b/cyberdrop_dl/crawlers/archivebate.py index 6f4b7a181..ab605c244 100644 --- a/cyberdrop_dl/crawlers/archivebate.py +++ b/cyberdrop_dl/crawlers/archivebate.py @@ -60,7 +60,7 @@ async def video(self, scrape_item: ScrapeItem) -> None: check_complete = await self.manager.db_manager.history_table.check_complete(self.DOMAIN, url, url, db_path) if check_complete: self.log(f"Skipping {scrape_item.url} as it has already been downloaded", 10) - self.manager.progress_manager.download_progress.add_previously_completed() + self.manager.progress_manager.files.add_previously_completed() return soup = await self.request_soup(scrape_item.url) diff --git a/cyberdrop_dl/crawlers/bunkrr.py 
b/cyberdrop_dl/crawlers/bunkrr.py index a147b25bc..8df41cd75 100644 --- a/cyberdrop_dl/crawlers/bunkrr.py +++ b/cyberdrop_dl/crawlers/bunkrr.py @@ -10,7 +10,7 @@ from aiohttp import ClientConnectorError -from cyberdrop_dl.constants import FILE_FORMATS +from cyberdrop_dl import constants from cyberdrop_dl.crawlers.crawler import Crawler, RateLimit, SupportedPaths, auto_task_id from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL from cyberdrop_dl.exceptions import DDOSGuardError @@ -35,7 +35,6 @@ class Selector: IMAGE_PREVIEW = "img.max-h-full.w-auto.object-cover.relative" -VIDEO_AND_IMAGE_EXTS: set[str] = FILE_FORMATS["Images"] | FILE_FORMATS["Videos"] HOST_OPTIONS: set[str] = {"bunkr.site", "bunkr.cr", "bunkr.ph"} DEEP_SCRAPE_CDNS: set[str] = {"burger", "milkshake"} # CDNs under maintanance, ignore them and try to get a cached URL FILE_KEYS = "id", "name", "original", "slug", "type", "extension", "size", "timestamp", "thumbnail", "cdnEndpoint" @@ -100,7 +99,7 @@ def src(self) -> AbsoluteHttpURL: src_str = self.thumbnail.replace("/thumbs/", "/") ext = Path(self.name).suffix src = parse_url(src_str).with_suffix(ext).with_query(None) - if src.suffix.lower() not in FILE_FORMATS["Images"]: + if src.suffix.lower() not in constants.FileFormats.IMAGE: src = src.with_host(src.host.replace("i-", "")) return _override_cdn(src) @@ -173,10 +172,11 @@ async def _album_file(self, scrape_item: ScrapeItem, file: File, results: dict[s try: src = file.src() except ValueError: - deep_scrape = True + src = None - deep_scrape = deep_scrape or ( - src.suffix.lower() not in VIDEO_AND_IMAGE_EXTS + deep_scrape = ( + not src + or src.suffix.lower() not in constants.FileFormats.VIDEO_OR_IMAGE or "no-image" in src.name or self.deep_scrape or any(cdn in src.host for cdn in DEEP_SCRAPE_CDNS) @@ -185,6 +185,7 @@ async def _album_file(self, scrape_item: ScrapeItem, file: File, results: dict[s self.create_task(self.run(scrape_item)) return + assert src if self.check_album_results(src, results): return diff --git a/cyberdrop_dl/crawlers/coomer.py b/cyberdrop_dl/crawlers/coomer.py index eae9fe8c9..cb8285f4d 100644 --- a/cyberdrop_dl/crawlers/coomer.py +++ b/cyberdrop_dl/crawlers/coomer.py @@ -2,6 +2,7 @@ from typing import ClassVar +from cyberdrop_dl import config from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL from .kemono import KemonoBaseCrawler @@ -16,4 +17,4 @@ class CoomerCrawler(KemonoBaseCrawler): @property def session_cookie(self) -> str: - return self.manager.config_manager.authentication_data.coomer.session + return config.get().auth.coomer.session diff --git a/cyberdrop_dl/crawlers/crawler.py b/cyberdrop_dl/crawlers/crawler.py index 9a62316c0..1fa79341e 100644 --- a/cyberdrop_dl/crawlers/crawler.py +++ b/cyberdrop_dl/crawlers/crawler.py @@ -16,7 +16,7 @@ from aiolimiter import AsyncLimiter from yarl import URL -from cyberdrop_dl import constants, signature +from cyberdrop_dl import config, constants, signature from cyberdrop_dl.clients.scraper_client import ScraperClient from cyberdrop_dl.data_structures.mediaprops import ISO639Subtitle, Resolution from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL, MediaItem, ScrapeItem @@ -53,7 +53,7 @@ from rich.progress import TaskID from cyberdrop_dl.clients.response import AbstractResponse - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager OneOrTuple: TypeAlias = T | tuple[T, ...] @@ -264,15 +264,14 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: ... 
@final @property def allow_no_extension(self) -> bool: - return not self.manager.config_manager.settings_data.ignore_options.exclude_files_with_no_extension + return not config.get().ignore_options.exclude_files_with_no_extension @property def deep_scrape(self) -> bool: - return self.manager.config_manager.deep_scrape + return config.get().runtime_options.deep_scrape def _init_downloader(self) -> Downloader: self.downloader = dl = Downloader(self.manager, self.DOMAIN) - dl.startup() return dl @final @@ -314,7 +313,6 @@ async def run(self, scrape_item: ScrapeItem) -> None: self.waiting_items += 1 async with self._semaphore: - await self.manager.states.RUNNING.wait() self.waiting_items -= 1 og_url = scrape_item.url scrape_item.url = url = self.transform_url(scrape_item.url) @@ -367,11 +365,11 @@ def raise_exc(self, scrape_item: ScrapeItem, exc: type[Exception] | Exception | def new_task_id(self, url: AbsoluteHttpURL) -> Generator[TaskID]: """Creates a new task_id (shows the URL in the UI and logs)""" log(f"Scraping [{self.FOLDER_DOMAIN}]: {url}", 20) - task_id = self.manager.progress_manager.scraping_progress.add_task(url) + task_id = self.manager.progress_manager.scrape.new_task(url) try: yield task_id finally: - self.manager.progress_manager.scraping_progress.remove_task(task_id) + self.manager.progress_manager.scrape.remove_task(task_id) @staticmethod def is_subdomain(url: AbsoluteHttpURL) -> bool: @@ -421,7 +419,7 @@ async def handle_file( if custom_filename: original_filename, filename = filename, custom_filename elif self.DOMAIN in ["cyberdrop"]: - original_filename, filename = remove_file_id(self.manager, filename, ext) + original_filename, filename = remove_file_id(filename, ext) else: original_filename = filename @@ -437,6 +435,7 @@ async def handle_file( ext=ext, ) media_item.debrid_link = debrid_link + media_item.headers = self._get_download_headers(media_item.referer) if metadata is not None: media_item.metadata = metadata await self.handle_media_item(media_item, m3u8) @@ -453,11 +452,11 @@ async def _download(self, media_item: MediaItem, m3u8: m3u8.RenditionGroup | Non await self.__write_to_jsonl(media_item) async def __write_to_jsonl(self, media_item: MediaItem) -> None: - if not self.manager.config.files.dump_json: + if not config.get().files.dump_json: return data = [media_item.as_jsonable_dict()] - await self.manager.log_manager.write_jsonl(data) + await self.manager.logs.write_jsonl(data) async def check_complete(self, url: AbsoluteHttpURL, referer: AbsoluteHttpURL) -> bool: """Checks if this URL has been download before. @@ -468,13 +467,12 @@ async def check_complete(self, url: AbsoluteHttpURL, referer: AbsoluteHttpURL) - check_complete = await self.manager.db_manager.history_table.check_complete(self.DOMAIN, url, referer, db_path) if check_complete: log(f"Skipping {url} as it has already been downloaded", 10) - self.manager.progress_manager.download_progress.add_previously_completed() + self.manager.progress_manager.files.add_previously_completed() return check_complete async def handle_media_item(self, media_item: MediaItem, m3u8: m3u8.RenditionGroup | None = None) -> None: - await self.manager.states.RUNNING.wait() - if media_item.datetime and not isinstance(media_item.datetime, int): - msg = f"Invalid datetime from '{self.FOLDER_DOMAIN}' crawler . Got {media_item.datetime!r}, expected int." + if media_item.timestamp and not isinstance(media_item.timestamp, int): + msg = f"Invalid datetime from '{self.FOLDER_DOMAIN}' crawler . 
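`__write_to_jsonl` serializes each completed `MediaItem` through `as_jsonable_dict` and hands it to `manager.logs.write_jsonl`. JSON Lines is append-friendly: one self-contained object per line. A minimal sketch of such a writer (the project's real implementation lives on the log manager and may differ):

```python
import json
from pathlib import Path


def write_jsonl(file: Path, records: list[dict]) -> None:
    with file.open("a", encoding="utf-8") as fp:
        for record in records:
            # default=str covers non-JSON values such as Path and datetime.
            fp.write(json.dumps(record, default=str) + "\n")
```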
Got {media_item.timestamp!r}, expected int." log(msg, bug=True) check_complete = await self.check_complete(media_item.url, media_item.referer) @@ -484,7 +482,7 @@ async def handle_media_item(self, media_item: MediaItem, m3u8: m3u8.RenditionGro return if await self.check_skip_by_config(media_item): - self.manager.progress_manager.download_progress.add_skipped() + self.manager.progress_manager.files.add_skipped() return self.create_task(self._download(media_item, m3u8)) @@ -493,17 +491,15 @@ async def handle_media_item(self, media_item: MediaItem, m3u8: m3u8.RenditionGro async def check_skip_by_config(self, media_item: MediaItem) -> bool: media_host = media_item.url.host - if (hosts := self.manager.config.ignore_options.skip_hosts) and any(host in media_host for host in hosts): + if (hosts := config.get().ignore_options.skip_hosts) and any(host in media_host for host in hosts): log(f"Download skip {media_item.url} due to skip_hosts config", 10) return True - if (hosts := self.manager.config.ignore_options.only_hosts) and not any(host in media_host for host in hosts): + if (hosts := config.get().ignore_options.only_hosts) and not any(host in media_host for host in hosts): log(f"Download skip {media_item.url} due to only_hosts config", 10) return True - if (regex := self.manager.config.ignore_options.filename_regex_filter) and re.search( - regex, media_item.filename - ): + if (regex := config.get().ignore_options.filename_regex_filter) and re.search(regex, media_item.filename): log(f"Download skip {media_item.url} due to filename regex filter config", 10) return True @@ -522,7 +518,7 @@ async def check_complete_from_referer( downloaded = await self.manager.db_manager.history_table.check_complete_by_referer(domain, url) if downloaded: log(f"Skipping {url} as it has already been downloaded", 10) - self.manager.progress_manager.download_progress.add_previously_completed() + self.manager.progress_manager.files.add_previously_completed() return True return False @@ -535,7 +531,7 @@ async def check_complete_by_hash( if downloaded: url = scrape_item if isinstance(scrape_item, URL) else scrape_item.url log(f"Skipping {url} as its hash ({hash_type}:{hash_value}) has already been downloaded", 10) - self.manager.progress_manager.download_progress.add_previously_completed() + self.manager.progress_manager.files.add_previously_completed() return downloaded async def get_album_results(self, album_id: str) -> dict[str, int]: @@ -571,7 +567,7 @@ def check_album_results(self, url: URL, album_results: dict[str, Any]) -> bool: url_path = self.create_db_path(url) if url_path in album_results and album_results[url_path] != 0: log(f"Skipping {url} as it has already been downloaded") - self.manager.progress_manager.download_progress.add_previously_completed() + self.manager.progress_manager.files.add_previously_completed() return True return False @@ -581,13 +577,13 @@ def create_title(self, title: str, album_id: str | None = None, thread_id: int | title = "Untitled" title = title.strip() - if album_id and self.manager.config.download_options.include_album_id_in_folder_name: + if album_id and config.get().download_options.include_album_id_in_folder_name: title = f"{title} {album_id}" - if thread_id and self.manager.config.download_options.include_thread_id_in_folder_name: + if thread_id and config.get().download_options.include_thread_id_in_folder_name: title = f"{title} {thread_id}" - if not self.manager.config.download_options.remove_domains_from_folder_names: + if not 
config.get().download_options.remove_domains_from_folder_names: title = f"{title} ({self.FOLDER_DOMAIN})" # Remove double spaces @@ -599,7 +595,7 @@ def create_title(self, title: str, album_id: str | None = None, thread_id: int | @property def separate_posts(self) -> bool: - return self.manager.config.download_options.separate_posts + return config.get().download_options.separate_posts def create_separate_post_title( self, @@ -610,7 +606,7 @@ def create_separate_post_title( ) -> str: if not self.separate_posts: return "" - title_format = self.manager.config.download_options.separate_posts_format + title_format = config.get().download_options.separate_posts_format if title_format.strip().casefold() == "{default}": title_format = self.DEFAULT_POST_TITLE_FORMAT if isinstance(date, int): @@ -758,7 +754,9 @@ async def _web_pager( page_url = self.parse_url(page_url_str, **kwargs) @error_handling_wrapper - async def direct_file(self, scrape_item: ScrapeItem, url: URL | None = None, assume_ext: str | None = None) -> None: + async def direct_file( + self, scrape_item: ScrapeItem, url: AbsoluteHttpURL | None = None, assume_ext: str | None = None + ) -> None: """Download a direct link file. Filename will be the url slug""" link = url or scrape_item.url filename, ext = self.get_filename_and_ext(link.name or link.parent.name, assume_ext=assume_ext) @@ -931,6 +929,12 @@ def handle_subs(self, scrape_item: ScrapeItem, video_filename: str, subtitles: I ) ) + def _get_download_headers(self, referer: AbsoluteHttpURL) -> dict[str, str]: + return { + "User-Agent": config.get().general.user_agent, + "Referer": str(referer), + } + def _make_scrape_mapper_keys(cls: type[Crawler] | Crawler) -> tuple[str, ...]: if cls.SUPPORTED_DOMAINS: @@ -1038,7 +1042,6 @@ def auto_task_id( @wraps(func) async def wrapper(self: _CrawlerT, scrape_item: ScrapeItem, *args: P.args, **kwargs: P.kwargs) -> R: - await self.manager.states.RUNNING.wait() with self.new_task_id(scrape_item.url): result = func(self, scrape_item, *args, **kwargs) if inspect.isawaitable(result): diff --git a/cyberdrop_dl/crawlers/filester.py b/cyberdrop_dl/crawlers/filester.py index d9e3d37d6..110771e13 100644 --- a/cyberdrop_dl/crawlers/filester.py +++ b/cyberdrop_dl/crawlers/filester.py @@ -1,4 +1,4 @@ -from __future__ import annotations # +from __future__ import annotations from typing import TYPE_CHECKING, ClassVar diff --git a/cyberdrop_dl/crawlers/gofile.py b/cyberdrop_dl/crawlers/gofile.py index e06db6166..af7cc0f51 100644 --- a/cyberdrop_dl/crawlers/gofile.py +++ b/cyberdrop_dl/crawlers/gofile.py @@ -238,6 +238,9 @@ async def _get_website_token(self) -> str: raise ScrapeError(401, "Couldn't generate GoFile websiteToken", origin=_GLOBAL_JS_URL) + def _get_download_headers(self, referer: AbsoluteHttpURL) -> dict[str, str]: + return super()._get_download_headers(referer) | self.headers + def _check_node_is_accessible(node: Node) -> TypeGuard[File | Folder]: if (type_ := node["type"]) not in ("file", "folder"): diff --git a/cyberdrop_dl/crawlers/http_direct.py b/cyberdrop_dl/crawlers/http_direct.py index 08f703e0b..d7444db53 100644 --- a/cyberdrop_dl/crawlers/http_direct.py +++ b/cyberdrop_dl/crawlers/http_direct.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, ClassVar -from cyberdrop_dl.constants import FILE_FORMATS +from cyberdrop_dl import constants from cyberdrop_dl.crawlers.crawler import Crawler from cyberdrop_dl.exceptions import NoExtensionError from cyberdrop_dl.utils.utilities import get_filename_and_ext @@ -11,9 +11,6 @@ from 
cyberdrop_dl.data_structures.url_objects import ScrapeItem -MEDIA_EXTENSIONS = FILE_FORMATS["Images"] | FILE_FORMATS["Videos"] | FILE_FORMATS["Audio"] - - class DirectHttpFile(Crawler, is_generic=True): DOMAIN: ClassVar[str] = "no_crawler" @@ -23,7 +20,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: except NoExtensionError: filename, ext = get_filename_and_ext(scrape_item.url.name, forum=True) - if ext not in MEDIA_EXTENSIONS: + if ext not in constants.FileFormats.MEDIA: raise ValueError scrape_item.add_to_parent_title("Loose Files") diff --git a/cyberdrop_dl/crawlers/kemono.py b/cyberdrop_dl/crawlers/kemono.py index 10a11a5b3..d8d0dde9d 100644 --- a/cyberdrop_dl/crawlers/kemono.py +++ b/cyberdrop_dl/crawlers/kemono.py @@ -12,6 +12,7 @@ from pydantic import BeforeValidator, Field +from cyberdrop_dl import config from cyberdrop_dl.crawlers.crawler import Crawler, SupportedPaths, auto_task_id from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL from cyberdrop_dl.exceptions import NoExtensionError, ScrapeError @@ -215,11 +216,11 @@ def session_cookie(self) -> str: @property def ignore_content(self) -> bool: - return self.manager.config.ignore_options.ignore_coomer_post_content + return config.get().ignore_options.ignore_coomer_post_content @property def ignore_ads(self) -> bool: - return self.manager.config.ignore_options.ignore_coomer_ads + return config.get().ignore_options.ignore_coomer_ads async def async_startup(self) -> None: if getattr(self, "API_ENTRYPOINT", None): @@ -618,7 +619,7 @@ class KemonoCrawler(KemonoBaseCrawler): @property def session_cookie(self) -> str: - return self.manager.config_manager.authentication_data.kemono.session + return config.get().auth.kemono.session def _thumbnail_to_src(og_url: AbsoluteHttpURL) -> AbsoluteHttpURL: diff --git a/cyberdrop_dl/crawlers/megacloud.py b/cyberdrop_dl/crawlers/megacloud.py index b6996c0d9..dd7548ccb 100644 --- a/cyberdrop_dl/crawlers/megacloud.py +++ b/cyberdrop_dl/crawlers/megacloud.py @@ -113,6 +113,9 @@ def parse_subs(): subtitles=tuple(parse_subs()), ) + def _get_download_headers(self, referer: AbsoluteHttpURL) -> dict[str, str]: + return super()._get_download_headers(referer) | {"referer": "https://megacloud.blog/"} + _ISO639_MAP = { "arabic": "ara", diff --git a/cyberdrop_dl/crawlers/odnoklassniki.py b/cyberdrop_dl/crawlers/odnoklassniki.py index de0c7eddb..7e3538066 100644 --- a/cyberdrop_dl/crawlers/odnoklassniki.py +++ b/cyberdrop_dl/crawlers/odnoklassniki.py @@ -147,6 +147,19 @@ async def video(self, scrape_item: ScrapeItem, video_id: str): mobile_url, scrape_item, video_id + ".mp4", custom_filename=filename, debrid_link=cdn_url ) + def _get_download_headers(self, referer: AbsoluteHttpURL) -> dict[str, str]: + return super()._get_download_headers(referer) | { + "Accept-Language": "en-gb, en;q=0.8", + "User-Agent": _CHROME_ANDROID_USER_AGENT, + "Referer": "https://m.ok.ru/", + "Origin": "https://m.ok.ru", + } + + +_CHROME_ANDROID_USER_AGENT: str = ( + "Mozilla/5.0 (Linux; Android 16) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.7204.180 Mobile Safari/537.36" +) + def _get_best_src(metadata: dict[str, Any]) -> tuple[Resolution, str]: def parse(): diff --git a/cyberdrop_dl/crawlers/onedrive.py b/cyberdrop_dl/crawlers/onedrive.py index da9be4401..6d842eb78 100644 --- a/cyberdrop_dl/crawlers/onedrive.py +++ b/cyberdrop_dl/crawlers/onedrive.py @@ -9,6 +9,7 @@ from functools import partial from typing import TYPE_CHECKING, Any, ClassVar, Self +from cyberdrop_dl import cache from 
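Several crawlers now override the base `_get_download_headers` and merge extras with the dict-union operator, so the per-item `User-Agent`/`Referer` defaults are inherited unless a subclass explicitly replaces them. A reduced model of the pattern (class names hypothetical):

```python
class BaseCrawler:
    def _get_download_headers(self, referer: str) -> dict[str, str]:
        return {"User-Agent": "CDL", "Referer": referer}


class FixedRefererCrawler(BaseCrawler):
    def _get_download_headers(self, referer: str) -> dict[str, str]:
        # dict union: the right-hand side wins on key collisions,
        # so the pinned Referer replaces the per-item one.
        return super()._get_download_headers(referer) | {"Referer": "https://example.org/"}


headers = FixedRefererCrawler()._get_download_headers("https://host/page")
assert headers == {"User-Agent": "CDL", "Referer": "https://example.org/"}
```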
cyberdrop_dl.crawlers.crawler import Crawler, SupportedDomains, SupportedPaths from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL from cyberdrop_dl.exceptions import ScrapeError @@ -102,8 +103,8 @@ class OneDriveCrawler(Crawler): FOLDER_DOMAIN: ClassVar[str] = "OneDrive" def __post_init__(self) -> None: - badger_token: str = self.manager.cache_manager.get("onedrive_badger_token") or "" - badger_token_expires: str = self.manager.cache_manager.get("onedrive_badger_token_expires") or "" + badger_token: str = cache.get().get("onedrive_badger_token") or "" + badger_token_expires: str = cache.get().get("onedrive_badger_token_expires") or "" self.auth_headers = {} expired = True if badger_token_expires: @@ -226,8 +227,8 @@ async def get_badger_token(self, badger_url: AbsoluteHttpURL = BADGER_URL) -> No badger_token: str = json_resp["token"] badger_token_expires: str = json_resp["expiryTimeUtc"] self.auth_headers = {"Prefer": "autoredeem", "Authorization": f"Badger {badger_token}"} - self.manager.cache_manager.save("onedrive_badger_token", badger_token) - self.manager.cache_manager.save("onedrive_badger_token_expires", badger_token_expires) + cache.get().save("onedrive_badger_token", badger_token) + cache.get().save("onedrive_badger_token_expires", badger_token_expires) def is_share_link(url: AbsoluteHttpURL) -> bool: diff --git a/cyberdrop_dl/crawlers/pixeldrain.py b/cyberdrop_dl/crawlers/pixeldrain.py index 75f3f9e28..41fb5d796 100644 --- a/cyberdrop_dl/crawlers/pixeldrain.py +++ b/cyberdrop_dl/crawlers/pixeldrain.py @@ -6,7 +6,7 @@ from pydantic import BaseModel -from cyberdrop_dl import env +from cyberdrop_dl import config, env from cyberdrop_dl.crawlers.crawler import Crawler, RateLimit, SupportedDomains, SupportedPaths, auto_task_id from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL from cyberdrop_dl.utils.utilities import error_handling_wrapper @@ -105,7 +105,7 @@ class PixelDrainCrawler(Crawler): def __post_init__(self) -> None: self._headers: dict[str, str] = {} - if api_key := self.manager.auth_config.pixeldrain.api_key: + if api_key := config.get().auth.pixeldrain.api_key: self._headers["Authorization"] = self.manager.client_manager.basic_auth( "Cyberdrop-DL", api_key, @@ -282,3 +282,6 @@ async def _text(self, scrape_item: ScrapeItem, file: File | Node) -> None: scrape_item.add_children() _file_task = auto_task_id(_file) + + def _get_download_headers(self, referer: AbsoluteHttpURL) -> dict[str, str]: + return super()._get_download_headers(referer=referer) | self._headers diff --git a/cyberdrop_dl/crawlers/rumble.py b/cyberdrop_dl/crawlers/rumble.py index 2b8fee7ea..ad782334a 100644 --- a/cyberdrop_dl/crawlers/rumble.py +++ b/cyberdrop_dl/crawlers/rumble.py @@ -42,7 +42,7 @@ class Format(NamedTuple): is_single_file: bool # for formats with the same resolution, give priority to non hls bitrate: int size: int - type: FormatType # On formats where everything else is the same, choose mp4 over webm + type: FormatType # On formats where everything else is the same, choose mp4 over webm url: AbsoluteHttpURL m3u8: m3u8.RenditionGroup | None = None diff --git a/cyberdrop_dl/crawlers/twitter_images.py b/cyberdrop_dl/crawlers/twitter_images.py index 87ef71596..e19408737 100644 --- a/cyberdrop_dl/crawlers/twitter_images.py +++ b/cyberdrop_dl/crawlers/twitter_images.py @@ -42,7 +42,6 @@ async def photo(self, scrape_item: ScrapeItem, url: AbsoluteHttpURL | None = Non await self.handle_file(link, scrape_item, filename, ext) async def handle_media_item(self, 
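The OneDrive crawler persists the badger token and its `expiryTimeUtc` string through the context-local cache, then decides at startup whether a fresh token is needed. A hedged sketch of such an expiry check, assuming the timestamp is ISO-8601 (the hunk above does not show the real parsing):

```python
import datetime


def badger_token_expired(expires_utc: str) -> bool:
    expiry = datetime.datetime.fromisoformat(expires_utc.replace("Z", "+00:00"))
    if expiry.tzinfo is None:
        expiry = expiry.replace(tzinfo=datetime.UTC)  # treat naive values as UTC
    return expiry <= datetime.datetime.now(datetime.UTC)
```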
media_item: MediaItem, m3u8: m3u8.RenditionGroup | None = None) -> None: - _, *media_item.fallbacks = list(_make_download_urls(media_item.url)) if media_item.referer == media_item.url and media_item.parents: media_item.referer = media_item.parents[0] await super().handle_media_item(media_item, m3u8) diff --git a/cyberdrop_dl/crawlers/wordpress/models.py b/cyberdrop_dl/crawlers/wordpress/models.py index 11f614d8e..b3b90172a 100644 --- a/cyberdrop_dl/crawlers/wordpress/models.py +++ b/cyberdrop_dl/crawlers/wordpress/models.py @@ -7,7 +7,7 @@ from pydantic import AfterValidator, AliasPath, BaseModel, Field from cyberdrop_dl.compat import StrEnum -from cyberdrop_dl.models.base_models import SequenceModel +from cyberdrop_dl.models.base import SequenceModel _ModelT = TypeVar("_ModelT", bound=BaseModel) diff --git a/cyberdrop_dl/data_structures/url_objects.py b/cyberdrop_dl/data_structures/url_objects.py index d4894ebcc..8dc9c3834 100644 --- a/cyberdrop_dl/data_structures/url_objects.py +++ b/cyberdrop_dl/data_structures/url_objects.py @@ -11,14 +11,10 @@ import yarl if TYPE_CHECKING: - from collections.abc import Callable - - import aiohttp from propcache.api import under_cached_property as cached_property - from rich.progress import TaskID from cyberdrop_dl import signature - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager class AbsoluteHttpURL(yarl.URL): @signature.copy(yarl.URL.__new__) @@ -127,30 +123,24 @@ class MediaItem: download_folder: Path filename: str original_filename: str - download_filename: str | None = field(default=None) - filesize: int | None = field(default=None, compare=False) + download_filename: str | None = None + filesize: int | None = None ext: str db_path: str - - debrid_link: AbsoluteHttpURL | None = field(default=None, compare=False) - duration: float | None = field(default=None, compare=False) + debrid_link: AbsoluteHttpURL | None = None + duration: float | None = None is_segment: bool = False - fallbacks: Callable[[aiohttp.ClientResponse, int], AbsoluteHttpURL] | list[AbsoluteHttpURL] | None = field( - default=None, compare=False - ) album_id: str | None = None - datetime: int | None = field(default=None, compare=False) - parents: list[AbsoluteHttpURL] = field(default_factory=list, compare=False) - parent_threads: set[AbsoluteHttpURL] = field(default_factory=set, compare=False) - - current_attempt: int = field(default=0, compare=False) - partial_file: Path = None # type: ignore - complete_file: Path = None # type: ignore - hash: str | None = field(default=None, compare=False) - downloaded: bool = field(default=False, compare=False) - - parent_media_item: MediaItem | None = field(default=None, compare=False) - _task_id: TaskID | None = field(default=None, compare=False) + timestamp: int | None = None + + parents: list[AbsoluteHttpURL] = field(default_factory=list) + parent_threads: set[AbsoluteHttpURL] = field(default_factory=set) + current_attempt: int = 0 + complete_file: Path = field(init=False) + hash: str | None = None + + headers: dict[str, str] = field(default_factory=dict, compare=False) + downloaded: bool = False metadata: object = field(init=False, default_factory=dict, compare=False) def __repr__(self) -> str: @@ -160,10 +150,16 @@ def __post_init__(self) -> None: if self.url.scheme == "metadata": self.db_path = "" + self.complete_file = self.download_folder / self.filename +
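MediaItem no longer stores partial_file and complete_file as two assignable fields defaulted to None: complete_file is computed once in __post_init__, and partial_file (the property added just below) is derived from it on access, so the two paths can never drift apart. A trimmed-down sketch of that derived-path pattern, with a hypothetical _Item standing in for the real dataclass:

from dataclasses import dataclass, field
from pathlib import Path

@dataclass(slots=True, kw_only=True)
class _Item:
    download_folder: Path
    filename: str
    complete_file: Path = field(init=False)

    def __post_init__(self) -> None:
        self.complete_file = self.download_folder / self.filename

    @property
    def partial_file(self) -> Path:
        # Derived on access; appends ".part" to the full name, e.g. "a.mp4" -> "a.mp4.part".
        return self.complete_file.with_suffix(self.complete_file.suffix + ".part")

+ @property + def partial_file(self) -> Path: + return self.complete_file.with_suffix(self.complete_file.suffix + 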
".part") + def datetime_obj(self) -> datetime.datetime | None: - if self.datetime: - assert isinstance(self.datetime, int), f"Invalid {self.datetime =!r} from {self.referer}" - return datetime.datetime.fromtimestamp(self.datetime) + if self.timestamp: + assert isinstance(self.timestamp, int), f"Invalid {self.timestamp =!r} from {self.referer}" + return datetime.datetime.fromtimestamp(self.timestamp, tz=datetime.UTC) @staticmethod def from_item( @@ -189,36 +185,19 @@ def from_item( ext=ext or Path(filename).suffix, original_filename=original_filename or filename, parents=origin.parents.copy(), - datetime=origin.possible_datetime if isinstance(origin, ScrapeItem) else origin.datetime, - parent_media_item=None if isinstance(origin, ScrapeItem) else origin, + timestamp=origin.possible_datetime if isinstance(origin, ScrapeItem) else origin.timestamp, parent_threads=origin.parent_threads.copy(), ) - @property - def task_id(self) -> TaskID | None: - if self.parent_media_item is not None: - return self.parent_media_item.task_id - return self._task_id - - def set_task_id(self, task_id: TaskID | None) -> None: - if self.task_id is not None and task_id is not None: - # We already have a task_id; we can't replace it, only reset it. - # This should never happen. Calling code should always check the value before making a new task. - # We can't silently ignore it either because we will lose any reference to the created task. - raise ValueError("task_id is already set") - if self.parent_media_item is not None: - self.parent_media_item.set_task_id(task_id) - else: - self._task_id = task_id - def as_jsonable_dict(self) -> dict[str, Any]: item = asdict(self) if datetime := self.datetime_obj(): item["datetime"] = datetime item["attempts"] = item.pop("current_attempt") + item["partial_file"] = self.partial_file if self.hash: item["hash"] = f"xxh128:{self.hash}" - for name in ("fallbacks", "_task_id", "is_segment", "parent_media_item"): + for name in ("is_segment",): _ = item.pop(name) return item diff --git a/cyberdrop_dl/director.py b/cyberdrop_dl/director.py index b07a7b136..9c582cbf4 100644 --- a/cyberdrop_dl/director.py +++ b/cyberdrop_dl/director.py @@ -10,26 +10,16 @@ from pathlib import Path from typing import TYPE_CHECKING, ParamSpec, TypeVar -from pydantic import ValidationError - -from cyberdrop_dl import constants, env +from cyberdrop_dl import config, constants, env from cyberdrop_dl.dependencies import browser_cookie3 -from cyberdrop_dl.managers.manager import Manager -from cyberdrop_dl.scraper.scrape_mapper import ScrapeMapper -from cyberdrop_dl.ui.program_ui import ProgramUI +from cyberdrop_dl.managers import Manager +from cyberdrop_dl.scrape_mapper import ScrapeMapper from cyberdrop_dl.utils.apprise import send_apprise_notifications -from cyberdrop_dl.utils.logger import ( - LogHandler, - QueuedLogger, - log, - log_spacer, - log_with_color, -) +from cyberdrop_dl.utils.logger import LogHandler, QueuedLogger, log, log_spacer, log_with_color from cyberdrop_dl.utils.sorting import Sorter from cyberdrop_dl.utils.updates import check_latest_pypi from cyberdrop_dl.utils.utilities import check_partials_and_empty_folders from cyberdrop_dl.utils.webhook import send_webhook_message -from cyberdrop_dl.utils.yaml import handle_validation_error if TYPE_CHECKING: from collections.abc import Callable, Coroutine, Sequence @@ -70,9 +60,6 @@ async def wrapper(*args, **kwargs) -> R | None: @_ui_error_handling_wrapper async def _run_manager(manager: Manager) -> None: - """Runs the program and handles the UI.""" - 
manager.path_manager.startup() - manager.log_manager.startup() debug_log_file_path = _setup_debug_logger(manager) start_time = manager.start_time _setup_main_logger(manager) @@ -122,20 +109,17 @@ async def _runtime(manager: Manager) -> None: async def _post_runtime(manager: Manager) -> None: """Actions to complete after main runtime, and before ui shutdown.""" log_spacer(20, log_to_console=False) - msg = f"Running Post-Download Processes For Config: {manager.config_manager.loaded_config}" + msg = "Running Post-Download Processes" log_with_color(msg, "green", 20) await manager.hash_manager.hash_client.cleanup_dupes_after_download() - if manager.config_manager.settings_data.sorting.sort_downloads and not manager.parsed_args.cli_only_args.retry_any: + if config.get().sorting.sort_downloads: sorter = Sorter(manager) await sorter.run() check_partials_and_empty_folders(manager) - if manager.config_manager.settings_data.runtime_options.update_last_forum_post: - await manager.log_manager.update_last_forum_post() - def _setup_debug_logger(manager: Manager) -> Path | None: if not env.DEBUG_VAR: @@ -143,7 +127,7 @@ def _setup_debug_logger(manager: Manager) -> Path | None: debug_logger = logging.getLogger("cyberdrop_dl_debug") log_level = 10 - settings_data = manager.config_manager.settings_data + settings_data = config.get() settings_data.runtime_options.log_level = log_level debug_logger.setLevel(log_level) debug_log_file_path = Path(__file__).parents[1] / "cyberdrop_dl_debug.log" @@ -171,46 +155,14 @@ def _setup_debug_logger(manager: Manager) -> Path | None: def _setup_main_logger(manager: Manager) -> None: logger = logging.getLogger("cyberdrop_dl") - file_io = manager.path_manager.main_log.open("w", encoding="utf8") - settings_data = manager.config_manager.settings_data - log_level = settings_data.runtime_options.log_level + file_io = config.get().logs.main_log.open("w", encoding="utf8") + log_level = config.get().runtime_options.log_level logger.setLevel(log_level) - if not manager.parsed_args.cli_only_args.fullscreen_ui: - constants.CONSOLE_LEVEL = settings_data.runtime_options.console_log_level - constants.console_handler = LogHandler(level=constants.CONSOLE_LEVEL) - logger.addHandler(constants.console_handler) - file_handler = LogHandler(level=log_level, file=file_io, width=500) - queued_logger = QueuedLogger(manager, file_handler) - logger.addHandler(queued_logger.handler) - - -def _setup_manager(args: Sequence[str] | None = None) -> Manager: - """Starts the program and returns the manager. - - This will also run the UI for the program - After this function returns, the manager will be ready to use and scraping / downloading can begin. 
- """ - - manager = Manager(args) - try: - manager.startup() - - if not manager.parsed_args.cli_only_args.download: - ProgramUI(manager) - - except ValidationError as e: - file = { - "GlobalSettings": manager.config_manager.global_settings, - "ConfigSettings": manager.config_manager.settings, - "AuthSettings": manager.config_manager.authentication_settings, - }.get(e.title) - - handle_validation_error(e, file=file) - sys.exit(_C.ERROR) - - return manager + logger.addHandler( + QueuedLogger(manager, LogHandler(level=log_level, file=file_io, width=500)).handler, + ) def _loop_factory() -> asyncio.AbstractEventLoop: @@ -224,7 +176,7 @@ class Director: """Creates a manager and runs it""" def __init__(self, args: Sequence[str] | None = None) -> None: - self.manager = _setup_manager(args) + self.manager: Manager = Manager() def run(self) -> int: return self._run() diff --git a/cyberdrop_dl/downloader/downloader.py b/cyberdrop_dl/downloader/downloader.py index 9911b335a..4da1ae516 100644 --- a/cyberdrop_dl/downloader/downloader.py +++ b/cyberdrop_dl/downloader/downloader.py @@ -3,19 +3,14 @@ import asyncio import contextlib import os -import shutil import subprocess -import sys -from dataclasses import field -from datetime import datetime from functools import wraps from pathlib import Path from typing import TYPE_CHECKING, NamedTuple, ParamSpec, TypeVar from aiohttp import ClientConnectorError, ClientError, ClientResponseError -from cyberdrop_dl import constants -from cyberdrop_dl.data_structures.url_objects import HlsSegment, MediaItem +from cyberdrop_dl import config from cyberdrop_dl.exceptions import ( DownloadError, DurationError, @@ -24,48 +19,23 @@ RestrictedDateRangeError, RestrictedFiletypeError, SkipDownloadError, - TooManyCrawlerErrors, ) -from cyberdrop_dl.utils import aio, ffmpeg +from cyberdrop_dl.utils import aio +from cyberdrop_dl.utils.dates import set_creation_time from cyberdrop_dl.utils.logger import log, log_debug -from cyberdrop_dl.utils.utilities import error_handling_wrapper, parse_url +from cyberdrop_dl.utils.utilities import error_handling_wrapper -# Windows epoch is January 1, 1601. Unix epoch is January 1, 1970 -WIN_EPOCH_OFFSET = 116444736e9 -MAC_OS_SET_FILE = None _VIDEO_HLS_BATCH_SIZE = 10 _AUDIO_HLS_BATCH_SIZE = 50 -# Try to import win32con for Windows constants, fallback to hardcoded values if unavailable -try: - import win32con # type: ignore[reportMissingModuleSource] - - FILE_WRITE_ATTRIBUTES = 256 - OPEN_EXISTING = win32con.OPEN_EXISTING - FILE_ATTRIBUTE_NORMAL = win32con.FILE_ATTRIBUTE_NORMAL - FILE_FLAG_BACKUP_SEMANTICS = win32con.FILE_FLAG_BACKUP_SEMANTICS -except ImportError: - FILE_WRITE_ATTRIBUTES = 256 - OPEN_EXISTING = 3 - FILE_ATTRIBUTE_NORMAL = 128 - FILE_FLAG_BACKUP_SEMANTICS = 33554432 - -if sys.platform == "win32": - from ctypes import byref, windll, wintypes - - -elif sys.platform == "darwin": - # SetFile is non standard in macOS. 
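The platform-specific timestamp code being removed in this hunk (WIN_EPOCH_OFFSET, the win32 constants, and the macOS SetFile lookup) is replaced by cyberdrop_dl.utils.dates.set_creation_time, which the new _set_file_datetime helper further down calls. For reference, the epoch conversion the old Windows path performed, reconstructed as a standalone sketch (not the new util itself):

# Unix timestamp -> Windows FILETIME halves, as the removed set_win_time did.
# Windows counts 100-nanosecond ticks since 1601-01-01, Unix counts seconds
# since 1970-01-01; 11_644_473_600 seconds separate the two epochs.
WIN_EPOCH_OFFSET = 116444736e9  # that gap, expressed in 100ns ticks

def to_filetime(unix_ts: float) -> tuple[int, int]:
    ticks = int(unix_ts * 1e7 + WIN_EPOCH_OFFSET)
    # FILETIME is a 64-bit tick count exposed as two unsigned 32-bit halves.
    return ticks & 0xFFFFFFFF, ticks >> 32  # (dwLowDateTime, dwHighDateTime)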
Only users that have xcode installed will have SetFile - MAC_OS_SET_FILE = shutil.which("SetFile") - - if TYPE_CHECKING: - from collections.abc import Callable, Coroutine, Generator + from collections.abc import Callable, Coroutine from cyberdrop_dl.clients.download_client import DownloadClient - from cyberdrop_dl.managers.manager import Manager - from cyberdrop_dl.utils.m3u8 import M3U8, RenditionGroup + from cyberdrop_dl.data_structures.url_objects import MediaItem + from cyberdrop_dl.managers import Manager + P = ParamSpec("P") R = TypeVar("R") @@ -87,10 +57,8 @@ class SegmentDownloadResult(NamedTuple): def retry(func: Callable[P, Coroutine[None, None, R]]) -> Callable[P, Coroutine[None, None, R]]: - """This function is a wrapper that handles retrying for failed downloads.""" - @wraps(func) - async def wrapper(*args, **kwargs) -> R: + async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: self: Downloader = args[0] media_item: MediaItem = args[1] while True: @@ -100,9 +68,7 @@ async def wrapper(*args, **kwargs) -> R: if not e.retry: raise - self.attempt_task_removal(media_item) - if e.status != 999: - media_item.current_attempt += 1 + media_item.current_attempt += 1 log(f"{self.log_prefix} failed: {media_item.url} with error: {e!s}", 40) if media_item.current_attempt >= self.max_attempts: @@ -118,52 +84,41 @@ async def wrapper(*args, **kwargs) -> R: class Downloader: - def __init__(self, manager: Manager, domain: str) -> None: + def __init__( + self, + config: config.Config, + manager: Manager, + client: DownloadClient, + slots: int, + ) -> None: self.manager: Manager = manager - self.domain: str = domain - self.client: DownloadClient = field(init=False) - self.log_prefix = "Download attempt (unsupported domain)" if domain in GENERIC_CRAWLERS else "Download" + self.config = config + self.client: DownloadClient = client + + self.log_prefix = "Download" self.processed_items: set[str] = set() self.waiting_items = 0 - - self._additional_headers = {} self._current_attempt_filesize: dict[str, int] = {} - self._file_lock_vault = manager.client_manager.file_locks - self._ignore_history = manager.config_manager.settings_data.runtime_options.ignore_history - self._semaphore: asyncio.Semaphore = field(init=False) + self._file_lock_vault: aio.WeakAsyncLocks[str] = aio.WeakAsyncLocks() + self._ignore_history: bool = self.config.runtime_options.ignore_history + self._semaphore: asyncio.Semaphore = asyncio.Semaphore(slots) @property def max_attempts(self): - if self.manager.config_manager.settings_data.download_options.disable_download_attempt_limit: + if self.config.download_options.disable_download_attempt_limit: return 1 - return self.manager.config_manager.global_settings_data.rate_limiting_options.download_attempts - - def startup(self) -> None: - """Starts the downloader.""" - self.client = self.manager.client_manager.download_client - self._semaphore = asyncio.Semaphore(self.manager.client_manager.get_download_slots(self.domain)) - - self.manager.path_manager.download_folder.mkdir(parents=True, exist_ok=True) - if self.manager.config_manager.settings_data.sorting.sort_downloads: - self.manager.path_manager.sorted_folder.mkdir(parents=True, exist_ok=True) - - def update_queued_files(self, increase_total: bool = True): - queued_files = self.manager.progress_manager.file_progress.get_queue_length() - self.manager.progress_manager.download_progress.update_queued(queued_files) - self.manager.progress_manager.download_progress.update_total(increase_total) + return 
self.config.rate_limiting_options.download_attempts @contextlib.asynccontextmanager - async def _download_context(self, media_item: MediaItem): - await self.manager.states.RUNNING.wait() + async def _limiter(self, media_item: MediaItem): media_item.current_attempt = 0 - await self.client.mark_incomplete(media_item, self.domain) if media_item.is_segment: yield return self.waiting_items += 1 - self.update_queued_files() + await self.client.mark_incomplete(media_item) server = (media_item.debrid_link or media_item.url).host server_limit, domain_limit, global_limit = ( @@ -173,157 +128,36 @@ async def _download_context(self, media_item: MediaItem): ) async with server_limit, domain_limit, global_limit: - await self.manager.states.RUNNING.wait() self.processed_items.add(media_item.db_path) - self.update_queued_files(increase_total=False) self.waiting_items -= 1 yield async def run(self, media_item: MediaItem) -> bool: - """Runs the download loop.""" - if media_item.url.path in self.processed_items and not self._ignore_history: return False - async with self._download_context(media_item): - return await self.start_download(media_item) - - @error_handling_wrapper - async def download_hls(self, media_item: MediaItem, m3u8_group: RenditionGroup) -> None: - if media_item.url.path in self.processed_items and not self._ignore_history: - return - - try: - ffmpeg.check_is_available() - except RuntimeError as e: - msg = f"{e} - ffmpeg and ffprobe are required for HLS downloads" - raise DownloadError("FFmpeg Error", msg, media_item) from None - - async with self._download_context(media_item): - await self._start_hls_download(media_item, m3u8_group) - - async def _start_hls_download(self, media_item: MediaItem, m3u8_group: RenditionGroup) -> None: - media_item.complete_file = media_item.download_folder / media_item.filename - # TODO: register database duration from m3u8 info - # TODO: compute approx size for UI from the m3u8 info - media_item.download_filename = media_item.complete_file.name - await self.manager.db_manager.history_table.add_download_filename(self.domain, media_item) - task_id = self.manager.progress_manager.file_progress.add_task(domain=self.domain, filename=media_item.filename) - media_item.set_task_id(task_id) - video, audio, _subs = await self._download_rendition_group(media_item, m3u8_group) - if not audio: - await asyncio.to_thread(video.rename, media_item.complete_file) - else: - # TODO: add remux method to ffmpeg to create an mkv file instead of mp4 - # Subtitles format may be incompatible with mp4 and they will be silently dropped by ffmpeg - # so we leave them as independent files for now - ffmpeg_result = await ffmpeg.merge((video, audio), media_item.complete_file) - - if not ffmpeg_result.success: - raise DownloadError("FFmpeg Concat Error", ffmpeg_result.stderr, media_item) - - await self.client.process_completed(media_item, self.domain) - await self.client.handle_media_item_completion(media_item, downloaded=True) - await self.finalize_download(media_item, downloaded=True) - - async def _download_rendition_group( - self, media_item: MediaItem, m3u8_group: RenditionGroup - ) -> tuple[Path, Path | None, Path | None]: - async def download(m3u8: M3U8): - assert m3u8.media_type - if not m3u8.segments: - raise DownloadError(204, f"{m3u8.media_type} m3u8 manifest ({m3u8.base_uri}) has no valid segments") - - download_folder = media_item.complete_file.with_suffix(constants.TempExt.HLS) / m3u8.media_type - coros = self._prepare_hls_downloads(media_item, m3u8, download_folder) - 
n_segmets = len(m3u8.segments) - if n_segmets > 1: - suffix = f".{m3u8.media_type}.ts" - else: - suffix = media_item.complete_file.suffix + parse_url(m3u8.segments[0].absolute_uri).suffix - - output = media_item.complete_file.with_suffix(suffix) - if await asyncio.to_thread(output.is_file): - return output - - batch_size = _VIDEO_HLS_BATCH_SIZE if m3u8.media_type == "video" else _AUDIO_HLS_BATCH_SIZE - tasks_results = await aio.gather(coros, batch_size=batch_size) - n_successful = sum(1 for result in tasks_results if result.downloaded) - - if n_successful != n_segmets: - msg = f"Download of some segments failed. Successful: {n_successful:,}/{n_segmets:,} " - raise DownloadError("HLS Seg Error", msg, media_item) - - seg_paths = [result.item.complete_file for result in tasks_results] - - if n_segmets > 1: - ffmpeg_result = await ffmpeg.concat(seg_paths, output) - if not ffmpeg_result.success: - raise DownloadError("FFmpeg Concat Error", ffmpeg_result.stderr, media_item) - else: - await asyncio.to_thread(seg_paths[0].rename, output) - return output - - audio = subtitles = None - if m3u8_group.subtitle: - try: - subtitles = await download(m3u8_group.subtitle) - except Exception as e: - log(f"Unable to download subtitles for {media_item.url}, Skipping. {e!r}", 40) - else: - log( - f"Found subtitles for {media_item.url}, but CDL is currently unable to merge them. Subtitle were saved at {subtitles} ", - 30, - ) + async with self._limiter(media_item): + if not media_item.is_segment: + log(f"{self.log_prefix} starting: {media_item.url}", 20) - if m3u8_group.audio: - audio = await download(m3u8_group.audio) - video = await download(m3u8_group.video) - return video, audio, subtitles - - def _prepare_hls_downloads( - self, media_item: MediaItem, m3u8: M3U8, download_folder: Path - ) -> list[Coroutine[None, None, SegmentDownloadResult]]: - padding = max(5, len(str(len(m3u8.segments)))) - - def create_segments() -> Generator[HlsSegment]: - for index, segment in enumerate(m3u8.segments, 1): - assert segment.uri - name = f"{index:0{padding}d}{constants.TempExt.HLS}" - yield HlsSegment(segment.title, name, parse_url(segment.absolute_uri)) - - async def download_segment(segment: HlsSegment): - # TODO: segments download should bypass the downloads slots limits. 
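The reworked run() just below serializes concurrent downloads of the same filename through aio.WeakAsyncLocks, a mapping of per-key asyncio locks. A hypothetical sketch of that idea (the real implementation is cyberdrop_dl.utils.aio.WeakAsyncLocks and may differ):

import asyncio
from weakref import WeakValueDictionary

class LockVault:
    def __init__(self) -> None:
        self._locks: WeakValueDictionary[str, asyncio.Lock] = WeakValueDictionary()

    def __getitem__(self, key: str) -> asyncio.Lock:
        # Locks are created on demand; an entry vanishes automatically once
        # no task holds a strong reference to its lock anymore.
        lock = self._locks.get(key)
        if lock is None:
            lock = self._locks[key] = asyncio.Lock()
        return lock

While a task sits inside "async with vault[media_item.filename]:" it keeps a strong reference to the lock, so other downloads of the same filename queue up behind it; once all of them finish, the weak mapping drops the entry on its own.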
- # They count as a single download - seg_media_item = MediaItem.from_item( - media_item, - segment.url, - media_item.domain, - db_path=media_item.db_path, - download_folder=download_folder, - filename=segment.name, - ext=media_item.ext, - ) - seg_media_item.is_segment = True - return SegmentDownloadResult( - seg_media_item, - await self.start_download(seg_media_item), - ) - - return [download_segment(segment) for segment in create_segments()] + async with self._file_lock_vault[media_item.filename]: + log_debug(f"Lock for {media_item.filename!r} acquired", 20) + try: + return bool(await self.download(media_item)) + finally: + log_debug(f"Lock for {media_item.filename!r} released", 20) async def finalize_download(self, media_item: MediaItem, downloaded: bool) -> None: if downloaded: await asyncio.to_thread(Path.chmod, media_item.complete_file, 0o666) - await self.set_file_datetime(media_item, media_item.complete_file) - self.attempt_task_removal(media_item) - self.manager.progress_manager.download_progress.add_completed() + await _set_file_datetime(media_item, media_item.complete_file) + + self.manager.progress_manager.files.add_completed() log(f"Download finished: {media_item.url}", 20) """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" async def check_file_can_download(self, media_item: MediaItem) -> None: - """Checks if the file can be downloaded.""" await self.manager.storage_manager.check_free_space(media_item) if not self.manager.client_manager.check_allowed_filetype(media_item): raise RestrictedFiletypeError(origin=media_item) @@ -332,135 +166,30 @@ async def check_file_can_download(self, media_item: MediaItem) -> None: if not self.manager.client_manager.check_allowed_date_range(media_item): raise RestrictedDateRangeError(origin=media_item) - async def set_file_datetime(self, media_item: MediaItem, complete_file: Path) -> None: - """Sets the file's datetime.""" - if media_item.is_segment: - return - - if self.manager.config_manager.settings_data.download_options.disable_file_timestamps: - return - if not media_item.datetime: - log(f"Unable to parse upload date for {media_item.url}, using current datetime as file datetime", 30) - return - - # TODO: Make this entire method async (run in another thread) - - # 1. 
try setting creation date - try: - if sys.platform == "win32": - - def set_win_time(): - nano_ts: float = media_item.datetime * 1e7 # Windows uses nano seconds for dates - timestamp = int(nano_ts + WIN_EPOCH_OFFSET) - - # Windows dates are 64bits, split into 2 32bits unsigned ints (dwHighDateTime , dwLowDateTime) - # XOR to get the date as bytes, then shift to get the first 32 bits (dwHighDateTime) - ctime = wintypes.FILETIME(timestamp & 0xFFFFFFFF, timestamp >> 32) - access_mode = FILE_WRITE_ATTRIBUTES - sharing_mode = 0 # Exclusive access - security_mode = None # Use default security attributes - creation_disposition = OPEN_EXISTING - - # FILE_FLAG_BACKUP_SEMANTICS allows access to directories - flags = FILE_ATTRIBUTE_NORMAL | FILE_FLAG_BACKUP_SEMANTICS - template_file = None - - params = ( - access_mode, - sharing_mode, - security_mode, - creation_disposition, - flags, - template_file, - ) - - handle = windll.kernel32.CreateFileW(str(complete_file), *params) - windll.kernel32.SetFileTime( - handle, - byref(ctime), # Creation time - None, # Access time - None, # Modification time - ) - windll.kernel32.CloseHandle(handle) - - await asyncio.to_thread(set_win_time) - - elif sys.platform == "darwin" and MAC_OS_SET_FILE: - date_string = datetime.fromtimestamp(media_item.datetime).strftime("%m/%d/%Y %H:%M:%S") - cmd = ["-d", date_string, complete_file] - process = await asyncio.subprocess.create_subprocess_exec( - MAC_OS_SET_FILE, *cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL - ) - _ = await process.wait() - - except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError, ValueError): - pass - - # 2. try setting modification and access date - try: - await asyncio.to_thread(os.utime, complete_file, (media_item.datetime, media_item.datetime)) - except OSError: - pass - - def attempt_task_removal(self, media_item: MediaItem) -> None: - """Attempts to remove the task from the progress bar.""" - if media_item.is_segment: - return - if media_item.task_id is not None: - try: - self.manager.progress_manager.file_progress.remove_task(media_item.task_id) - except ValueError: - pass - - media_item.set_task_id(None) - - """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - - async def start_download(self, media_item: MediaItem) -> bool: - try: - self.client.client_manager.check_domain_errors(self.domain) - except TooManyCrawlerErrors: - return False - - if not media_item.is_segment: - log(f"{self.log_prefix} starting: {media_item.url}", 20) - - async with self._file_lock_vault[media_item.filename]: - log_debug(f"Lock for {media_item.filename} acquired", 20) - try: - return bool(await self.download(media_item)) - finally: - log_debug(f"Lock for {media_item.filename} released", 20) - @error_handling_wrapper @retry async def download(self, media_item: MediaItem) -> bool | None: - """Downloads the media item.""" - url_as_str = str(media_item.url) - if url_as_str in KNOWN_BAD_URLS: - raise DownloadError(KNOWN_BAD_URLS[url_as_str]) try: - await self.manager.states.RUNNING.wait() - self.client.client_manager.check_domain_errors(self.domain) - media_item.current_attempt = media_item.current_attempt or 1 if not media_item.is_segment: - media_item.duration = await self.manager.db_manager.history_table.get_duration(self.domain, media_item) + media_item.duration = await self.manager.db_manager.history_table.get_duration( + media_item.domain, media_item + ) await self.check_file_can_download(media_item) - downloaded = await 
self.client.download_file(self.domain, media_item) + + downloaded = await self.client.download_file(media_item.domain, media_item) if downloaded: await asyncio.to_thread(Path.chmod, media_item.complete_file, 0o666) if not media_item.is_segment: - await self.set_file_datetime(media_item, media_item.complete_file) - self.manager.progress_manager.download_progress.add_completed() + await _set_file_datetime(media_item, media_item.complete_file) + self.manager.progress_manager.files.add_completed() log(f"Download finished: {media_item.url}", 20) - self.attempt_task_removal(media_item) + return downloaded except SkipDownloadError as e: if not media_item.is_segment: log(f"Download skip {media_item.url}: {e}", 10) - self.manager.progress_manager.download_progress.add_skipped() - self.attempt_task_removal(media_item) + self.manager.progress_manager.files.add_skipped() except (DownloadError, ClientResponseError, InvalidContentTypeError): raise @@ -474,13 +203,6 @@ async def download(self, media_item: MediaItem) -> bool | None: ClientConnectorError, ) as e: ui_message = getattr(e, "status", type(e).__name__) - if size := await aio.get_size(media_item.partial_file): - if self._current_attempt_filesize.get(media_item.filename, 0) >= size: - raise DownloadError(ui_message, message=f"{self.log_prefix} failed", retry=True) from None - - self._current_attempt_filesize[media_item.filename] = size - raise DownloadError(status=999, message="Download timeout reached, retrying", retry=True) from None - message = str(e) raise DownloadError(ui_message, message, retry=True) from e @@ -490,9 +212,33 @@ def write_download_error( error_log_msg: ErrorLogMessage, exc_info: Exception | None = None, ) -> None: - self.attempt_task_removal(media_item) full_message = f"{self.log_prefix} Failed: {media_item.url} ({error_log_msg.main_log_msg}) \n -> Referer: {media_item.referer}" log(full_message, 40, exc_info=exc_info) - self.manager.log_manager.write_download_error_log(media_item, error_log_msg.csv_log_msg) - self.manager.progress_manager.download_stats_progress.add_failure(error_log_msg.ui_failure) - self.manager.progress_manager.download_progress.add_failed() + self.manager.logs.write_download_error_log(media_item, error_log_msg.csv_log_msg) + self.manager.progress_manager.download_errors.add_failure(error_log_msg.ui_failure) + self.manager.progress_manager.files.add_failed() + + +async def _set_file_datetime(media_item: MediaItem, complete_file: Path) -> None: + if media_item.is_segment: + return + + if config.get().download_options.disable_file_timestamps: + return + + if not media_item.timestamp: + log(f"Unable to parse upload date for {media_item.url}, using current datetime as file datetime", 30) + return + + # 1. try setting creation date + try: + await set_creation_time(media_item.complete_file, media_item.timestamp) + + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError, ValueError): + pass + + # 2. 
try setting modification and access date + try: + await asyncio.to_thread(os.utime, complete_file, (media_item.timestamp, media_item.timestamp)) + except OSError: + pass diff --git a/cyberdrop_dl/downloader/mega_nz.py b/cyberdrop_dl/downloader/mega_nz.py index 73f47b330..e934c270d 100644 --- a/cyberdrop_dl/downloader/mega_nz.py +++ b/cyberdrop_dl/downloader/mega_nz.py @@ -17,7 +17,7 @@ from yarl import URL from cyberdrop_dl.data_structures.url_objects import MediaItem - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager class MegaDownloadClient(DownloadClient): @@ -40,7 +40,6 @@ async def _append_content(self, media_item: MediaItem, content: aiohttp.StreamRe async with aiofiles.open(media_item.partial_file, mode="ab") as f: for _, chunk_size in get_chunks(file_size): - await self.manager.states.RUNNING.wait() raw_chunk = await content.readexactly(chunk_size) chunk = chunk_decryptor.read(raw_chunk) await check_free_space() @@ -48,7 +47,7 @@ async def _append_content(self, media_item: MediaItem, content: aiohttp.StreamRe await self.client_manager.speed_limiter.acquire(chunk_size) await f.write(chunk) - self.manager.progress_manager.file_progress.advance_file(media_item.task_id, chunk_size) + self.manager.progress_manager.downloads.advance_file(media_item.task_id, chunk_size) check_download_speed() await self._post_download_check(media_item) diff --git a/cyberdrop_dl/env.py b/cyberdrop_dl/env.py index 2eeed1476..ee54532bb 100644 --- a/cyberdrop_dl/env.py +++ b/cyberdrop_dl/env.py @@ -1,5 +1,4 @@ import os -from hashlib import sha256 os.environ["PYDANTIC_ERRORS_INCLUDE_URL"] = "0" RUNNING_IN_IDE = bool(os.getenv("PYCHARM_HOSTED") or os.getenv("TERM_PROGRAM") == "vscode") @@ -7,9 +6,7 @@ os.getenv("TERMUX_VERSION") or os.getenv("TERMUX_MAIN_PACKAGE_FORMAT") or "com.termux" in os.getenv("$PREFIX", "") ) PORTRAIT_MODE = bool(RUNNING_IN_TERMUX or os.getenv("CDL_PORTRAIT_MODE")) -ENABLE_DEBUG_CRAWLERS = os.getenv("CDL_ENABLE_DEBUG_CRAWLERS") -if ENABLE_DEBUG_CRAWLERS: - ENABLE_DEBUG_CRAWLERS = sha256(ENABLE_DEBUG_CRAWLERS.encode("utf-8")).hexdigest() + DEBUG_LOG_FOLDER = os.getenv("CDL_DEBUG_LOG_FOLDER") PROFILING = os.getenv("CDL_PROFILING") diff --git a/cyberdrop_dl/exceptions.py b/cyberdrop_dl/exceptions.py index 06fb0fb22..493871b3f 100644 --- a/cyberdrop_dl/exceptions.py +++ b/cyberdrop_dl/exceptions.py @@ -5,12 +5,9 @@ from pathlib import Path from typing import TYPE_CHECKING -from yaml import YAMLError -from yarl import URL - -from cyberdrop_dl.constants import VALIDATION_ERROR_FOOTER - if TYPE_CHECKING: + from yarl import URL + from cyberdrop_dl.data_structures.url_objects import MediaItem, ScrapeItem @@ -211,6 +208,8 @@ class JDownloaderError(CDLBaseError): class InvalidYamlError(CDLBaseError): def __init__(self, file: Path, e: Exception) -> None: """This error will be thrown when a yaml config file has invalid values.""" + from yaml import YAMLError + file_path = file.resolve() ui_failure = "Invalid YAML" msg = f"Unable to read file '{file_path}'" @@ -221,8 +220,7 @@ def __init__(self, file: Path, e: Exception) -> None: msg += f"\n\nThe error was found in this line: \n {mark}" problem = getattr(e, "problem", str(e)) - msg += f"\n\n{problem.capitalize()}" - msg += f"\n\n{VALIDATION_ERROR_FOOTER}" + msg += f"\n\n{problem.capitalize()}\n\nPlease delete the file or fix the errors" super().__init__(ui_failure, message=msg, origin=file) @@ -244,6 +242,8 @@ def create_error_msg(error: int | str) -> str: def get_origin(origin: ScrapeItem | Path | 
MediaItem | URL | None = None) -> Path | URL | None: + from yarl import URL + if origin and not isinstance(origin, URL | Path): return origin.parents[0] if origin.parents else None return origin diff --git a/cyberdrop_dl/managers/__init__.py b/cyberdrop_dl/managers/__init__.py index e69de29bb..f0d7ba3a4 100644 --- a/cyberdrop_dl/managers/__init__.py +++ b/cyberdrop_dl/managers/__init__.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +import asyncio +import json +import logging +from dataclasses import field +from time import perf_counter +from typing import TYPE_CHECKING, NamedTuple + +from cyberdrop_dl import __version__, appdata, config, constants +from cyberdrop_dl.database import Database +from cyberdrop_dl.managers.client_manager import ClientManager +from cyberdrop_dl.managers.hash_manager import HashManager +from cyberdrop_dl.managers.live_manager import LiveManager +from cyberdrop_dl.managers.log_manager import LogManager +from cyberdrop_dl.managers.storage_manager import StorageManager +from cyberdrop_dl.progress import ProgressManager +from cyberdrop_dl.utils import ffmpeg +from cyberdrop_dl.utils.logger import LogHandler, QueuedLogger +from cyberdrop_dl.utils.utilities import close_if_defined, get_system_information + +if TYPE_CHECKING: + from asyncio import TaskGroup + from pathlib import Path + + from cyberdrop_dl.data_structures.url_objects import MediaItem + from cyberdrop_dl.scrape_mapper import ScrapeMapper + + +class AsyncioEvents(NamedTuple): + SHUTTING_DOWN: asyncio.Event + RUNNING: asyncio.Event + + +logger = logging.getLogger(__name__) + + +class Manager: + def __init__(self) -> None: + self.hash_manager: HashManager = field(init=False) + self.db_manager: Database = field(init=False) + self.client_manager: ClientManager = field(init=False) + self.storage_manager: StorageManager = field(init=False) + + self.progress_manager: ProgressManager = ProgressManager(self, portrait=False) + self.live_manager: LiveManager = field(init=False) + + self.task_group: TaskGroup = asyncio.TaskGroup() + self.scrape_mapper: ScrapeMapper = field(init=False) + + self.start_time: float = perf_counter() + self.loggers: dict[str, QueuedLogger] = {} + self.states: AsyncioEvents + + constants.console_handler = LogHandler(level=constants.CONSOLE_LEVEL) + + self.logs: LogManager = LogManager(config.get(), self.task_group) + log_app_state() + self._completed_downloads: set[MediaItem] = set() + self._completed_downloads_paths: set[Path] = set() + self._prev_downloads: set[MediaItem] = set() + self._prev_downloads_paths: set[Path] = set() + + def add_completed(self, media_item: MediaItem) -> None: + if media_item.is_segment: + return + self._completed_downloads.add(media_item) + self._completed_downloads_paths.add(media_item.complete_file) + + def add_prev(self, media_item: MediaItem) -> None: + self._prev_downloads.add(media_item) + self._prev_downloads_paths.add(media_item.complete_file) + + @property + def completed_downloads(self) -> set[MediaItem]: + return self._completed_downloads + + @property + def prev_downloads(self) -> set[MediaItem]: + return self._prev_downloads + + """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" + + async def async_startup(self) -> None: + """Async startup process for the manager.""" + self.states = AsyncioEvents(asyncio.Event(), asyncio.Event()) + self.client_manager = ClientManager(self) + await self.client_manager.startup() + self.storage_manager = StorageManager(self) + + await 
self.async_db_hash_startup() + + constants.MAX_NAME_LENGTHS["FILE"] = config.get().general.max_file_name_length + constants.MAX_NAME_LENGTHS["FOLDER"] = config.get().general.max_folder_name_length + + async def async_db_hash_startup(self) -> None: + self.db_manager = Database( + appdata.get().db_file, + config.get().runtime_options.ignore_history, + ) + await self.db_manager.startup() + self.hash_manager = HashManager(self) + self.live_manager = LiveManager(self) + + async def async_db_close(self) -> None: + "Partial shutdown for managers used for hash directory scanner" + self.db_manager = await close_if_defined(self.db_manager) + self.hash_manager = constants.NOT_DEFINED + + async def close(self) -> None: + """Closes the manager.""" + self.states.RUNNING.clear() + + await self.async_db_close() + + self.client_manager = await close_if_defined(self.client_manager) + self.storage_manager = await close_if_defined(self.storage_manager) + + while self.loggers: + _, queued_logger = self.loggers.popitem() + queued_logger.stop() + + +def log_app_state() -> None: + auth = {} + + config_ = config.get() + app_data = appdata.get() + for site, auth_entries in config_.auth.model_dump().items(): # pyright: ignore[reportAny] + auth[site] = all(auth_entries.values()) # pyright: ignore[reportAny] + + # f"Using Input File: {self.path_manager.input_file}", + stats = dict( # noqa: C408 + version=__version__, + system=get_system_information(), + ffmpeg=ffmpeg.get_ffmpeg_version(), + ffprobe=ffmpeg.get_ffprobe_version(), + database=app_data.db_file, + config_file=config_.source, + auth=auth, + config=config_.model_dump_json(indent=2, exclude={"auth"}), + ) + logger.debug(json.dumps(stats, indent=2, ensure_ascii=False)) diff --git a/cyberdrop_dl/managers/cache_manager.py b/cyberdrop_dl/managers/cache_manager.py deleted file mode 100644 index 104fcab37..000000000 --- a/cyberdrop_dl/managers/cache_manager.py +++ /dev/null @@ -1,59 +0,0 @@ -from __future__ import annotations - -from dataclasses import field -from typing import TYPE_CHECKING, Any - -from cyberdrop_dl import __version__ as current_version -from cyberdrop_dl.utils import yaml - -if TYPE_CHECKING: - from pathlib import Path - - from cyberdrop_dl.managers.manager import Manager - - -class CacheManager: - def __init__(self, manager: Manager) -> None: - self.manager = manager - self.cache_file: Path = field(init=False) - self._cache: dict[str, Any] = {} - - def startup(self, cache_file: Path) -> None: - """Ensures that the cache file exists.""" - self.cache_file = cache_file - if not self.cache_file.is_file(): - self.save("default_config", "Default") - - self.load() - if self.manager.parsed_args.cli_only_args.appdata_folder: - self.save("first_startup_completed", True) - - def load(self) -> None: - """Loads the cache file into memory.""" - self._cache = yaml.load(self.cache_file) - - def load_request_cache(self) -> None: - return - - def get(self, key: str) -> Any: - """Returns the value of a key in the cache.""" - return self._cache.get(key, None) - - def save(self, key: str, value: Any) -> None: - """Saves a key and value to the cache.""" - self._cache[key] = value - yaml.save(self.cache_file, self._cache) - - def dump(self, data: dict[str, Any]) -> None: - """dumps the dictionary into the cache""" - self._cache = data - yaml.save(self.cache_file, self._cache) - - def remove(self, key: str) -> None: - """Removes a key from the cache.""" - if key in self._cache: - del self._cache[key] - yaml.save(self.cache_file, self._cache) - - async def close(self) -> 
None: - self.save("version", current_version) diff --git a/cyberdrop_dl/managers/client_manager.py b/cyberdrop_dl/managers/client_manager.py index 7bf752250..f88fceb9c 100644 --- a/cyberdrop_dl/managers/client_manager.py +++ b/cyberdrop_dl/managers/client_manager.py @@ -14,24 +14,20 @@ from aiohttp import ClientResponse, ClientSession from aiolimiter import AsyncLimiter -from cyberdrop_dl import constants, ddos_guard, env +from cyberdrop_dl import appdata, config, constants, ddos_guard, env from cyberdrop_dl.clients.download_client import DownloadClient from cyberdrop_dl.clients.flaresolverr import FlareSolverr from cyberdrop_dl.clients.response import AbstractResponse from cyberdrop_dl.clients.scraper_client import ScraperClient +from cyberdrop_dl.cookies import get_cookies_from_browsers, read_netscape_files from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL, MediaItem from cyberdrop_dl.exceptions import DDOSGuardError, DownloadError, ScrapeError, TooManyCrawlerErrors -from cyberdrop_dl.ui.prompts.user_prompts import get_cookies_from_browsers -from cyberdrop_dl.utils.aio import WeakAsyncLocks -from cyberdrop_dl.utils.cookie_management import read_netscape_files +from cyberdrop_dl.managers import Manager from cyberdrop_dl.utils.ffmpeg import probe from cyberdrop_dl.utils.logger import log, log_debug, log_spacer -_VALID_EXTENSIONS = ( - constants.FILE_FORMATS["Images"] | constants.FILE_FORMATS["Videos"] | constants.FILE_FORMATS["Audio"] -) - if TYPE_CHECKING: + from asyncio.locks import Semaphore from collections.abc import Callable, Generator, Iterable, Mapping from http.cookies import BaseCookie @@ -39,7 +35,7 @@ from curl_cffi.requests import AsyncSession from curl_cffi.requests.models import Response as CurlResponse - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager _curl_import_error = None try: @@ -60,13 +56,13 @@ if TYPE_CHECKING: - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager _null_context = contextlib.nullcontext() class DownloadSpeedLimiter(AsyncLimiter): - __slots__ = (*AsyncLimiter.__slots__, "chunk_size") + __slots__ = ("chunk_size",) def __init__(self, speed_limit: int) -> None: self.chunk_size: int = 1024 * 1024 * 10 # 10MB @@ -82,61 +78,43 @@ async def acquire(self, amount: float | None = None) -> None: await super().acquire(amount) def __repr__(self): - return f"{self.__class__.__name__}(speed_limit={self.max_rate}, chunk_size={self.chunk_size})" - - -class DDosGuard: - TITLES = ("Just a moment...", "DDoS-Guard") - SELECTORS = ( - "#cf-challenge-running", - ".ray_id", - ".attack-box", - "#cf-please-wait", - "#challenge-spinner", - "#trk_jschal_js", - "#turnstile-wrapper", - ".lds-ring", - ) - ALL_SELECTORS = ", ".join(SELECTORS) - - -class CloudflareTurnstile: - TITLES = ("Simpcity Cuck Detection", "Attention Required! 
| Cloudflare", "Sentinel CAPTCHA") - SELECTORS = ( - "captchawrapper", - "cf-turnstile", - "script[src*='challenges.cloudflare.com/turnstile']", - "script:-soup-contains('Dont open Developer Tools')", - ) - ALL_SELECTORS = ", ".join(SELECTORS) + return f"{type(self).__name__}(speed_limit={self.max_rate!r}, chunk_size={self.chunk_size!r})" + + +def _create_ssl(): + ssl_context = config.get().general.ssl_context + + if not ssl_context: + return False + + if ssl_context == "certifi": + return ssl.create_default_context(cafile=certifi.where()) + if ssl_context == "truststore": + return truststore.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + + ctx = truststore.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + ctx.load_verify_locations(cafile=certifi.where()) + return ctx class ClientManager: """Creates a 'client' that can be referenced by scraping or download sessions.""" def __init__(self, manager: Manager) -> None: - self.manager = manager - ssl_context = self.manager.global_config.general.ssl_context - if not ssl_context: - self.ssl_context = False - elif ssl_context == "certifi": - self.ssl_context = ssl.create_default_context(cafile=certifi.where()) - elif ssl_context == "truststore": - self.ssl_context = truststore.SSLContext(ssl.PROTOCOL_TLS_CLIENT) - elif ssl_context == "truststore+certifi": - self.ssl_context = ctx = truststore.SSLContext(ssl.PROTOCOL_TLS_CLIENT) - ctx.load_verify_locations(cafile=certifi.where()) - - self.cookies = aiohttp.CookieJar(quote_cookie=False) + self.manager: Manager = manager + self.ssl_context: ssl.SSLContext | Literal[False] = _create_ssl() + self.cookies: aiohttp.CookieJar = aiohttp.CookieJar(quote_cookie=False) self.rate_limits: dict[str, AsyncLimiter] = {} self.download_slots: dict[str, int] = {} - self.global_rate_limiter = AsyncLimiter(self.rate_limiting_options.rate_limit, 1) - self.global_download_slots = asyncio.Semaphore(self.rate_limiting_options.max_simultaneous_downloads) - self.scraper_client = ScraperClient(self) - self.speed_limiter = DownloadSpeedLimiter(self.rate_limiting_options.download_speed_limit) - self.download_client = DownloadClient(manager, self) - self.flaresolverr = FlareSolverr(manager) - self.file_locks: WeakAsyncLocks[str] = WeakAsyncLocks() + + rate_limits = config.get().rate_limiting_options + + self.global_rate_limiter: AsyncLimiter = AsyncLimiter(rate_limits.rate_limit, 1) + self.global_download_slots: Semaphore = asyncio.Semaphore(rate_limits.max_simultaneous_downloads) + self.scraper_client: ScraperClient = ScraperClient(self) + self.speed_limiter: DownloadSpeedLimiter = DownloadSpeedLimiter(rate_limits.download_speed_limit) + self.download_client: DownloadClient = DownloadClient(manager, self) + self.flaresolverr: FlareSolverr = FlareSolverr(manager) self._session: aiohttp.ClientSession self._download_session: aiohttp.ClientSession @@ -167,7 +145,7 @@ async def __aexit__(self, *args) -> None: @property def rate_limiting_options(self): - return self.manager.global_config.rate_limiting_options + return config.get().rate_limiting_options def get_download_slots(self, domain: str) -> int: """Returns the download limit for a domain.""" @@ -196,15 +174,17 @@ def basic_auth(username: str, password: str) -> str: def check_allowed_filetype(self, media_item: MediaItem) -> bool: """Checks if the file type is allowed to download.""" - ignore_options = self.manager.config_manager.settings_data.ignore_options + ignore_options = config.get().ignore_options + ext = media_item.ext.lower() - if media_item.ext.lower() in constants.FILE_FORMATS["Images"] and 
ignore_options.exclude_images: + if ext in constants.FileFormats.IMAGE and ignore_options.exclude_images: return False - if media_item.ext.lower() in constants.FILE_FORMATS["Videos"] and ignore_options.exclude_videos: + if ext in constants.FileFormats.VIDEO and ignore_options.exclude_videos: return False - if media_item.ext.lower() in constants.FILE_FORMATS["Audio"] and ignore_options.exclude_audio: + if ext in constants.FileFormats.AUDIO and ignore_options.exclude_audio: return False - return not (ignore_options.exclude_other and media_item.ext.lower() not in _VALID_EXTENSIONS) + + return ext in constants.FileFormats.MEDIA or not ignore_options.exclude_other def check_allowed_date_range(self, media_item: MediaItem) -> bool: """Checks if the file was uploaded within the config date range""" @@ -213,7 +193,7 @@ def check_allowed_date_range(self, media_item: MediaItem) -> bool: return True item_date = datetime.date() - ignore_options = self.manager.config_manager.settings_data.ignore_options + ignore_options = config.get().ignore_options if ignore_options.exclude_before and item_date < ignore_options.exclude_before: return False @@ -233,7 +213,7 @@ def filter_cookies_by_word_in_domain(self, word: str) -> Iterable[tuple[str, Bas async def startup(self) -> None: await _set_dns_resolver() - def new_curl_cffi_session(self) -> AsyncSession: + def new_curl_cffi_session(self) -> AsyncSession[CurlResponse]: # Calling code should have validated if curl is actually available import warnings @@ -247,7 +227,7 @@ def new_curl_cffi_session(self) -> AsyncSession: warnings.filterwarnings("ignore", category=CurlCffiWarning) acurl = AsyncCurl(loop=loop) - proxy_or_none = str(proxy) if (proxy := self.manager.global_config.general.proxy) else None + proxy_or_none = str(proxy) if (proxy := config.get().general.proxy) else None return AsyncSession( loop=loop, @@ -262,23 +242,21 @@ def new_curl_cffi_session(self) -> AsyncSession: def new_scrape_session(self) -> ClientSession: trace_configs = _create_request_log_hooks("scrape") - return self._new_session(cached=True, trace_configs=trace_configs) + return self._new_session(trace_configs=trace_configs) def new_download_session(self) -> ClientSession: trace_configs = _create_request_log_hooks("download") - return self._new_session(cached=False, trace_configs=trace_configs) + return self._new_session(trace_configs=trace_configs) - def _new_session( - self, cached: bool = False, trace_configs: list[aiohttp.TraceConfig] | None = None - ) -> ClientSession: + def _new_session(self, trace_configs: list[aiohttp.TraceConfig] | None = None) -> ClientSession: timeout = self.rate_limiting_options._aiohttp_timeout return ClientSession( - headers={"user-agent": self.manager.global_config.general.user_agent}, + headers={"user-agent": config.get().general.user_agent}, raise_for_status=False, cookie_jar=self.cookies, timeout=timeout, trace_configs=trace_configs, - proxy=self.manager.global_config.general.proxy, + proxy=config.get().general.proxy, connector=self._new_tcp_connector(), requote_redirect_url=False, ) @@ -315,14 +293,14 @@ def request_context(self, domain: str) -> Generator[None]: pass async def load_cookie_files(self) -> None: - if self.manager.config_manager.settings_data.browser_cookies.auto_import: - assert self.manager.config_manager.settings_data.browser_cookies.browser - get_cookies_from_browsers( - self.manager, browser=self.manager.config_manager.settings_data.browser_cookies.browser - ) - cookie_files = sorted(self.manager.path_manager.cookies_dir.glob("*.txt")) 
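Throughout the diff, self.manager.config_manager... attribute chains give way to module-level accessors: config.get(), cache.get(), and appdata.get() all read a ContextVar that startup code binds once. A minimal sketch of the shape of such a module, with a hypothetical Settings class standing in for the real pydantic models:

from contextvars import ContextVar

class Settings: ...  # stand-in for the real settings model

_settings: ContextVar[Settings] = ContextVar("_settings")

def set_current(settings: Settings) -> None:
    _settings.set(settings)

def get() -> Settings:
    # A LookupError here means startup never bound Settings for this context,
    # surfacing ordering bugs instead of silently reusing a stale global.
    return _settings.get()

Compared with threading a Manager instance through every constructor, a ContextVar is bound per execution context, so tests or nested runs can rebind it without touching shared global state.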
+ if config.get().browser_cookies.auto_import: + assert config.get().browser_cookies.browser + get_cookies_from_browsers(self.manager, browser=config.get().browser_cookies.browser) + + cookie_files = sorted(appdata.get().cookies_dir.glob("*.txt")) if not cookie_files: return + async for domain, cookie in read_netscape_files(cookie_files): self.cookies.update_cookies(cookie, response_url=AbsoluteHttpURL(f"https://{domain}")) @@ -392,12 +370,12 @@ async def check_file_duration(self, media_item: MediaItem) -> bool: if media_item.is_segment: return True - is_video = media_item.ext.lower() in constants.FILE_FORMATS["Videos"] - is_audio = media_item.ext.lower() in constants.FILE_FORMATS["Audio"] + is_video = media_item.ext.lower() in constants.FileFormats.VIDEO + is_audio = media_item.ext.lower() in constants.FileFormats.AUDIO if not (is_video or is_audio): return True - duration_limits = self.manager.config.media_duration_limits + duration_limits = config.get().media_duration_limits min_video_duration: float = duration_limits.minimum_video_duration.total_seconds() max_video_duration: float = duration_limits.maximum_video_duration.total_seconds() min_audio_duration: float = duration_limits.minimum_audio_duration.total_seconds() diff --git a/cyberdrop_dl/managers/config_manager.py b/cyberdrop_dl/managers/config_manager.py deleted file mode 100644 index 23981a74a..000000000 --- a/cyberdrop_dl/managers/config_manager.py +++ /dev/null @@ -1,216 +0,0 @@ -from __future__ import annotations - -import os -import shutil -from dataclasses import field -from time import sleep -from typing import TYPE_CHECKING - -from cyberdrop_dl.config import AuthSettings, ConfigSettings, GlobalSettings -from cyberdrop_dl.exceptions import InvalidYamlError -from cyberdrop_dl.managers.log_manager import LogManager -from cyberdrop_dl.utils import yaml -from cyberdrop_dl.utils.apprise import get_apprise_urls - -if TYPE_CHECKING: - from pathlib import Path - - from pydantic import BaseModel - - from cyberdrop_dl.managers.manager import Manager - from cyberdrop_dl.utils.apprise import AppriseURL - - -class ConfigManager: - def __init__(self, manager: Manager) -> None: - self.manager = manager - self.loaded_config: str = "" - - self.authentication_settings: Path = field(init=False) - self.settings: Path = field(init=False) - self.global_settings: Path = field(init=False) - self.deep_scrape: bool = False - self.apprise_urls: list[AppriseURL] = [] - - self.authentication_data: AuthSettings = field(init=False) - self.settings_data: ConfigSettings = field(init=False) - self.global_settings_data: GlobalSettings = field(init=False) - self.pydantic_config: str | None = None - - def startup(self) -> None: - """Startup process for the config manager.""" - self.loaded_config = self.get_loaded_config() - self.settings = self.manager.path_manager.config_folder / self.loaded_config / "settings.yaml" - self.global_settings = self.manager.path_manager.config_folder / "global_settings.yaml" - self.authentication_settings = self.manager.path_manager.config_folder / "authentication.yaml" - auth_override = self.manager.path_manager.config_folder / self.loaded_config / "authentication.yaml" - - if auth_override.is_file(): - self.authentication_settings = auth_override - - self.settings.parent.mkdir(parents=True, exist_ok=True) - self.pydantic_config = self.manager.cache_manager.get("pydantic_config") - self.load_configs() - - def get_loaded_config(self): - return self.loaded_config or self.get_default_config() - - def get_default_config(self) -> 
str: - return self.manager.cache_manager.get("default_config") or "Default" - - def load_configs(self) -> None: - """Loads all the configs.""" - self._load_authentication_config() - self._load_global_settings_config() - self._load_settings_config() - self.apprise_file = self.manager.path_manager.config_folder / self.loaded_config / "apprise.txt" - self.apprise_urls = get_apprise_urls(file=self.apprise_file) - self._set_apprise_fixed() - self._set_pydantic_config() - - @staticmethod - def get_model_fields(model: BaseModel, *, exclude_unset: bool = True) -> set[str]: - fields = set() - default_dict: dict = model.model_dump(exclude_unset=exclude_unset) - for submodel_name, submodel in default_dict.items(): - for field_name in submodel: - fields.add(f"{submodel_name}.{field_name}") - return fields - - def _load_authentication_config(self) -> None: - """Verifies the authentication config file and creates it if it doesn't exist.""" - needs_update = is_in_file("socialmediagirls_username:", self.authentication_settings) - posible_fields = self.get_model_fields(AuthSettings(), exclude_unset=False) - if self.authentication_settings.is_file(): - self.authentication_data = AuthSettings.model_validate(yaml.load(self.authentication_settings)) - set_fields = self.get_model_fields(self.authentication_data) - if posible_fields == set_fields and not needs_update and self.pydantic_config: - return - - else: - self.authentication_data = AuthSettings() - - yaml.save(self.authentication_settings, self.authentication_data) - - def _load_settings_config(self) -> None: - """Verifies the settings config file and creates it if it doesn't exist.""" - needs_update = is_in_file("download_error_urls_filename:", self.settings) - posible_fields = self.get_model_fields(ConfigSettings(), exclude_unset=False) - if self.manager.parsed_args.cli_only_args.config_file: - self.settings = self.manager.parsed_args.cli_only_args.config_file - self.loaded_config = "CLI-Arg Specified" - - if self.settings.is_file(): - self.settings_data = ConfigSettings.model_validate(yaml.load(self.settings)) - set_fields = self.get_model_fields(self.settings_data) - self.deep_scrape = self.settings_data.runtime_options.deep_scrape - self.settings_data.runtime_options.deep_scrape = False - if posible_fields == set_fields and not needs_update and self.pydantic_config: - return - else: - self.settings_data = ConfigSettings() - self.settings_data.files.input_file = ( - self.manager.path_manager.appdata / "Configs" / self.loaded_config / "URLs.txt" - ) - downloads = self.manager.path_manager.cwd / "Downloads" - self.settings_data.sorting.sort_folder = downloads / "Cyberdrop-DL Sorted Downloads" - self.settings_data.files.download_folder = downloads / "Cyberdrop-DL Downloads" - self.settings_data.logs.log_folder = ( - self.manager.path_manager.appdata / "Configs" / self.loaded_config / "Logs" - ) - - yaml.save(self.settings, self.settings_data) - - def _load_global_settings_config(self) -> None: - """Verifies the global settings config file and creates it if it doesn't exist.""" - needs_update = is_in_file("Dupe_Cleanup_Options:", self.global_settings) - posible_fields = self.get_model_fields(GlobalSettings(), exclude_unset=False) - if self.global_settings.is_file(): - self.global_settings_data = GlobalSettings.model_validate(yaml.load(self.global_settings)) - set_fields = self.get_model_fields(self.global_settings_data) - if posible_fields == set_fields and not needs_update and self.pydantic_config: - return - else: - self.global_settings_data = 
GlobalSettings() - - yaml.save(self.global_settings, self.global_settings_data) - - """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - - def save_as_new_config(self, new_settings: Path, settings_data: ConfigSettings) -> None: - """Creates a new settings config file.""" - yaml.save(new_settings, settings_data) - - def write_updated_authentication_config(self) -> None: - """Write updated authentication data.""" - yaml.save(self.authentication_settings, self.authentication_data) - - def write_updated_settings_config(self) -> None: - """Write updated settings data.""" - yaml.save(self.settings, self.settings_data) - - def write_updated_global_settings_config(self) -> None: - """Write updated global settings data.""" - yaml.save(self.global_settings, self.global_settings_data) - - """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - - def get_configs(self) -> list: - """Returns a list of all the configs.""" - configs = [config.name for config in self.manager.path_manager.config_folder.iterdir() if config.is_dir()] - configs.sort() - return configs - - def change_default_config(self, config_name: str) -> None: - """Changes the default config.""" - self.manager.cache_manager.save("default_config", config_name) - - def delete_config(self, config_name: str) -> None: - """Deletes a config.""" - configs = self.get_configs() - configs.remove(config_name) - - if self.manager.cache_manager.get("default_config") == config_name: - self.manager.cache_manager.save("default_config", configs[0]) - - config = self.manager.path_manager.config_folder / config_name - shutil.rmtree(config) - - def change_config(self, config_name: str) -> None: - """Changes the config.""" - self.loaded_config = config_name - self.startup() - - self.manager.path_manager.startup() - sleep(1) - self.manager.log_manager = LogManager(self.manager) - sleep(1) - - def _set_apprise_fixed(self): - apprise_fixed = self.manager.cache_manager.get("apprise_fixed") - if apprise_fixed: - return - if os.name == "nt": - try: - import win32con # noqa: F401 - except ImportError: - pass - else: - with self.apprise_file.open("a", encoding="utf8") as f: - f.write("windows://\n") - self.manager.cache_manager.save("apprise_fixed", True) - - def _set_pydantic_config(self): - if self.pydantic_config: - return - self.manager.cache_manager.save("pydantic_config", True) - self.pydantic_config = True - - -def is_in_file(search_value: str, file: Path) -> bool: - if not file.is_file(): - return False - try: - return search_value.casefold() in file.read_text(encoding="utf8").casefold() - except Exception as e: - raise InvalidYamlError(file, e) from e diff --git a/cyberdrop_dl/managers/hash_manager.py b/cyberdrop_dl/managers/hash_manager.py index 8a24d46c4..93dd275c8 100644 --- a/cyberdrop_dl/managers/hash_manager.py +++ b/cyberdrop_dl/managers/hash_manager.py @@ -10,7 +10,7 @@ from cyberdrop_dl.clients.hash_client import HashClient if TYPE_CHECKING: - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager _HASHERS: Final = { "md5": hashlib.md5, diff --git a/cyberdrop_dl/managers/live_manager.py b/cyberdrop_dl/managers/live_manager.py index bab9bc58e..d5ed00254 100644 --- a/cyberdrop_dl/managers/live_manager.py +++ b/cyberdrop_dl/managers/live_manager.py @@ -7,7 +7,7 @@ from rich.live import Live -from cyberdrop_dl import constants +from cyberdrop_dl import config, constants from cyberdrop_dl.cli 
import is_terminal_in_portrait if TYPE_CHECKING: @@ -15,7 +15,7 @@ from rich.console import RenderableType - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager class LiveManager: @@ -23,7 +23,7 @@ def __init__(self, manager: Manager) -> None: self.manager = manager self.ui_setting = self.manager.parsed_args.cli_only_args.ui self.fullscreen = f = self.manager.parsed_args.cli_only_args.fullscreen_ui - self.refresh_rate = rate = self.manager.config_manager.global_settings_data.ui_options.refresh_rate + self.refresh_rate = rate = config.get().ui_options.refresh_rate self.live = Live(refresh_per_second=rate, transient=True, screen=f, auto_refresh=True) self.current_layout: str = "" diff --git a/cyberdrop_dl/managers/log_manager.py b/cyberdrop_dl/managers/log_manager.py index 80e06f5c7..cb25f149e 100644 --- a/cyberdrop_dl/managers/log_manager.py +++ b/cyberdrop_dl/managers/log_manager.py @@ -2,148 +2,83 @@ import asyncio import csv +import dataclasses from collections import defaultdict -from pathlib import Path from typing import TYPE_CHECKING, Any -from cyberdrop_dl.constants import CSV_DELIMITER from cyberdrop_dl.exceptions import get_origin from cyberdrop_dl.utils import json -from cyberdrop_dl.utils.logger import log, log_spacer if TYPE_CHECKING: from collections.abc import Iterable + from pathlib import Path from yarl import URL - from cyberdrop_dl.data_structures.url_objects import MediaItem - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.config import Config + from cyberdrop_dl.data_structures.url_objects import MediaItem, ScrapeItem +_CSV_DELIMITER = "," +_file_locks: defaultdict[Path, asyncio.Lock] = defaultdict(asyncio.Lock) + + +@dataclasses.dataclass(slots=True, frozen=True) class LogManager: - def __init__(self, manager: Manager) -> None: - self.manager = manager - self.main_log: Path = manager.path_manager.main_log - self.last_post_log: Path = manager.path_manager.last_forum_post_log - self.unsupported_urls_log: Path = manager.path_manager.unsupported_urls_log - self.download_error_log: Path = manager.path_manager.download_error_urls_log - self.scrape_error_log: Path = manager.path_manager.scrape_error_urls_log - self.jsonl_file = self.main_log.with_suffix(".results.jsonl") - self._file_locks: dict[Path, asyncio.Lock] = defaultdict(asyncio.Lock) - self._has_headers: set[Path] = set() - - def startup(self) -> None: - """Startup process for the file manager.""" - for var in vars(self).values(): - if isinstance(var, Path): - var.unlink(missing_ok=True) - - async def write_jsonl(self, data: Iterable[dict[str, Any]]): - async with self._file_locks[self.jsonl_file]: - await json.dump_jsonl(data, self.jsonl_file) - - async def _write_to_csv(self, file: Path, **kwargs) -> None: + config: Config + task_group: asyncio.TaskGroup = dataclasses.field(repr=False) + _has_headers: set[Path] = dataclasses.field(init=False, default_factory=set) + + async def write_jsonl(self, data: Iterable[dict[str, Any]]) -> None: + async with _file_locks[self.config.logs.jsonl_file]: + await asyncio.to_thread(json.dump_jsonl, data, self.config.logs.jsonl_file) + + async def _write_to_csv(self, file: Path, **kwargs: Any) -> None: """Write to the specified csv file. 
kwargs are columns for the CSV.""" - async with self._file_locks[file]: - write_headers = file not in self._has_headers + async with _file_locks[file]: + is_first_write = file not in self._has_headers self._has_headers.add(file) def write(): + if is_first_write: + file.parent.mkdir(parents=True, exist_ok=True) + file.unlink(missing_ok=True) + with file.open("a", encoding="utf8", newline="") as csv_file: writer = csv.DictWriter( - csv_file, fieldnames=kwargs.keys(), delimiter=CSV_DELIMITER, quoting=csv.QUOTE_ALL + csv_file, fieldnames=kwargs, delimiter=_CSV_DELIMITER, quoting=csv.QUOTE_ALL ) - if write_headers: + if is_first_write: writer.writeheader() writer.writerow(kwargs) await asyncio.to_thread(write) def write_last_post_log(self, url: URL) -> None: - """Writes to the last post log.""" - self.manager.task_group.create_task(self._write_to_csv(self.last_post_log, url=url)) + _ = self.task_group.create_task(self._write_to_csv(self.config.logs.last_forum_post, url=url)) - def write_unsupported_urls_log(self, url: URL, origin: URL | None = None) -> None: - """Writes to the unsupported urls log.""" - self.manager.task_group.create_task(self._write_to_csv(self.unsupported_urls_log, url=url, origin=origin)) + def write_unsupported(self, url: URL, origin: ScrapeItem | URL | None = None) -> None: + _ = self.task_group.create_task( + self._write_to_csv(self.config.logs.unsupported_urls, url=url, origin=get_origin(origin)) + ) def write_download_error_log(self, media_item: MediaItem, error_message: str) -> None: - """Writes to the download error log.""" - origin = get_origin(media_item) - self.manager.task_group.create_task( + _ = self.task_group.create_task( self._write_to_csv( - self.download_error_log, + self.config.logs.download_error_urls, url=media_item.url, error=error_message, referer=media_item.referer, - origin=origin, + origin=get_origin(media_item), ) ) def write_scrape_error_log(self, url: URL | str, error_message: str, origin: URL | Path | None = None) -> None: - """Writes to the scrape error log.""" - self.manager.task_group.create_task( - self._write_to_csv(self.scrape_error_log, url=url, error=error_message, origin=origin) + _ = self.task_group.create_task( + self._write_to_csv( + self.config.logs.scrape_error_urls, + url=url, + error=error_message, + origin=origin, + ) ) - - async def update_last_forum_post(self) -> None: - """Updates the last forum post.""" - input_file = self.manager.path_manager.input_file - - def proceed(): - return input_file.is_file() and self.last_post_log.is_file() - - if await asyncio.to_thread(proceed): - await asyncio.to_thread(_update_last_forum_post, input_file, self.last_post_log) - - -def _update_last_forum_post(input_file: Path, last_post_log: Path) -> None: - log_spacer(20) - log("Updating Last Forum Posts...\n", 20) - - current_urls, current_base_urls, new_urls, new_base_urls = [], [], [], [] - try: - with input_file.open(encoding="utf8") as f: - for line in f: - url = base_url = line.strip().removesuffix("/") - - if "https" in url and "/post-" in url: - base_url = url.rsplit("/post", 1)[0] - - # only keep 1 url of the same thread - if base_url not in current_base_urls: - current_urls.append(url) - current_base_urls.append(base_url) - except UnicodeDecodeError: - log("Unable to read input file, skipping update_last_forum_post", 40) - return - - with last_post_log.open(encoding="utf8") as f: - reader = csv.DictReader(f.readlines()) - for row in reader: - new_url = base_url = row.get("url").strip().removesuffix("/") # type: ignore - - if "https" 
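# The concurrency pattern from _write_to_csv above, self-contained: one
# asyncio.Lock per file (so concurrent tasks append to the same CSV
# serially) with the blocking write pushed to a worker thread. The output
# path and columns below are examples.
import asyncio
import csv
from collections import defaultdict
from pathlib import Path

_locks: defaultdict[Path, asyncio.Lock] = defaultdict(asyncio.Lock)
_has_headers: set[Path] = set()


async def append_csv_row(file: Path, **columns: object) -> None:
    async with _locks[file]:
        first_write = file not in _has_headers
        _has_headers.add(file)

        def write() -> None:
            if first_write:
                file.parent.mkdir(parents=True, exist_ok=True)
                file.unlink(missing_ok=True)  # each run starts a fresh log
            with file.open("a", encoding="utf8", newline="") as fp:
                writer = csv.DictWriter(fp, fieldnames=columns, quoting=csv.QUOTE_ALL)
                if first_write:
                    writer.writeheader()
                writer.writerow(columns)

        await asyncio.to_thread(write)


async def main() -> None:
    rows = (append_csv_row(Path("errors.csv"), url=f"https://host/{i}", error="404") for i in range(3))
    await asyncio.gather(*rows)


asyncio.run(main())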
in new_url and "/post-" in new_url: - base_url = new_url.rsplit("/post", 1)[0] - - # only keep 1 url of the same thread - if base_url not in new_base_urls: - new_urls.append(new_url) - new_base_urls.append(base_url) - - updated_urls = current_urls.copy() - for new_url, base in zip(new_urls, new_base_urls, strict=False): - if base in current_base_urls: - index = current_base_urls.index(base) - old_url = current_urls[index] - if old_url == new_url: - continue - log(f"Updating {base}\n {old_url = }\n {new_url = }", 20) - updated_urls[index] = new_url - - if updated_urls == current_urls: - log("No URLs updated", 20) - return - - with input_file.open("w", encoding="utf8") as f: - f.write("\n".join(updated_urls)) diff --git a/cyberdrop_dl/managers/manager.py b/cyberdrop_dl/managers/manager.py deleted file mode 100644 index ca87a508c..000000000 --- a/cyberdrop_dl/managers/manager.py +++ /dev/null @@ -1,251 +0,0 @@ -from __future__ import annotations - -import asyncio -import json -from dataclasses import Field, field -from time import perf_counter -from typing import TYPE_CHECKING, Any, NamedTuple, TypeVar - -from pydantic import BaseModel - -from cyberdrop_dl import __version__, constants -from cyberdrop_dl.cli import ParsedArgs, parse_args -from cyberdrop_dl.database import Database -from cyberdrop_dl.database.transfer import transfer_v5_db_to_v6 -from cyberdrop_dl.managers.cache_manager import CacheManager -from cyberdrop_dl.managers.client_manager import ClientManager -from cyberdrop_dl.managers.config_manager import ConfigManager -from cyberdrop_dl.managers.hash_manager import HashManager -from cyberdrop_dl.managers.live_manager import LiveManager -from cyberdrop_dl.managers.log_manager import LogManager -from cyberdrop_dl.managers.path_manager import PathManager -from cyberdrop_dl.managers.progress_manager import ProgressManager -from cyberdrop_dl.managers.storage_manager import StorageManager -from cyberdrop_dl.utils import ffmpeg -from cyberdrop_dl.utils.logger import LogHandler, QueuedLogger, log -from cyberdrop_dl.utils.utilities import close_if_defined, get_system_information - -if TYPE_CHECKING: - from asyncio import TaskGroup - from collections.abc import Sequence - - from cyberdrop_dl.scraper.scrape_mapper import ScrapeMapper - - -class AsyncioEvents(NamedTuple): - SHUTTING_DOWN: asyncio.Event - RUNNING: asyncio.Event - - -class Manager: - def __init__(self, args: Sequence[str] | None = None) -> None: - if isinstance(args, str): - args = [args] - - self.parsed_args: ParsedArgs = field(init=False) - self.cache_manager: CacheManager = CacheManager(self) - self.path_manager: PathManager = field(init=False) - self.config_manager: ConfigManager = field(init=False) - self.hash_manager: HashManager = field(init=False) - - self.log_manager: LogManager = field(init=False) - self.db_manager: Database = field(init=False) - self.client_manager: ClientManager = field(init=False) - self.storage_manager: StorageManager = field(init=False) - - self.progress_manager: ProgressManager = field(init=False) - self.live_manager: LiveManager = field(init=False) - - self._loaded_args_config: bool = False - self._made_portable: bool = False - - self.task_group: TaskGroup = field(init=False) - self.scrape_mapper: ScrapeMapper = field(init=False) - - self.start_time: float = perf_counter() - self.downloaded_data: int = 0 - self.loggers: dict[str, QueuedLogger] = {} - self.args = args - self.states: AsyncioEvents - - constants.console_handler = LogHandler(level=constants.CONSOLE_LEVEL) - - @property - def 
config(self): - return self.config_manager.settings_data - - @property - def auth_config(self): - return self.config_manager.authentication_data - - @property - def global_config(self): - return self.config_manager.global_settings_data - - def startup(self) -> None: - """Startup process for the manager.""" - - if isinstance(self.parsed_args, Field): - self.parsed_args = parse_args(self.args) - - self.path_manager = PathManager(self) - self.path_manager.pre_startup() - self.cache_manager.startup(self.path_manager.cache_folder / "cache.yaml") - self.config_manager = ConfigManager(self) - self.config_manager.startup() - - self.args_consolidation() - - self.path_manager.startup() - self.log_manager = LogManager(self) - - """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - - async def async_startup(self) -> None: - """Async startup process for the manager.""" - self.states = AsyncioEvents(asyncio.Event(), asyncio.Event()) - self.args_logging() - - if not isinstance(self.client_manager, ClientManager): - self.client_manager = ClientManager(self) - await self.client_manager.startup() - if not isinstance(self.storage_manager, StorageManager): - self.storage_manager = StorageManager(self) - - elif self.states.RUNNING.is_set(): - await self.storage_manager.reset() - - await self.async_db_hash_startup() - - constants.MAX_NAME_LENGTHS["FILE"] = self.config_manager.global_settings_data.general.max_file_name_length - constants.MAX_NAME_LENGTHS["FOLDER"] = self.config_manager.global_settings_data.general.max_folder_name_length - - async def async_db_hash_startup(self) -> None: - if not isinstance(self.db_manager, Database): - self.db_manager = Database( - self.path_manager.history_db, - self.config.runtime_options.ignore_history, - ) - await self.db_manager.startup() - transfer_v5_db_to_v6(self.path_manager.history_db) - if not isinstance(self.hash_manager, HashManager): - self.hash_manager = HashManager(self) - if not isinstance(self.live_manager, LiveManager): - self.live_manager = LiveManager(self) - if not isinstance(self.progress_manager, ProgressManager): - self.progress_manager = ProgressManager(self) - self.progress_manager.startup() - - def process_additive_args(self) -> None: - cli_general_options = self.parsed_args.global_settings.general - cli_ignore_options = self.parsed_args.config_settings.ignore_options - config_ignore_options = self.config_manager.settings_data.ignore_options - config_general_options = self.config_manager.global_settings_data.general - - add_or_remove_lists(cli_ignore_options.skip_hosts, config_ignore_options.skip_hosts) - add_or_remove_lists(cli_ignore_options.only_hosts, config_ignore_options.only_hosts) - add_or_remove_lists(cli_general_options.disable_crawlers, config_general_options.disable_crawlers) - - def args_consolidation(self) -> None: - """Consolidates runtime arguments with config values.""" - self.process_additive_args() - - conf = merge_models(self.config_manager.settings_data, self.parsed_args.config_settings) - global_conf = merge_models(self.config_manager.global_settings_data, self.parsed_args.global_settings) - deep_scrape = self.parsed_args.config_settings.runtime_options.deep_scrape or self.config_manager.deep_scrape - - self.config_manager.settings_data = conf - self.config_manager.global_settings_data = global_conf - self.config_manager.deep_scrape = deep_scrape - - def args_logging(self) -> None: - """Logs the runtime arguments.""" - auth_provided = {} - - for site, auth_entries in 
self.config_manager.authentication_data.model_dump().items(): - auth_provided[site] = all(auth_entries.values()) - - config_settings = self.config_manager.settings_data.model_copy() - config_settings.runtime_options.deep_scrape = self.config_manager.deep_scrape - config_settings = config_settings.model_dump_json(indent=4) - global_settings = self.config_manager.global_settings_data.model_dump_json(indent=4) - cli_only_args = self.parsed_args.cli_only_args.model_dump_json(indent=4) - system_info = get_system_information() - - args_info = ( - "Starting Cyberdrop-DL Process", - f"Running Version: {__version__}", - f"System Info: {system_info}", - f"Using Config: {self.config_manager.loaded_config}", - f"Using Config File: {self.config_manager.settings}", - f"Using Input File: {self.path_manager.input_file}", - f"Using Download Folder: {self.path_manager.download_folder}", - f"Using Database File: {self.path_manager.history_db}", - f"Using CLI only options: {cli_only_args}", - f"Using Authentication: \n{json.dumps(auth_provided, indent=4, sort_keys=True)}", - f"Using Settings: \n{config_settings}", - f"Using Global Settings: \n{global_settings}", - f"Using ffmpeg version: {ffmpeg.get_ffmpeg_version()}", - f"Using ffprobe version: {ffmpeg.get_ffprobe_version()}", - ) - log("\n".join(args_info)) - - async def async_db_close(self) -> None: - "Partial shutdown for managers used for hash directory scanner" - self.db_manager = await close_if_defined(self.db_manager) - self.hash_manager = constants.NOT_DEFINED - self.progress_manager.hash_progress.reset() - - async def close(self) -> None: - """Closes the manager.""" - self.states.RUNNING.clear() - - await self.async_db_close() - - self.client_manager = await close_if_defined(self.client_manager) - self.storage_manager = await close_if_defined(self.storage_manager) - self.cache_manager = await close_if_defined(self.cache_manager) - - while self.loggers: - _, queued_logger = self.loggers.popitem() - queued_logger.stop() - - -def add_or_remove_lists(cli_values: list[str], config_values: list[str]) -> None: - exclude = {"+", "-"} - if cli_values: - if cli_values[0] == "+": - new_values_set = set(config_values + cli_values) - cli_values.clear() - cli_values.extend(sorted(new_values_set - exclude)) - elif cli_values[0] == "-": - new_values_set = set(config_values) - set(cli_values) - cli_values.clear() - cli_values.extend(sorted(new_values_set - exclude)) - - -def merge_dicts(dict1: dict[str, Any], dict2: dict[str, Any]) -> dict[str, Any]: - for key, val in dict1.items(): - if isinstance(val, dict): - if key in dict2 and isinstance(dict2[key], dict): - merge_dicts(dict1[key], dict2[key]) - else: - if key in dict2: - dict1[key] = dict2[key] - - for key, val in dict2.items(): - if key not in dict1: - dict1[key] = val - - return dict1 - - -M = TypeVar("M", bound=BaseModel) - - -def merge_models(default: M, new: M) -> M: - default_dict = default.model_dump() - new_dict = new.model_dump(exclude_unset=True) - - updated_dict = merge_dicts(default_dict, new_dict) - return default.model_validate(updated_dict) diff --git a/cyberdrop_dl/managers/mock_manager.py b/cyberdrop_dl/managers/mock_manager.py deleted file mode 100644 index 3cf903507..000000000 --- a/cyberdrop_dl/managers/mock_manager.py +++ /dev/null @@ -1,52 +0,0 @@ -from __future__ import annotations - -from typing import Any - -MOCK_MANAGER = None - - -class MockCallable: - def __init__(self, return_obj: Any = None) -> None: - self.return_obj = return_obj - - def __getitem__(self, parameters: Any) -> 
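# The additive-list behavior handled above (skip_hosts, only_hosts,
# disable_crawlers): a leading "+" merges the CLI values into the config
# list, a leading "-" subtracts them, and the result replaces the CLI list
# with the sentinel dropped. Standalone copy of the helper plus usage:
def add_or_remove_lists(cli_values: list[str], config_values: list[str]) -> None:
    exclude = {"+", "-"}
    if not cli_values:
        return
    if cli_values[0] == "+":
        cli_values[:] = sorted(set(config_values + cli_values) - exclude)
    elif cli_values[0] == "-":
        cli_values[:] = sorted(set(config_values) - set(cli_values) - exclude)


skip_hosts = ["+", "imgur"]
add_or_remove_lists(skip_hosts, ["bunkr", "cyberdrop"])
assert skip_hosts == ["bunkr", "cyberdrop", "imgur"]

only_hosts = ["-", "bunkr"]
add_or_remove_lists(only_hosts, ["bunkr", "cyberdrop"])
assert only_hosts == ["cyberdrop"]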
object: ... - def __or__(self, other: Any) -> MockCallable: ... - def __ror__(self, other: Any) -> MockCallable: ... - def __call__(self, *args, **kwargs): - return self.return_obj - - -class Mock(Any): - def __init__(self, name: str, /) -> None: - self._nested_attrs: dict[str, Mock] = {} - self._mock_name = name - - def __call__(self, *args, **kwargs) -> None: ... - - def __getattribute__(self, name: str, /) -> Any: - if name == "manager" and MOCK_MANAGER is not None: - return MOCK_MANAGER - try: - return super().__getattribute__(name) - except AttributeError: - if name == "_nested_attrs": - raise # Avoid infinite recursion - return self._nested_attrs.get(name, Mock(name)) - - -class MockCacheManager(Mock): - def __init__(self) -> None: - self.get = self.save = MockCallable() - super().__init__("cache_manager") - - -class MockManager(Mock): - def __init__(self): - global MOCK_MANAGER - assert MOCK_MANAGER is None, "A global MockManager already exists. Only 1 should be created" - super().__init__("manager") - self.cache_manager = MockCacheManager() - MOCK_MANAGER = self - - -MOCK_MANAGER = MockManager() diff --git a/cyberdrop_dl/managers/path_manager.py b/cyberdrop_dl/managers/path_manager.py deleted file mode 100644 index 60e22c106..000000000 --- a/cyberdrop_dl/managers/path_manager.py +++ /dev/null @@ -1,172 +0,0 @@ -from __future__ import annotations - -import os -from dataclasses import Field, field -from datetime import datetime -from pathlib import Path -from typing import TYPE_CHECKING - -from cyberdrop_dl import env -from cyberdrop_dl.utils.utilities import purge_dir_tree - -if TYPE_CHECKING: - from cyberdrop_dl.data_structures.url_objects import MediaItem - from cyberdrop_dl.managers.manager import Manager - - -class PathManager: - def __init__(self, manager: Manager) -> None: - self.manager = manager - - self.download_folder: Path = field(init=False) - self.sorted_folder: Path = field(init=False) - self.scan_folder: Path | None = field(init=False) - - self.log_folder: Path = field(init=False) - - self.cache_folder: Path = field(init=False) - self.config_folder: Path = field(init=False) - - self.input_file: Path = field(init=False) - self.history_db: Path = field(init=False) - self.cache_db: Path = field(init=False) - - self._completed_downloads: set[MediaItem] = set() - self._completed_downloads_paths: set[Path] = set() - self._prev_downloads: set[MediaItem] = set() - self._prev_downloads_paths: set[Path] = set() - - self.main_log: Path = field(init=False) - self.last_forum_post_log: Path = field(init=False) - self.unsupported_urls_log: Path = field(init=False) - self.download_error_urls_log: Path = field(init=False) - self.scrape_error_urls_log: Path = field(init=False) - self.pages_folder: Path = field(init=False) - - self._logs_model_names = [ - "main_log", - "last_forum_post", - "unsupported_urls", - "download_error_urls", - "scrape_error_urls", - ] - self._appdata: Path = field(init=False) - - @property - def cwd(self) -> Path: - if env.RUNNING_IN_IDE and Path.cwd().name == "cyberdrop_dl": - # This is for testing purposes only""" - return Path("..").resolve() - return Path().resolve() - - @property - def appdata(self) -> Path: - if isinstance(self._appdata, Field): - if self.manager.parsed_args.cli_only_args.appdata_folder: - path = self.manager.parsed_args.cli_only_args.appdata_folder / "AppData" - self._appdata = self.cwd / path - else: - self._appdata = self.cwd / "AppData" - - return self._appdata - - def pre_startup(self) -> None: - self.cache_folder = self.appdata / 
"Cache" - self.config_folder = self.appdata / "Configs" - self.cookies_dir = self.appdata / "Cookies" - self.cache_db = self.cache_folder / "request_cache.db" - - self.cache_folder.mkdir(parents=True, exist_ok=True) - self.config_folder.mkdir(parents=True, exist_ok=True) - self.cookies_dir.mkdir(parents=True, exist_ok=True) - self.cache_db.touch(exist_ok=True) - - def startup(self) -> None: - """Startup process for the Directory Manager.""" - settings_data = self.manager.config_manager.settings_data - current_config = self.manager.config_manager.loaded_config - - def replace(path: Path) -> Path: - path_w_config = str(path).replace("{config}", current_config) - if os.name == "nt": - return self.cwd.joinpath(Path(path_w_config)).resolve() - normalized_path_str = path_w_config.replace("\\", "/") - return self.cwd.joinpath(Path(normalized_path_str)).resolve() - - self.download_folder = replace(settings_data.files.download_folder) - self.sorted_folder = replace(settings_data.sorting.sort_folder) - self.log_folder = replace(settings_data.logs.log_folder) - self.input_file = replace(settings_data.files.input_file) - self.history_db = self.cache_folder / "cyberdrop.db" - self.scan_folder = settings_data.sorting.scan_folder - if self.scan_folder: - self.scan_folder = replace(self.scan_folder) - - self.log_folder.mkdir(parents=True, exist_ok=True) - - now = datetime.now() - self._set_output_filenames(now) - self._delete_logs_and_folders(now) - self._create_output_folders() - - if not self.input_file.is_file(): - self.input_file.touch(exist_ok=True) - self.history_db.touch(exist_ok=True) - - def _set_output_filenames(self, now: datetime) -> None: - current_time_file_iso: str = now.strftime("%Y%m%d_%H%M%S") - current_time_folder_iso: str = now.strftime("%Y_%m_%d") - log_settings_config = self.manager.config_manager.settings_data.logs - log_files: dict[str, Path] = log_settings_config.model_dump() - - for model_name, log_file in log_files.items(): - if model_name not in self._logs_model_names: - continue - if log_settings_config.rotate_logs: - new_name = f"{log_file.stem}_{current_time_file_iso}{log_file.suffix}" - log_file: Path = log_file.parent / current_time_folder_iso / new_name - log_files[model_name] = log_file - - log_settings_config = log_settings_config.model_copy(update=log_files) - - for model_name in self._logs_model_names: - internal_name = f"{model_name.replace('_log', '')}_log" - setattr(self, internal_name, self.log_folder / getattr(log_settings_config, model_name)) - - self.pages_folder = self.main_log.parent / "cdl_responses" - - def _delete_logs_and_folders(self, now: datetime): - if self.manager.config_manager.settings_data.logs.logs_expire_after: - for file in set(self.log_folder.rglob("*.log")) | set(self.log_folder.rglob("*.csv")): - file_date = Path(file).stat().st_ctime - t_delta = now - datetime.fromtimestamp(file_date) - if t_delta > self.manager.config_manager.settings_data.logs.logs_expire_after: - file.unlink(missing_ok=True) - purge_dir_tree(self.log_folder) - - def _create_output_folders(self): - for model_name in self._logs_model_names: - internal_name = f"{model_name.replace('_log', '')}_log" - path: Path = getattr(self, internal_name) - path.parent.mkdir(parents=True, exist_ok=True) - - if self.manager.config_manager.settings_data.files.save_pages_html: - self.pages_folder.mkdir(parents=True, exist_ok=True) - - def add_completed(self, media_item: MediaItem) -> None: - if media_item.is_segment: - return - self._completed_downloads.add(media_item) - 
self._completed_downloads_paths.add(media_item.complete_file) - - def add_prev(self, media_item: MediaItem) -> None: - self._prev_downloads.add(media_item) - self._prev_downloads_paths.add(media_item.complete_file) - - @property - def completed_downloads(self) -> set[MediaItem]: - return self._completed_downloads - - @property - def prev_downloads(self) -> set[MediaItem]: - return self._prev_downloads diff --git a/cyberdrop_dl/managers/progress_manager.py b/cyberdrop_dl/managers/progress_manager.py deleted file mode 100644 index 8f6b46454..000000000 --- a/cyberdrop_dl/managers/progress_manager.py +++ /dev/null @@ -1,244 +0,0 @@ -from __future__ import annotations - -import time -from contextlib import asynccontextmanager -from dataclasses import field -from datetime import timedelta -from functools import partial -from typing import TYPE_CHECKING - -from pydantic import ByteSize -from rich.columns import Columns -from rich.console import Group -from rich.layout import Layout -from rich.progress import Progress, SpinnerColumn, TaskID -from rich.text import Text -from yarl import URL - -from cyberdrop_dl import __version__ -from cyberdrop_dl.ui.progress.downloads_progress import DownloadsProgress -from cyberdrop_dl.ui.progress.file_progress import FileProgress -from cyberdrop_dl.ui.progress.hash_progress import HashProgress -from cyberdrop_dl.ui.progress.scraping_progress import ScrapingProgress -from cyberdrop_dl.ui.progress.sort_progress import SortProgress -from cyberdrop_dl.ui.progress.statistic_progress import DownloadStatsProgress, ScrapeStatsProgress -from cyberdrop_dl.utils.logger import log, log_spacer, log_with_color - -if TYPE_CHECKING: - from collections.abc import AsyncGenerator - from pathlib import Path - - from rich.console import RenderableType - - from cyberdrop_dl.managers.manager import Manager - from cyberdrop_dl.ui.progress.statistic_progress import UiFailureTotal - -log_cyan = partial(log_with_color, style="cyan", level=20) -log_yellow = partial(log_with_color, style="yellow", level=20) -log_green = partial(log_with_color, style="green", level=20) -log_red = partial(log_with_color, style="red", level=20) - - -class ProgressManager: - def __init__(self, manager: Manager) -> None: - # File Download Bars - self.manager = manager - ui_options = manager.config_manager.global_settings_data.ui_options - self.portrait = manager.parsed_args.cli_only_args.portrait - self.file_progress = FileProgress(manager) - self.scraping_progress = ScrapingProgress(manager) - - # Overall Progress Bars & Stats - self.download_progress = DownloadsProgress(manager) - self.download_stats_progress = DownloadStatsProgress() - self.scrape_stats_progress = ScrapeStatsProgress() - self.hash_progress = HashProgress(manager) - self.sort_progress = SortProgress(1, manager) - - self.ui_refresh_rate = ui_options.refresh_rate - - self.hash_remove_layout: RenderableType = field(init=False) - self.hash_layout: RenderableType = field(init=False) - self.sort_layout: RenderableType = field(init=False) - self.status_message: Progress = field(init=False) - self.status_message_task_id: TaskID = field(init=False) - - @asynccontextmanager - async def show_status_msg(self, msg: str | None) -> AsyncGenerator[None]: - try: - self.status_message.update(self.status_message_task_id, description=msg, visible=bool(msg)) - yield - finally: - self.status_message.update(self.status_message_task_id, visible=False) - - def pause_or_resume(self): - if self.manager.states.RUNNING.is_set(): - self.pause() - else: - self.resume() - - 
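# The rotated-log naming from _set_output_filenames above, in isolation:
# with rotate_logs enabled, each log lands in a per-day folder and gets a
# timestamp suffix. The timestamp is fixed here only to keep the example
# deterministic.
from datetime import datetime
from pathlib import Path

now = datetime(2024, 5, 1, 13, 37, 5)
log_file = Path("downloader.log")
rotated = (
    log_file.parent
    / now.strftime("%Y_%m_%d")
    / f"{log_file.stem}_{now.strftime('%Y%m%d_%H%M%S')}{log_file.suffix}"
)
assert rotated.as_posix() == "2024_05_01/downloader_20240501_133705.log"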
def pause(self, msg: str = ""): - self.manager.states.RUNNING.clear() - suffix = f" [{msg}]" if msg else "" - self.activity.update(self.activity_task_id, description=f"Paused{suffix}") - - def resume(self): - self.manager.states.RUNNING.set() - self.activity.update(self.activity_task_id, description="Running Cyberdrop-DL") - - def startup(self) -> None: - """Startup process for the progress manager.""" - spinner = SpinnerColumn(style="green", spinner_name="dots") - activity = Progress(spinner, "[progress.description]{task.description}") - self.status_message = Progress(spinner, "[progress.description]{task.description}") - - self.status_message_task_id = self.status_message.add_task("", total=100, completed=0, visible=False) - self.activity_task_id = activity.add_task(f"Running Cyberdrop-DL: v{__version__}", total=100, completed=0) - self.activity = activity - - simple_layout = Group(activity, self.download_progress.simple_progress) - - status_message_columns = Columns([activity, self.status_message], expand=False) - - horizontal_layout = Layout() - vertical_layout = Layout() - - upper_layouts = ( - Layout(renderable=self.download_progress.get_progress(), name="Files", ratio=1, minimum_size=9), - Layout(renderable=self.scrape_stats_progress.get_progress(), name="Scrape Failures", ratio=1), - Layout(renderable=self.download_stats_progress.get_progress(), name="Download Failures", ratio=1), - ) - - lower_layouts = ( - Layout(renderable=self.scraping_progress.get_renderable(), name="Scraping", ratio=20), - Layout(renderable=self.file_progress.get_renderable(), name="Downloads", ratio=20), - Layout(renderable=status_message_columns, name="status_message", ratio=2), - ) - - horizontal_layout.split_column(Layout(name="upper", ratio=20), *lower_layouts) - vertical_layout.split_column(Layout(name="upper", ratio=60), *lower_layouts) - - horizontal_layout["upper"].split_row(*upper_layouts) - vertical_layout["upper"].split_column(*upper_layouts) - - self.horizontal_layout = horizontal_layout - self.vertical_layout = vertical_layout - self.activity_layout = activity - self.simple_layout = simple_layout - self.hash_remove_layout = self.hash_progress.get_removed_progress() - self.hash_layout = self.hash_progress.get_renderable() - self.sort_layout = self.sort_progress.get_renderable() - - @property - def fullscreen_layout(self) -> Layout: - if self.portrait: - return self.vertical_layout - return self.horizontal_layout - - def print_stats(self, start_time: float) -> None: - """Prints the stats of the program.""" - if not self.manager.parsed_args.cli_only_args.print_stats: - return - end_time = time.perf_counter() - runtime = timedelta(seconds=int(end_time - start_time)) - total_data_written = ByteSize(self.manager.storage_manager.total_data_written).human_readable(decimal=True) - - log_spacer(20) - log("Printing Stats...\n", 20) - config_path = self.manager.path_manager.config_folder / self.manager.config_manager.loaded_config - config_path_text = get_console_hyperlink(config_path, text=self.manager.config_manager.loaded_config) - input_file_text = get_input(self.manager) - log_folder_text = get_console_hyperlink(self.manager.path_manager.log_folder) - - log_concat("Run Stats (config: ", config_path_text, ")", style="cyan") - log_concat(" Input File: ", input_file_text, style="yellow") - log_yellow(f" Input URLs: {self.manager.scrape_mapper.count:,}") - log_yellow(f" Input URL Groups: {self.manager.scrape_mapper.group_count:,}") - log_concat(" Log Folder: ", log_folder_text, style="yellow") - 
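# The pause/resume methods above hinge on a shared asyncio.Event
# (manager.states.RUNNING): workers await RUNNING.wait() at loop
# checkpoints, so clearing the event suspends them and setting it lets them
# continue. A self-contained sketch of that idea:
import asyncio

RUNNING = asyncio.Event()


async def worker(results: list[int]) -> None:
    for i in range(4):
        await RUNNING.wait()  # parks here while paused
        results.append(i)
        await asyncio.sleep(0)


async def main() -> None:
    results: list[int] = []
    RUNNING.set()  # start running
    task = asyncio.create_task(worker(results))
    await asyncio.sleep(0)
    RUNNING.clear()  # pause()
    await asyncio.sleep(0.01)
    paused_at = len(results)
    RUNNING.set()  # resume()
    await task
    assert paused_at < len(results) == 4


asyncio.run(main())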
log_yellow(f" Total Runtime: {runtime}") - log_yellow(f" Total Downloaded Data: {total_data_written}") - - log_spacer(20, "") - log_cyan("Download Stats:") - log_green(f" Downloaded: {self.download_progress.completed_files:,} files") - log_yellow(f" Skipped (By Config): {self.download_progress.skipped_files:,} files") - log_yellow(f" Skipped (Previously Downloaded): {self.download_progress.previously_completed_files:,} files") - log_red(f" Failed: {self.download_stats_progress.failed_files:,} files") - - log_spacer(20, "") - log_cyan("Unsupported URLs Stats:") - log_yellow(f" Sent to Jdownloader: {self.scrape_stats_progress.sent_to_jdownloader:,}") - log_yellow(f" Skipped: {self.scrape_stats_progress.unsupported_urls_skipped:,}") - - self.print_dedupe_stats() - - log_spacer(20, "") - log_cyan("Sort Stats:") - log_green(f" Audios: {self.sort_progress.audio_count:,}") - log_green(f" Images: {self.sort_progress.image_count:,}") - log_green(f" Videos: {self.sort_progress.video_count:,}") - log_green(f" Other Files: {self.sort_progress.other_count:,}") - - last_padding = log_failures(self.scrape_stats_progress.return_totals(), "Scrape Failures:") - log_failures(self.download_stats_progress.return_totals(), "Download Failures:", last_padding) - - def print_dedupe_stats(self) -> None: - log_spacer(20, "") - log_cyan("Dupe Stats:") - log_yellow(f" Newly Hashed: {self.hash_progress.hashed_files:,} files") - log_yellow(f" Previously Hashed: {self.hash_progress.prev_hashed_files:,} files") - log_yellow(f" Removed (Downloads): {self.hash_progress.removed_files:,} files") - - -def log_failures(failures: list[UiFailureTotal], title: str = "Failures:", last_padding: int = 0) -> int: - log_spacer(20, "") - log_cyan(title) - if not failures: - log_green(" None") - return 0 - error_padding = last_padding - error_codes = [f.error_code for f in failures if f.error_code is not None] - if error_codes: - error_padding = max(len(str(max(error_codes))), error_padding) - for f in failures: - error = f.error_code if f.error_code is not None else "" - log_red(f" {error:>{error_padding}}{' ' if error_padding else ''}{f.msg}: {f.total:,}") - return error_padding - - -def get_input(manager: Manager) -> Text | str: - if manager.parsed_args.cli_only_args.retry_all: - return "--retry-all" - if manager.parsed_args.cli_only_args.retry_failed: - return "--retry-failed" - if manager.parsed_args.cli_only_args.retry_maintenance: - return "--retry-maintenance" - if manager.scrape_mapper.using_input_file: - return get_console_hyperlink(manager.path_manager.input_file) - return "--links (CLI args)" - - -def get_console_hyperlink(file_path: Path, text: str = "") -> Text: - full_path = file_path - show_text = text or full_path - file_url = URL(full_path.as_posix()).with_scheme("file") - return Text(str(show_text), style=f"link {file_url}") - - -def concat_as_text(*text_or_str, style: str = "") -> Text: - result = Text() - for elem in text_or_str: - if isinstance(elem, Text): - text = elem - if style and text.style != style: - text.stylize(f"{style} {text.style}") - else: - text = Text(elem, style=style) - - result.append(text) - return result - - -def log_concat(*text_or_str, style: str = "", **kwargs) -> None: - text = concat_as_text(*text_or_str, style=style) - log_with_color(text, style, **kwargs) diff --git a/cyberdrop_dl/managers/storage_manager.py b/cyberdrop_dl/managers/storage_manager.py index 670b17e13..d51d05805 100644 --- a/cyberdrop_dl/managers/storage_manager.py +++ b/cyberdrop_dl/managers/storage_manager.py @@ -11,6 
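# get_console_hyperlink above makes a path clickable by attaching a
# "link <url>" style to a rich Text. Equivalent effect with pathlib's
# as_uri() instead of yarl (the path is an example):
from pathlib import Path

from rich.console import Console
from rich.text import Text

path = Path("AppData/Configs/Default").resolve()
link = Text(path.name, style=f"link {path.as_uri()}")
Console().print(link)  # renders "Default"; terminals with OSC 8 support make it clickable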
+11,7 @@ import psutil from pydantic import ByteSize +from cyberdrop_dl import config from cyberdrop_dl.exceptions import InsufficientFreeSpaceError from cyberdrop_dl.utils.logger import log, log_debug @@ -20,7 +21,7 @@ from psutil._ntuples import sdiskpart from cyberdrop_dl.data_structures.url_objects import MediaItem - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager @dataclass(frozen=True, slots=True, order=True) @@ -89,7 +90,6 @@ def _mount_stats(self) -> Generator[MountStats]: async def check_free_space(self, media_item: MediaItem) -> None: """Checks if there is enough free space to download this item.""" - await self.manager.states.RUNNING.wait() if not await self._has_sufficient_space(media_item.download_folder): raise InsufficientFreeSpaceError(origin=media_item) @@ -164,7 +164,7 @@ async def _has_sufficient_space(self, folder: Path) -> bool: free_space = self._free_space[mount] if free_space == -1: return True - return free_space > self.manager.global_config.general.required_free_space + return free_space > config.get().general.required_free_space async def _get_free_space(self, mount: Path) -> int: exc_info = None @@ -201,7 +201,6 @@ async def _check_free_space_loop(self) -> None: last_check = -1 while True: - await self.manager.states.RUNNING.wait() self._updated.clear() last_check += 1 if self._used_mounts: diff --git a/cyberdrop_dl/models/__init__.py b/cyberdrop_dl/models/__init__.py index f34b64ca4..a5d7bbe53 100755 --- a/cyberdrop_dl/models/__init__.py +++ b/cyberdrop_dl/models/__init__.py @@ -1,6 +1,12 @@ +from __future__ import annotations + +from typing import Any, TypeVar + from pydantic import BaseModel -from .base_models import AliasModel, AppriseURLModel, FrozenModel, HttpAppriseURL +from .base import AliasModel, AppriseURLModel, FlatNamespace, FrozenModel, HttpAppriseURL, Settings, SettingsGroup + +M = TypeVar("M", bound=BaseModel) def get_model_fields(model: BaseModel, *, exclude_unset: bool = True) -> set[str]: @@ -12,4 +18,37 @@ def get_model_fields(model: BaseModel, *, exclude_unset: bool = True) -> set[str return fields -__all__ = ["AliasModel", "AppriseURLModel", "FrozenModel", "HttpAppriseURL", "get_model_fields"] +def merge_dicts(dict1: dict[str, Any], dict2: dict[str, Any]) -> dict[str, Any]: + for key, val in dict1.items(): + if isinstance(val, dict): + if key in dict2 and isinstance(dict2[key], dict): + merge_dicts(dict1[key], dict2[key]) + else: + if key in dict2: + dict1[key] = dict2[key] + + for key, val in dict2.items(): + if key not in dict1: + dict1[key] = val + + return dict1 + + +def merge_models(old: M, new: M) -> M: + old.model_copy() + old_values = old.model_dump() + new_values = new.model_dump(exclude_unset=True) + updated_dict = merge_dicts(old_values, new_values) + return old.model_validate(updated_dict) + + +__all__ = [ + "AliasModel", + "AppriseURLModel", + "FlatNamespace", + "FrozenModel", + "HttpAppriseURL", + "Settings", + "SettingsGroup", + "get_model_fields", +] diff --git a/cyberdrop_dl/models/base_models.py b/cyberdrop_dl/models/base.py similarity index 73% rename from cyberdrop_dl/models/base_models.py rename to cyberdrop_dl/models/base.py index af03d43a3..f2381570b 100755 --- a/cyberdrop_dl/models/base_models.py +++ b/cyberdrop_dl/models/base.py @@ -1,9 +1,10 @@ """Pydantic models""" from collections.abc import Iterator, Mapping, Sequence -from typing import TypeVar +from typing import TypeVar, Unpack import yarl +from cyclopts import Parameter from pydantic import ( AnyUrl, BaseModel, 
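# How merge_models/merge_dicts above combine two models: only the fields
# explicitly set on `new` (model_dump(exclude_unset=True)) override `old`,
# recursing into nested sections. Throwaway models; assumes the package from
# this PR is importable.
from pydantic import BaseModel

from cyberdrop_dl.models import merge_models


class Limits(BaseModel):
    speed: int = 0
    connections: int = 4


class Config(BaseModel):
    limits: Limits = Limits()
    name: str = "default"


old = Config(limits=Limits(speed=100), name="mine")
new = Config.model_validate({"limits": {"connections": 8}})  # speed/name left unset

merged = merge_models(old, new)
assert merged == Config(limits=Limits(speed=100, connections=8), name="mine")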
@@ -22,11 +23,24 @@ class AliasModel(BaseModel): - model_config = ConfigDict(populate_by_name=True) + model_config = ConfigDict(populate_by_name=True, defer_build=True) class FrozenModel(BaseModel): - model_config = ConfigDict(frozen=True) + model_config = ConfigDict(frozen=True, defer_build=True) + + +@Parameter(name="*") +class FlatNamespace: ... + + +class Settings(FlatNamespace, AliasModel): ... + + +class SettingsGroup(Settings): + def __init_subclass__(cls, name: str | None = None, **kwargs: Unpack[ConfigDict]) -> None: + _ = Parameter(group=name or cls.__name__)(cls) + return super().__init_subclass__(**kwargs) class AppriseURLModel(FrozenModel): diff --git a/cyberdrop_dl/plugins.py b/cyberdrop_dl/plugins.py index 0284045e8..1aa13f933 100644 --- a/cyberdrop_dl/plugins.py +++ b/cyberdrop_dl/plugins.py @@ -25,7 +25,7 @@ from collections.abc import Iterable from importlib.metadata import EntryPoint - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager _GROUP_NAME: Final = "cyberdrop_dl_plugins" diff --git a/cyberdrop_dl/progress/__init__.py b/cyberdrop_dl/progress/__init__.py new file mode 100644 index 000000000..b4181d4c2 --- /dev/null +++ b/cyberdrop_dl/progress/__init__.py @@ -0,0 +1,200 @@ +from __future__ import annotations + +import dataclasses +import logging +import time +from contextlib import asynccontextmanager +from datetime import timedelta +from typing import TYPE_CHECKING, Self + +from pydantic import ByteSize +from rich.columns import Columns +from rich.console import Group, RenderableType +from rich.layout import Layout +from rich.progress import Progress, SpinnerColumn +from rich.text import Text +from yarl import URL + +from cyberdrop_dl import __version__, config +from cyberdrop_dl.progress._common import ProgressProxy +from cyberdrop_dl.progress.errors import DownloadErrors, ScrapeErrors +from cyberdrop_dl.progress.files import FileStats +from cyberdrop_dl.progress.hashing import HashingPanel +from cyberdrop_dl.progress.scrape import DownloadsPanel, ScrapingPanel +from cyberdrop_dl.progress.sorting import SortingPanel + +if TYPE_CHECKING: + from collections.abc import AsyncGenerator + from pathlib import Path + + from cyberdrop_dl.managers import Manager + from cyberdrop_dl.progress.errors import UIFailure + + +logger = logging.getLogger(__name__) + + +class StatusMessage(ProgressProxy): + _columns = ( + SpinnerColumn(style="green", spinner_name="dots"), + "[progress.description]{task.description}", + ) + + def __init__(self) -> None: + super().__init__() + self.activity = Progress(*self._columns) + _ = self.activity.add_task(f"Running Cyberdrop-DL: v{__version__}", total=100, completed=0) + self._task_id = self._progress.add_task("", total=100, completed=0, visible=False) + self._panel = Columns([self.activity, self._progress]) + + def __rich__(self) -> Columns: + return self._panel + + def update(self, description: str | None = None) -> None: + self._progress.update(self._task_id, description=description, visible=bool(description)) + + def __str__(self) -> str: + return self._tasks[self._task_id].description + + def __repr__(self) -> str: + return f"{type(self).__name__}(msg={self!s})" + + @asynccontextmanager + async def show(self, msg: str | None) -> AsyncGenerator[None]: + try: + self.update(msg) + yield + finally: + self.update() + + +@dataclasses.dataclass(slots=True, frozen=True) +class UILayouts: + horizontal: Layout + vertical: Layout + simple: Group + hashing: RenderableType + sorting: RenderableType + + 
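# What the FlatNamespace / SettingsGroup hooks above provide, sketched with
# a throwaway dataclass. cyclopts lets a Parameter instance decorate a
# class: name="*" flattens the class's fields into the parent CLI namespace
# and group=... controls the --help section. A loose illustration, not the
# PR's wiring.
import dataclasses

from cyclopts import App, Parameter


@Parameter(name="*", group="Runtime Options")
@dataclasses.dataclass
class RuntimeOptions:
    deep_scrape: bool = False


app = App()


@app.default
def main(runtime: RuntimeOptions = RuntimeOptions()) -> None:
    print(runtime.deep_scrape)


# app(["--deep-scrape"]) would print True: the flag maps straight onto
# RuntimeOptions.deep_scrape with no "--runtime." prefix because of name="*".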
@classmethod + def build(cls, progress: ProgressManager) -> Self: + horizontal = Layout() + vertical = Layout() + + top = ( + Layout(progress.files, ratio=1, minimum_size=9), + Layout(progress.scrape_errors, ratio=1), + Layout(progress.download_errors, ratio=1), + ) + + bottom = ( + Layout(progress.scrape, ratio=20), + Layout(progress.downloads, ratio=20), + Layout(progress.status, ratio=2), + ) + + horizontal.split_column(Layout(name="top", ratio=20), *bottom) + vertical.split_column(Layout(name="top", ratio=60), *bottom) + + horizontal["top"].split_row(*top) + vertical["top"].split_column(*top) + + simple = Group(progress.status.activity, progress.files.simple) + return cls(horizontal, vertical, simple, progress.hashing, progress.sorting) + + +@dataclasses.dataclass(slots=True) +class ProgressManager: + manager: Manager + + portrait: bool + + layouts: UILayouts = dataclasses.field(init=False) + status: StatusMessage = dataclasses.field(default_factory=StatusMessage) + + downloads: DownloadsPanel = dataclasses.field(default_factory=DownloadsPanel) + scrape: ScrapingPanel = dataclasses.field(default_factory=ScrapingPanel) + hashing: HashingPanel = dataclasses.field(default_factory=HashingPanel) + sorting: SortingPanel = dataclasses.field(default_factory=SortingPanel) + + files: FileStats = dataclasses.field(default_factory=FileStats) + download_errors: DownloadErrors = dataclasses.field(default_factory=DownloadErrors) + scrape_errors: ScrapeErrors = dataclasses.field(default_factory=ScrapeErrors) + + def __post_init__(self) -> None: + self.layouts = UILayouts.build(self) + + @property + def layout(self) -> Layout: + if self.portrait: + return self.layouts.vertical + return self.layouts.horizontal + + def print_stats(self, start_time: float) -> None: + """Prints the stats of the program.""" + # if not self.manager.parsed_args.cli_only_args.print_stats: + # return + from cyberdrop_dl.utils.logger import log_spacer + + end_time = time.perf_counter() + runtime = timedelta(seconds=int(end_time - start_time)) + total_data_written = ByteSize(self.manager.storage_manager.total_data_written).human_readable(decimal=True) + + log_spacer(20) + logger.info("Printing Stats...\n") + logger.info("Run Stats") + logger.info(f" Input File: {config.get().source}") + logger.info(f" Input URLs: {self.manager.scrape_mapper.count:,}") + logger.info(f" Input URL Groups: {self.manager.scrape_mapper.group_count:,}") + # logger.info(f" Log Folder: {log_folder_text}") + logger.info(f" Total Runtime: {runtime}") + logger.info(f" Total Downloaded Data: {total_data_written}") + + logger.info("Download Stats:") + logger.info(f" Downloaded: {self.files.completed_files:,} files") + logger.info(f" Skipped (By Config): {self.files.skipped_files:,} files") + logger.info(f" Skipped (Previously Downloaded): {self.files.previously_completed:,} files") + logger.info(f" Failed: {self.download_errors.failed_files:,} files") + + logger.info("Unsupported URLs Stats:") + logger.info(f" Sent to Jdownloader: {self.scrape_errors.sent_to_jdownloader:,}") + logger.info(f" Skipped: {self.scrape_errors.unsupported_urls_skipped:,}") + + self.print_dedupe_stats() + + logger.info("Sort Stats:") + logger.info(f" Audios: {self.sorting.audio_count:,}") + logger.info(f" Images: {self.sorting.image_count:,}") + logger.info(f" Videos: {self.sorting.video_count:,}") + logger.info(f" Other Files: {self.sorting.other_count:,}") + + last_padding = log_failures(self.scrape_errors.return_totals(), "Scrape Failures:") + 
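# The landscape/portrait split built in UILayouts.build above, reduced to
# its skeleton: the same bottom rows are reused while the top stat panels go
# side by side (split_row) in landscape and stacked (split_column) in
# portrait.
from rich.layout import Layout
from rich.panel import Panel

names = ("Files", "Scrape Failures", "Download Failures")

landscape = Layout()
landscape.split_column(Layout(name="top", ratio=20), Layout(Panel("Scraping"), ratio=20))
landscape["top"].split_row(*(Layout(Panel(n)) for n in names))

portrait = Layout()
portrait.split_column(Layout(name="top", ratio=60), Layout(Panel("Scraping"), ratio=20))
portrait["top"].split_column(*(Layout(Panel(n)) for n in names))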
log_failures(self.download_errors.return_totals(), "Download Failures:", last_padding) + + def print_dedupe_stats(self) -> None: + logger.info("Dupe Stats:") + logger.info(f" Newly Hashed: {self.hashing.hashed_files:,} files") + logger.info(f" Previously Hashed: {self.hashing.prev_hashed_files:,} files") + logger.info(f" Removed (Downloads): {self.hashing.removed_files:,} files") + + +def log_failures(failures: list[UIFailure], title: str = "Failures:", last_padding: int = 0) -> int: + logger.info(title) + if not failures: + logger.info(" None") + return 0 + error_padding = last_padding + error_codes = [f.error_code for f in failures if f.error_code is not None] + if error_codes: + error_padding = max(len(str(max(error_codes))), error_padding) + for f in failures: + error = f.error_code if f.error_code is not None else "" + logger.info(f" {error:>{error_padding}}{' ' if error_padding else ''}{f.msg}: {f.total:,}") + return error_padding + + +def _get_console_hyperlink(file_path: Path, text: str = "") -> Text: + full_path = file_path + show_text = text or full_path + file_url = URL(full_path.as_posix()).with_scheme("file") + return Text(str(show_text), style=f"link {file_url}") diff --git a/cyberdrop_dl/progress/_common.py b/cyberdrop_dl/progress/_common.py new file mode 100644 index 000000000..a91dd6e6e --- /dev/null +++ b/cyberdrop_dl/progress/_common.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +import dataclasses +from typing import TYPE_CHECKING, Self + +if TYPE_CHECKING: + from collections.abc import Callable + + from rich.console import RenderableType + from rich.progress import TaskID + + +from types import MappingProxyType +from typing import TYPE_CHECKING, ClassVar + +from rich.markup import escape +from rich.progress import ( + Progress, + ProgressColumn, + Task, + TaskID, +) + + +def truncate(s: str, length: int = 40, placeholder: str = "...") -> str: + return f"{s[: length - len(placeholder)]}{placeholder}" if len(s) >= length else s.ljust(length) + + +@dataclasses.dataclass(slots=True) +class TaskCounter: + id: TaskID + count: int = 0 + + +@dataclasses.dataclass(slots=True, frozen=True) +class ProgressHook: + advance: Callable[[int], None] + done: Callable[[], None] + speed: Callable[[], float] + + def __enter__(self) -> Self: + return self + + def __exit__(self, *_) -> None: + self.done() + + +class ProgressProxy: + _columns: ClassVar[tuple[ProgressColumn | str, ...]] + + @classmethod + def _clean_task_desc(cls, desc: str) -> str: + return escape(truncate(desc.encode("ascii", "ignore").decode().strip())) + + def __init__(self) -> None: + self._progress: Progress = Progress(*self._columns) + self._tasks: MappingProxyType[TaskID, Task] = MappingProxyType(self._progress._tasks) + self._tasks_map: dict[str, TaskCounter] = {} + + def __rich__(self) -> RenderableType: + return self._progress diff --git a/cyberdrop_dl/progress/errors.py b/cyberdrop_dl/progress/errors.py new file mode 100644 index 000000000..6b8155d72 --- /dev/null +++ b/cyberdrop_dl/progress/errors.py @@ -0,0 +1,153 @@ +from __future__ import annotations + +import dataclasses +import functools + +from rich.panel import Panel +from rich.progress import BarColumn, TaskID + +from cyberdrop_dl.progress._common import ProgressProxy + + +@dataclasses.dataclass(slots=True, order=True) +class UIFailure: + full_msg: str + total: int + error_code: int | None = None + msg: str = dataclasses.field(init=False) + + def __post_init__(self) -> None: + parts = self.full_msg.split(" ", 1) + if len(parts) > 1 and 
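# The branch completed just below strips a leading numeric status code off
# the failure message, so entries can align and sort by HTTP code.
# Standalone equivalent of UIFailure.__post_init__:
def split_failure(full_msg: str) -> tuple[int | None, str]:
    parts = full_msg.split(" ", 1)
    if len(parts) > 1 and parts[0].isdigit():
        return int(parts[0]), parts[1]
    return None, full_msg


assert split_failure("404 Not Found") == (404, "Not Found")
assert split_failure("Connection Timeout") == (None, "Connection Timeout")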
parts[0].isdigit(): + error_code, self.msg = parts + self.error_code = int(error_code) + else: + self.msg = self.full_msg + + +class _ErrorsPanel(ProgressProxy): + """Base class that keeps track of errors and reasons.""" + + _columns = ( + "[progress.description]{task.description}", + BarColumn(bar_width=None), + "[progress.percentage]{task.percentage:>6.2f}%", + "━", + "{task.completed:,}", + ) + + def __repr__(self) -> str: + return f"{type(self).__name__}(failed_files={self.failed_files!r}, failures={self._failures.keys()!r})" + + def __init__(self) -> None: + super().__init__() + self.title = type(self).__name__.removesuffix("Errors") + " Failures" + self._failures: dict[str, TaskID] = {} + self.failed_files: int = 0 + self._panel = Panel( + self._progress, + title=self.title, + border_style="green", + padding=(1, 1), + subtitle=self._subtitle, + ) + + @property + def _subtitle(self) -> str: + return f"Total {self.title}: [white]{self.failed_files:,}" + + def __rich__(self) -> Panel: + return self._panel + + def add_failure(self, failure: str) -> None: + self.failed_files += 1 + key = _get_pretty_error(failure) + if (task_id := self._failures.get(key)) is not None: + self._progress.advance(task_id) + else: + self._failures[key] = self._progress.add_task(key, total=self.failed_files, completed=1) + + self._redraw() + + def _redraw(self) -> None: + self._panel.subtitle = self._subtitle + for task_id in self._failures.values(): + self._progress.update(task_id, total=self.failed_files) + + tasks = list(self._tasks.values()) + tasks_sorted = sorted(tasks, key=lambda x: x.completed, reverse=True) + if tasks == tasks_sorted: + return + + for task in tasks_sorted: + self._progress.remove_task(task.id) + self._failures[task.description] = self._progress.add_task( + task.description, + total=task.total, + completed=int(task.completed), + ) + + def return_totals(self) -> list[UIFailure]: + """Returns the total number of failed sites and reasons.""" + + return sorted(UIFailure(msg, int(self._tasks[task_id].completed)) for msg, task_id in self._failures.items()) + + +class DownloadErrors(_ErrorsPanel): + """Class that keeps track of download failures and reasons.""" + + +class ScrapeErrors(_ErrorsPanel): + """Class that keeps track of scraping failures and reasons.""" + + def __init__(self) -> None: + super().__init__() + self.unsupported_urls: int = 0 + self.sent_to_jdownloader: int = 0 + self.unsupported_urls_skipped: int = 0 + + def add_unsupported(self, *, sent_to_jdownloader: bool = False) -> None: + self.unsupported_urls += 1 + if sent_to_jdownloader: + self.sent_to_jdownloader += 1 + else: + self.unsupported_urls_skipped += 1 + + +@functools.cache +def _get_pretty_error(failure: str) -> str: + return _FAILURE_OVERRIDES.get(failure) or _capitalize_words(failure) + + +def _capitalize_words(text: str) -> str: + """Capitalize first letter of each word + + Unlike `str.capwords()`, this only caps the first letter of each word without modifying the rest of the word""" + + def cap(word: str) -> str: + return word[0].capitalize() + word[1:] + + return " ".join([cap(word) for word in text.split()]) + + +_FAILURE_OVERRIDES = { + "ClientConnectorCertificateError": "Client Connector Certificate Error", + "ClientConnectorDNSError": "Client Connector DNS Error", + "ClientConnectorError": "Client Connector Error", + "ClientConnectorSSLError": "Client Connector SSL Error", + "ClientHttpProxyError": "Client HTTP Proxy Error", + "ClientPayloadError": "Client Payload Error", + "ClientProxyConnectionError": 
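# Why _capitalize_words above is hand-rolled instead of using str.title():
# title() also lowercases the rest of each word, which mangles acronyms that
# _FAILURE_OVERRIDES doesn't already cover.
text = "client HTTP proxy error"
assert text.title() == "Client Http Proxy Error"  # acronym lost
assert " ".join(w[0].capitalize() + w[1:] for w in text.split()) == "Client HTTP Proxy Error"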
"Client Proxy Connection Error", + "ConnectionTimeoutError": "Connection Timeout", + "ContentTypeError": "Content Type Error", + "InvalidURL": "Invalid URL", + "InvalidUrlClientError": "Invalid URL Client Error", + "InvalidUrlRedirectClientError": "Invalid URL Redirect", + "NonHttpUrlRedirectClientError": "Non HTTP URL Redirect", + "RedirectClientError": "Redirect Error", + "ServerConnectionError": "Server Connection Error", + "ServerDisconnectedError": "Server Disconnected", + "ServerFingerprintMismatch": "Server Fingerprint Mismatch", + "ServerTimeoutError": "Server Timeout Error", + "SocketTimeoutError": "Socket Timeout Error", +} diff --git a/cyberdrop_dl/progress/files.py b/cyberdrop_dl/progress/files.py new file mode 100644 index 000000000..293cfcc26 --- /dev/null +++ b/cyberdrop_dl/progress/files.py @@ -0,0 +1,107 @@ +from __future__ import annotations + +from rich.panel import Panel +from rich.progress import BarColumn, Progress + +from cyberdrop_dl.progress._common import ProgressProxy, TaskCounter + + +class FileStats(ProgressProxy): + """Class that keeps track of completed, skipped and failed files.""" + + _columns = ( + "[progress.description]{task.description}", + BarColumn(bar_width=None), + "[progress.percentage]{task.percentage:>6.2f}%", + "━", + "{task.completed:,}", + ) + + def __repr__(self) -> str: + return f"{type(self).__name__}({vars(self)!r})" + + def __init__(self) -> None: + super().__init__() + self._total_files: int = 0 + + for name, color, desc in ( + ("completed", "green", "Completed"), + ("previously_completed", "yellow", "Previously Downloaded"), + ("skipped", "yellow", "Skipped By Configuration"), + ("queued", "cyan", "Queued"), + ("failed", "red", "Failed"), + ): + self._tasks_map[name] = TaskCounter(self._progress.add_task(f"[{color}]{desc}", total=0)) + + self.simple: Progress = Progress(*self._columns) + self._tasks_map["simple"] = TaskCounter(self.simple.add_task("Completed", total=0)) + self._panel = Panel( + self._progress, + title="Files", + border_style="green", + padding=(1, 1), + subtitle=self.subtitle, + ) + + def __rich__(self) -> Panel: + return self._panel + + @property + def subtitle(self) -> str: + return f"Total Files: [white]{self._total_files:,}" + + def update_total(self, increase_total: bool = True) -> None: + self._panel.subtitle = self.subtitle + if not increase_total: + return + + self._total_files = self._total_files + 1 + self._progress.update(self._tasks_map["completed"].id, total=self._total_files) + self._progress.update(self._tasks_map["previously_completed"].id, total=self._total_files) + self._progress.update(self._tasks_map["skipped"].id, total=self._total_files) + self._progress.update(self._tasks_map["failed"].id, total=self._total_files) + self._progress.update(self._tasks_map["queued"].id, total=self._total_files) + self.simple.update( + self._tasks_map["simple"].id, + total=self._total_files, + completed=self._total_files - self._tasks_map["queued"].count, + ) + + def add_completed(self) -> None: + self._progress.advance(self._tasks_map["completed"].id) + self._tasks_map["completed"].count += 1 + + def add_previously_completed(self, increase_total: bool = True) -> None: + if increase_total: + self.update_total() + + self._tasks_map["previously_completed"].count += 1 + self._progress.advance(self._tasks_map["previously_completed"].id) + + def add_skipped(self) -> None: + self._progress.advance(self._tasks_map["skipped"].id) + self._tasks_map["skipped"].count += 1 + + def add_failed(self) -> None: + 
self._progress.advance(self._tasks_map["failed"].id) + self._tasks_map["failed"].count += 1 + + def update_queued(self, count: int) -> None: + self._tasks_map["queued"].count = count + self._progress.update(self._tasks_map["queued"].id, completed=count) + + @property + def skipped_files(self) -> int: + return self._tasks_map["skipped"].count + + @property + def failed_files(self) -> int: + return self._tasks_map["failed"].count + + @property + def completed_files(self) -> int: + return self._tasks_map["completed"].count + + @property + def previously_completed(self) -> int: + return self._tasks_map["previously_completed"].count diff --git a/cyberdrop_dl/progress/hashing.py b/cyberdrop_dl/progress/hashing.py new file mode 100644 index 000000000..28ac2f131 --- /dev/null +++ b/cyberdrop_dl/progress/hashing.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +import asyncio +import contextlib +from contextvars import ContextVar +from pathlib import Path +from typing import TYPE_CHECKING + +from pydantic import ByteSize +from rich.console import Group +from rich.markup import escape +from rich.panel import Panel +from rich.progress import BarColumn, Progress + +from cyberdrop_dl import config + +from ._common import ProgressProxy, TaskCounter + +if TYPE_CHECKING: + from collections.abc import Generator + + +def _get_enabled_hashes(): + yield "xxh128" + if config.get().dupe_cleanup_options.add_md5_hash: + yield "md5" + if config.get().dupe_cleanup_options.add_sha256_hash: + yield "sha256" + + +_base_dir: ContextVar[Path] = ContextVar("_base_dir") + + +class HashingPanel(ProgressProxy): + """Class that keeps track of hashed files.""" + + _columns = ("{task.description}",) + + def __init__(self) -> None: + super().__init__() + self._hash_progress = Progress( + "[progress.description]{task.description}", BarColumn(bar_width=None), "{task.completed:,}" + ) + self._enabled_hashes: tuple[str, ...] 
= tuple(_get_enabled_hashes()) + self._computed_hashes: int = 0 + self._prev_hashed: int = 0 + + for hash_type in self._enabled_hashes: + desc = "[green]Hashed " + escape(f"[{hash_type}]") + self._tasks_map[hash_type] = TaskCounter(self._hash_progress.add_task(desc, total=None)) + + self._tasks_map.update( + prev_hashed=TaskCounter(self._hash_progress.add_task("[green]Previously Hashed", total=None)), + removed=TaskCounter(self._progress.add_task("", visible=False)), + base_dir=TaskCounter(self._progress.add_task("")), + file=TaskCounter(self._progress.add_task("")), + ) + + self._panel = Panel( + Group(self._progress, self._hash_progress), + title="Hashing", + border_style="green", + padding=(1, 1), + ) + + def __rich__(self) -> Panel: + return self._panel + + @property + def hashed_files(self) -> int: + return int(self._computed_hashes / len(self._enabled_hashes)) + + @property + def prev_hashed_files(self) -> int: + return int(self._prev_hashed / len(self._enabled_hashes)) + + @property + def removed_files(self) -> int: + return self._tasks_map["removed"].count + + @contextlib.contextmanager + def currently_hashing_dir(self, path: Path) -> Generator[None]: + token = _base_dir.set(path) + desc = "[green]Base dir: [blue]" + escape(str(path)) + self._progress.update(self._tasks_map["base_dir"].id, description=desc) + try: + yield + finally: + _base_dir.reset(token) + self._progress.update(self._tasks_map["base_dir"].id, description="") + + async def update_currently_hashing(self, file: Path | str) -> None: + file = Path(file) + size = await asyncio.to_thread(lambda *_: file.stat().st_size) + size_text = ByteSize(size).human_readable(decimal=True) + path = file.relative_to(_base_dir.get()) + self._progress.update( + self._tasks_map["file"].id, + description="[green]Current file: [blue]" + escape(f"{path}") + f" [green]({size_text})", + ) + + def add_new_completed_hash(self, hash_type: str) -> None: + self._hash_progress.advance(self._tasks_map[hash_type].id) + self._tasks_map[hash_type].count += 1 + self._computed_hashes += 1 # keep hashed_files in sync with the per-hash counters + + def add_prev_hash(self) -> None: + self._hash_progress.advance(self._tasks_map["prev_hashed"].id) + self._tasks_map["prev_hashed"].count += 1 + self._prev_hashed += 1 # keep prev_hashed_files in sync + + def add_removed_file(self) -> None: + self._progress.advance(self._tasks_map["removed"].id) + self._tasks_map["removed"].count += 1 diff --git a/cyberdrop_dl/progress/scrape.py b/cyberdrop_dl/progress/scrape.py new file mode 100644 index 000000000..448fb3a70 --- /dev/null +++ b/cyberdrop_dl/progress/scrape.py @@ -0,0 +1,154 @@ +from __future__ import annotations + +from contextvars import ContextVar +from typing import TYPE_CHECKING, ClassVar + +from rich.console import Group +from rich.panel import Panel +from rich.progress import ( + BarColumn, + DownloadColumn, + SpinnerColumn, + TaskID, + TimeRemainingColumn, + TransferSpeedColumn, +) + +from cyberdrop_dl.progress._common import ProgressHook, ProgressProxy + +if TYPE_CHECKING: + from yarl import URL + +_COLOR: str = "plum3" + +_links: ContextVar[ProgressHook] = ContextVar("_links") +_downloads: ContextVar[ProgressHook] = ContextVar("_downloads") + + +class OverFlow(ProgressProxy): + _desc: ClassVar[str] = "[{color}]...
and {number:,} other {name}" + _columns = ("[progress.description]{task.description}",) + + def __init__(self, name: str) -> None: + super().__init__() + self.name: str = name + self._count: int = 0 + self._task_id: TaskID = self._progress.add_task(str(self), visible=False) + + def __str__(self) -> str: + return self._desc.format(color=_COLOR, number=self._count, name=self.name) + + def __repr__(self) -> str: + return f"{type(self).__name__}(desc={self!s})" + + def update(self, count: int) -> None: + self._count = count + self._progress.update(self._task_id, description=str(self), visible=count > 0) + + +class UIPanel(ProgressProxy): + unit: ClassVar[str] + _desc_fmt: ClassVar[str] = "[{color}]{description}" + + def __repr__(self) -> str: + return f"{type(self).__name__}(progress={self._progress!r})" + + def __init__(self, visible_tasks_limit: int) -> None: + super().__init__() + self.title = type(self).__name__.removesuffix("Panel") + self._overflow = OverFlow(self.unit) + self._limit = visible_tasks_limit + self._panel = Panel( + Group(self._progress, self._overflow), + title=self.title, + border_style="green", + padding=(1, 1), + ) + + def __rich__(self) -> Panel: + return self._panel + + def _redraw(self) -> None: + self._overflow.update(count=len(self._tasks) - self._limit) + + def _add_task(self, description: str, total: float | None = None) -> TaskID: + task_id = self._progress.add_task( + self._desc_fmt.format(color=_COLOR, description=description), + total=total, + visible=len(self._tasks) < self._limit, + ) + self._redraw() + return task_id + + def remove_task(self, task_id: TaskID) -> None: + self._progress.remove_task(task_id) + self._redraw() + + def new_hook(self, description: object, total: float | None = None) -> ProgressHook: + task_id = self._add_task(str(description), total) + + def advance(amount: int) -> None: + self._advance(task_id, amount) + + def done() -> None: + self.remove_task(task_id) + + def speed() -> float: + return self.get_speed(task_id) + + return ProgressHook(advance, done, speed) + + def _advance(self, task_id: TaskID, amount: int) -> None: + self._progress.advance(task_id, amount) + + def get_speed(self, task_id: TaskID) -> float: + task = self._tasks[task_id] + return task.finished_speed or task.speed or 0 + + +class ScrapingPanel(UIPanel): + unit: ClassVar[str] = "URLs" + _columns = SpinnerColumn(), "[progress.description]{task.description}" + + def __init__(self) -> None: + super().__init__(visible_tasks_limit=5) + + def new_task(self, url: URL) -> TaskID: # type: ignore[reportIncompatibleMethodOverride] + return self._add_task(str(url)) + + +class DownloadsPanel(UIPanel): + unit: ClassVar[str] = "files" + _columns = ( + SpinnerColumn(), + "[progress.description]{task.description}", + BarColumn(bar_width=None), + "[progress.percentage]{task.percentage:>6.2f}%", + "━", + DownloadColumn(), + "━", + TransferSpeedColumn(), + "━", + TimeRemainingColumn(), + ) + + def __init__(self) -> None: + self.total_data_written: int = 0 + super().__init__(visible_tasks_limit=10) + + @property + def current_hook(self) -> ProgressHook: + return _downloads.get() + + def new(self, filename: str, size: float | None = None) -> ProgressHook: + description = self._clean_task_desc(str(filename).rsplit("/", 1)[-1]) + hook = self.new_hook(description, size) + _ = _downloads.set(hook) + return hook + + def _advance(self, task_id: TaskID, amount: int) -> None: + self.total_data_written += amount + super()._advance(task_id, amount) + + def advance_file(self, task_id: TaskID, 
amount: int) -> None: + self._advance(task_id, amount) diff --git a/cyberdrop_dl/progress/sorting.py b/cyberdrop_dl/progress/sorting.py new file mode 100644 index 000000000..2d4128f48 --- /dev/null +++ b/cyberdrop_dl/progress/sorting.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from typing import ClassVar + +from rich.progress import BarColumn, SpinnerColumn, TaskID + +from cyberdrop_dl.progress.scrape import UIPanel + + +class SortingPanel(UIPanel): + """Class that keeps track of sorted files.""" + + unit: ClassVar[str] = "Folders" + _columns = ( + SpinnerColumn(), + "[progress.description]{task.description}", + BarColumn(bar_width=None), + "[progress.percentage]{task.percentage:>6.2f}%", + "━", + "{task.completed}/{task.total} files", + ) + + def __init__(self) -> None: + super().__init__(visible_tasks_limit=1) + self.audio_count = self.video_count = self.image_count = self.other_count = 0 + + def new_task(self, folder: str, expected_size: int | None) -> TaskID: + description = self._clean_task_desc(folder) + return super()._add_task(description, expected_size) + + def advance_folder(self, task_id: TaskID, amount: int = 1) -> None: + self._advance(task_id, amount) + + def increment_audio(self) -> None: + self.audio_count += 1 + + def increment_video(self) -> None: + self.video_count += 1 + + def increment_image(self) -> None: + self.image_count += 1 + + def increment_other(self) -> None: + self.other_count += 1 diff --git a/cyberdrop_dl/scraper/scrape_mapper.py b/cyberdrop_dl/scrape_mapper.py similarity index 76% rename from cyberdrop_dl/scraper/scrape_mapper.py rename to cyberdrop_dl/scrape_mapper.py index 594817cb7..6a1a68408 100644 --- a/cyberdrop_dl/scraper/scrape_mapper.py +++ b/cyberdrop_dl/scrape_mapper.py @@ -2,14 +2,15 @@ import asyncio import contextlib +import datetime import re -from datetime import date, datetime from pathlib import Path from typing import TYPE_CHECKING, Literal, Self import aiofiles -from yarl import URL +from cyberdrop_dl import config +from cyberdrop_dl.clients.jdownloader import JDownloader from cyberdrop_dl.constants import REGEX_LINKS, BlockedDomains from cyberdrop_dl.crawlers._chevereto import CheveretoCrawler from cyberdrop_dl.crawlers.crawler import Crawler, create_crawlers @@ -19,25 +20,39 @@ from cyberdrop_dl.crawlers.wordpress import WordPressHTMLCrawler, WordPressMediaCrawler from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL, ScrapeItem from cyberdrop_dl.exceptions import JDownloaderError, NoExtensionError -from cyberdrop_dl.scraper.filters import is_in_domain_list, is_outside_date_range, is_valid_url -from cyberdrop_dl.scraper.jdownloader import JDownloader from cyberdrop_dl.utils.logger import log, log_spacer from cyberdrop_dl.utils.utilities import get_download_path, remove_trailing_slash if TYPE_CHECKING: - from collections.abc import AsyncGenerator, Generator + from collections.abc import AsyncGenerator, Generator, Sequence import aiosqlite - from cyberdrop_dl.config.global_model import GenericCrawlerInstances, GlobalSettings + from cyberdrop_dl.config.settings import GenericCrawlerInstances from cyberdrop_dl.crawlers import Crawler - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager existing_crawlers: dict[str, Crawler] = {} _seen_urls: set[AbsoluteHttpURL] = set() _crawlers_disabled_at_runtime: set[str] = set() +def is_outside_date_range(scrape_item: ScrapeItem, before: datetime.date | None, after: datetime.date | None) -> bool: + skip = False + item_date = 
scrape_item.completed_at or scrape_item.created_at + if not item_date: + return False + date = datetime.datetime.fromtimestamp(item_date).date() + if (after and date < after) or (before and date > before): + skip = True + + return skip + + +def is_in_domain_list(scrape_item: ScrapeItem, domain_list: Sequence[str]) -> bool: + return any(domain in scrape_item.url.host for domain in domain_list) + + class ScrapeMapper: """This class maps links to their respective handlers, or JDownloader if they are unsupported.""" @@ -45,8 +60,8 @@ def __init__(self, manager: Manager) -> None: self.manager = manager self.existing_crawlers: dict[str, Crawler] = {} self.direct_crawler = DirectHttpFile(self.manager) - self.jdownloader = JDownloader(self.manager) - self.jdownloader_whitelist = self.manager.config_manager.settings_data.runtime_options.jdownloader_whitelist + self.jdownloader = JDownloader.new(config.get()) + self.jdownloader_whitelist = config.get().runtime_options.jdownloader_whitelist self.using_input_file = False self.groups = set() self.count = 0 @@ -58,23 +73,15 @@ def __init__(self, manager: Manager) -> None: def group_count(self) -> int: return len(self.groups) - @property - def global_settings(self) -> GlobalSettings: - return self.manager.config_manager.global_settings_data - - @property - def enable_generic_crawler(self) -> bool: - return self.global_settings.general.enable_generic_crawler - def start_scrapers(self) -> None: """Starts all scrapers.""" from cyberdrop_dl import plugins self.existing_crawlers = get_crawlers_mapping(self.manager) - generic_crawlers = create_generic_crawlers_by_config(self.global_settings.generic_crawlers_instances) + generic_crawlers = create_generic_crawlers_by_config(config.get().generic_crawlers_instances) for crawler in generic_crawlers: register_crawler(self.existing_crawlers, crawler(self.manager), from_user=True) - disable_crawlers_by_config(self.existing_crawlers, self.global_settings.general.disable_crawlers) + disable_crawlers_by_config(self.existing_crawlers, config.get().general.disable_crawlers) plugins.load(self.manager) async def start_real_debrid(self) -> None: @@ -99,32 +106,19 @@ async def run(self) -> None: """Starts the orchestra.""" self.start_scrapers() await self.manager.db_manager.history_table.update_previously_unsupported(self.existing_crawlers) - self.jdownloader.connect() + await self.jdownloader.connect() await self.start_real_debrid() self.direct_crawler._init_downloader() async for item in self.get_input_items(): self.manager.task_group.create_task(self.send_to_crawler(item)) - async def get_input_items(self) -> AsyncGenerator[ScrapeItem]: - item_limit = 0 - if self.manager.parsed_args.cli_only_args.retry_any and self.manager.parsed_args.cli_only_args.max_items_retry: - item_limit = self.manager.parsed_args.cli_only_args.max_items_retry - - if self.manager.parsed_args.cli_only_args.retry_failed: - items_generator = self.load_failed_links() - elif self.manager.parsed_args.cli_only_args.retry_all: - items_generator = self.load_all_links() - elif self.manager.parsed_args.cli_only_args.retry_maintenance: - items_generator = self.load_all_bunkr_failed_links_via_hash() - else: - items_generator = self.load_links() + async def get_input_items(self, input_file) -> AsyncGenerator[ScrapeItem]: + items_generator = self.load_links(input_file) + children_limits = config.get().download_options.maximum_number_of_children async for item in items_generator: - await self.manager.states.RUNNING.wait() - item.children_limits = 
self.manager.config_manager.settings_data.download_options.maximum_number_of_children - if self.filter_items(item): - if item_limit and self.count >= item_limit: - break + item.children_limits = children_limits + if self.should_scrape(item): yield item self.count += 1 @@ -133,9 +127,9 @@ async def get_input_items(self) -> AsyncGenerator[ScrapeItem]: """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - async def parse_input_file_groups(self) -> AsyncGenerator[tuple[str, list[AbsoluteHttpURL]]]: + async def parse_input_file_groups(self, input_file) -> AsyncGenerator[tuple[str, list[AbsoluteHttpURL]]]: """Split URLs from input file by their groups.""" - input_file = self.manager.path_manager.input_file + if not await asyncio.to_thread(input_file.is_file): yield ("", []) return @@ -158,10 +152,10 @@ async def parse_input_file_groups(self) -> AsyncGenerator[tuple[str, list[Absolu # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`` - async def load_links(self) -> AsyncGenerator[ScrapeItem]: + async def load_links(self, source: list[AbsoluteHttpURL] | Path) -> AsyncGenerator[ScrapeItem]: """Loads links from args / input file.""" - if not self.manager.parsed_args.cli_only_args.links: + if isinstance(source, Path): self.using_input_file = True async for group_name, urls in self.parse_input_file_groups(): for url in urls: @@ -175,7 +169,7 @@ async def load_links(self) -> AsyncGenerator[ScrapeItem]: return - for url in self.manager.parsed_args.cli_only_args.links: + for url in source: yield ScrapeItem(url=url) async def load_failed_links(self) -> AsyncGenerator[ScrapeItem]: @@ -184,14 +178,6 @@ async def load_failed_links(self) -> AsyncGenerator[ScrapeItem]: for row in rows: yield _create_item_from_row(row) - async def load_all_links(self) -> AsyncGenerator[ScrapeItem]: - """Loads all links from database.""" - after = self.manager.parsed_args.cli_only_args.completed_after or date.min - before = self.manager.parsed_args.cli_only_args.completed_before or datetime.now().date() - async for rows in self.manager.db_manager.history_table.get_all_items(after, before): - for row in rows: - yield _create_item_from_row(row) - async def load_all_bunkr_failed_links_via_hash(self) -> AsyncGenerator[ScrapeItem]: """Loads all bunkr links with maintenance hash.""" async for rows in self.manager.db_manager.history_table.get_all_bunkr_failed(): @@ -202,9 +188,7 @@ async def load_all_bunkr_failed_links_via_hash(self) -> AsyncGenerator[ScrapeIte async def filter_and_send_to_crawler(self, scrape_item: ScrapeItem) -> None: """Send scrape_item to a supported crawler.""" - if not isinstance(scrape_item.url, URL): - scrape_item.url = AbsoluteHttpURL(scrape_item.url) - if self.filter_items(scrape_item): + if self.should_scrape(scrape_item): await self.send_to_crawler(scrape_item) async def send_to_crawler(self, scrape_item: ScrapeItem) -> None: @@ -233,13 +217,13 @@ async def send_to_crawler(self, scrape_item: ScrapeItem) -> None: except (NoExtensionError, ValueError): pass - if self.jdownloader.enabled and jdownloader_whitelisted: + if self.jdownloader._enabled and jdownloader_whitelisted: log(f"Sending unsupported URL to JDownloader: {scrape_item.url}", 20) success = False try: download_folder = get_download_path(self.manager, scrape_item, "jdownloader") - relative_download_dir = download_folder.relative_to(self.manager.path_manager.download_folder) - self.jdownloader.direct_unsupported_to_jdownloader( + 
relative_download_dir = download_folder.relative_to(config.get().files.download_folder) + self.jdownloader.send( scrape_item.url, scrape_item.parent_title, relative_download_dir, @@ -247,27 +231,20 @@ async def send_to_crawler(self, scrape_item: ScrapeItem) -> None: success = True except JDownloaderError as e: log(f"Failed to send {scrape_item.url} to JDownloader\n{e.message}", 40) - self.manager.log_manager.write_unsupported_urls_log( - scrape_item.url, - scrape_item.parents[0] if scrape_item.parents else None, - ) - self.manager.progress_manager.scrape_stats_progress.add_unsupported(sent_to_jdownloader=success) + self.manager.logs.write_unsupported(scrape_item.url, scrape_item) + self.manager.progress_manager.scrape_errors.add_unsupported(sent_to_jdownloader=success) return log(f"Unsupported URL: {scrape_item.url}", 30) - self.manager.log_manager.write_unsupported_urls_log( - scrape_item.url, - scrape_item.parents[0] if scrape_item.parents else None, - ) - self.manager.progress_manager.scrape_stats_progress.add_unsupported() + self.manager.logs.write_unsupported(scrape_item.url, scrape_item) + self.manager.progress_manager.scrape_errors.add_unsupported() - def filter_items(self, scrape_item: ScrapeItem) -> bool: + def should_scrape(self, scrape_item: ScrapeItem) -> bool: """Pre-filter scrape items base on URL.""" - if not is_valid_url(scrape_item): - return False if scrape_item.url in _seen_urls: return False + _seen_urls.add(scrape_item.url) if ( @@ -277,18 +254,12 @@ def filter_items(self, scrape_item: ScrapeItem) -> bool: log(f"Skipping {scrape_item.url} as it is a blocked domain", 10) return False - before = self.manager.parsed_args.cli_only_args.completed_before - after = self.manager.parsed_args.cli_only_args.completed_after - if is_outside_date_range(scrape_item, before, after): - log(f"Skipping {scrape_item.url} as it is outside of the desired date range", 10) - return False - - skip_hosts = self.manager.config_manager.settings_data.ignore_options.skip_hosts + skip_hosts = config.get().ignore_options.skip_hosts if skip_hosts and is_in_domain_list(scrape_item, skip_hosts): log(f"Skipping URL by skip_hosts config: {scrape_item.url}", 10) return False - only_hosts = self.manager.config_manager.settings_data.ignore_options.only_hosts + only_hosts = config.get().ignore_options.only_hosts if only_hosts and not is_in_domain_list(scrape_item, only_hosts): log(f"Skipping URL by only_hosts config: {scrape_item.url}", 10) return False @@ -342,9 +313,9 @@ def _create_item_from_row(row: aiosqlite.Row) -> ScrapeItem: url = AbsoluteHttpURL(referer, encoded="%" in referer) item = ScrapeItem(url=url, retry_path=Path(row["download_path"]), part_of_album=True) if completed_at := row["completed_at"]: - item.completed_at = int(datetime.fromisoformat(completed_at).timestamp()) + item.completed_at = int(datetime.datetime.fromisoformat(completed_at).timestamp()) if created_at := row["created_at"]: - item.created_at = int(datetime.fromisoformat(created_at).timestamp()) + item.created_at = int(datetime.datetime.fromisoformat(created_at).timestamp()) return item diff --git a/cyberdrop_dl/scraper/__init__.py b/cyberdrop_dl/scraper/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/cyberdrop_dl/scraper/filters.py b/cyberdrop_dl/scraper/filters.py deleted file mode 100644 index 7c6dc8993..000000000 --- a/cyberdrop_dl/scraper/filters.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import annotations - -import datetime -from typing import TYPE_CHECKING - -from yarl import URL - 
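# Aside (illustrative, not part of the diff): the filter helpers deleted below
# were moved nearly verbatim into scrape_mapper.py above. A minimal sketch of
# their semantics, using a hypothetical SimpleNamespace stand-in for
# ScrapeItem (the real class lives in cyberdrop_dl.data_structures.url_objects):
import datetime
from types import SimpleNamespace

item = SimpleNamespace(
    url=SimpleNamespace(host="files.bunkr.si"),
    completed_at=int(datetime.datetime(2023, 6, 1).timestamp()),
    created_at=None,
)

# is_in_domain_list() is a plain substring check, so "bunkr.si" also matches
# subdomains such as "files.bunkr.si".
assert any(domain in item.url.host for domain in ["bunkr.si"])

# is_outside_date_range() compares the item's completion (or creation) date
# against optional before/after bounds; this 2023 item falls outside an
# after=2024-01-01 range and would be skipped.
item_date = datetime.datetime.fromtimestamp(item.completed_at).date()
assert item_date < datetime.date(2024, 1, 1)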
-from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL - -if TYPE_CHECKING: - from collections.abc import Sequence - - from cyberdrop_dl.data_structures.url_objects import ScrapeItem - - -def is_valid_url(scrape_item: ScrapeItem) -> bool: - if not scrape_item.url: - return False - if not isinstance(scrape_item.url, URL): - try: - scrape_item.url = AbsoluteHttpURL(scrape_item.url) - except AttributeError: - return False - try: - if not scrape_item.url.host: - return False - except AttributeError: - return False - - return True - - -def is_outside_date_range(scrape_item: ScrapeItem, before: datetime.date | None, after: datetime.date | None) -> bool: - skip = False - item_date = scrape_item.completed_at or scrape_item.created_at - if not item_date: - return False - date = datetime.datetime.fromtimestamp(item_date).date() - if (after and date < after) or (before and date > before): - skip = True - - return skip - - -def is_in_domain_list(scrape_item: ScrapeItem, domain_list: Sequence[str]) -> bool: - return any(domain in scrape_item.url.host for domain in domain_list) diff --git a/cyberdrop_dl/scraper/jdownloader.py b/cyberdrop_dl/scraper/jdownloader.py deleted file mode 100644 index d3c9fd94d..000000000 --- a/cyberdrop_dl/scraper/jdownloader.py +++ /dev/null @@ -1,98 +0,0 @@ -from __future__ import annotations - -import dataclasses -from typing import TYPE_CHECKING - -from myjdapi import myjdapi - -from cyberdrop_dl.exceptions import JDownloaderError -from cyberdrop_dl.utils.logger import log - -if TYPE_CHECKING: - from pathlib import Path - - from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL - from cyberdrop_dl.managers.manager import Manager - - -@dataclasses.dataclass(slots=True) -class JDownloaderConfig: - enabled: bool - username: str - password: str - device: str - download_dir: Path - autostart: bool - - @staticmethod - def from_manager(manager: Manager) -> JDownloaderConfig: - download_dir = manager.config.runtime_options.jdownloader_download_dir or manager.path_manager.download_folder - return JDownloaderConfig( - enabled=manager.config.runtime_options.send_unsupported_to_jdownloader, - device=manager.auth_config.jdownloader.device, - username=manager.auth_config.jdownloader.username, - password=manager.auth_config.jdownloader.password, - download_dir=download_dir.resolve(), - autostart=manager.config.runtime_options.jdownloader_autostart, - ) - - -class JDownloader: - """Class that handles connecting and passing links to JDownloader.""" - - def __init__(self, options: Manager | JDownloaderConfig, /) -> None: - if isinstance(options, JDownloaderConfig): - self._config = options - else: - self._config = JDownloaderConfig.from_manager(options) - self.enabled = self._config.enabled - self._agent = None - - def _connect(self) -> None: - if not all((self._config.username, self._config.password, self._config.device)): - raise JDownloaderError("JDownloader credentials were not provided.") - jd = myjdapi.Myjdapi() - jd.set_app_key("CYBERDROP-DL") - jd.connect(self._config.username, self._config.password) - self._agent = jd.get_device(self._config.device) - - def connect(self) -> None: - if not self.enabled or self._agent is not None: - return - try: - return self._connect() - except JDownloaderError as e: - msg = e.message - except myjdapi.MYJDDeviceNotFoundException: - msg = f"Device not found ({self._config.device})" - except myjdapi.MYJDApiException as e: - msg = e - - log(f"Failed to connect to jDownloader: {msg}", 40) - self.enabled = False - - def 
direct_unsupported_to_jdownloader( - self, - url: AbsoluteHttpURL, - title: str, - relative_download_path: Path | None = None, - ) -> None: - """Sends links to JDownloader.""" - try: - assert self._agent is not None - download_folder = self._config.download_dir - if relative_download_path: - download_folder = download_folder / relative_download_path - self._agent.linkgrabber.add_links( - [ - { - "autostart": self._config.autostart, - "links": str(url), - "packageName": title if title else "Cyberdrop-DL", - "destinationFolder": str(download_folder), - "overwritePackagizerRules": True, - }, - ], - ) - except (AssertionError, myjdapi.MYJDException) as e: - raise JDownloaderError(str(e)) from e diff --git a/cyberdrop_dl/ui/__init__.py b/cyberdrop_dl/ui/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/cyberdrop_dl/ui/program_ui.py b/cyberdrop_dl/ui/program_ui.py deleted file mode 100644 index d7d113b73..000000000 --- a/cyberdrop_dl/ui/program_ui.py +++ /dev/null @@ -1,296 +0,0 @@ -from __future__ import annotations - -import asyncio -import sqlite3 -import sys -from functools import wraps -from typing import TYPE_CHECKING, Any, ParamSpec, TypeVar - -from requests import request -from rich.console import Console -from rich.markdown import Markdown -from rich.text import Text - -from cyberdrop_dl.clients.hash_client import hash_directory_scanner -from cyberdrop_dl.dependencies import browser_cookie3 -from cyberdrop_dl.ui.prompts import user_prompts -from cyberdrop_dl.ui.prompts.basic_prompts import ask_dir_path, enter_to_continue -from cyberdrop_dl.ui.prompts.defaults import DONE_CHOICE, EXIT_CHOICE -from cyberdrop_dl.utils.cookie_management import clear_cookies -from cyberdrop_dl.utils.sorting import Sorter -from cyberdrop_dl.utils.text_editor import open_in_text_editor -from cyberdrop_dl.utils.updates import check_latest_pypi -from cyberdrop_dl.utils.utilities import clear_term - -if TYPE_CHECKING: - from collections.abc import Callable - from pathlib import Path - - from InquirerPy.base.control import Choice - - from cyberdrop_dl.managers.manager import Manager - -P = ParamSpec("P") -R = TypeVar("R") - -console = Console() -ERROR_PREFIX = Text("ERROR: ", style="bold red") - - -def repeat_until_done(func: Callable[P, R]) -> Callable[P, R]: - @wraps(func) - def wrapper(*args, **kwargs) -> R: - done = False - while not done: - done = func(*args, **kwargs) - return done - - return wrapper - - -class ProgramUI: - def __init__(self, manager: Manager, run: bool = True) -> None: - self.manager = manager - if run: - self.run() - - @staticmethod - def print_error(msg: str, critical: bool = False) -> None: - text = ERROR_PREFIX + msg - console.print(text, style="bold red" if critical else None) - if critical: - sys.exit(1) - enter_to_continue() - - @repeat_until_done - def run(self) -> bool | None: - """Program UI.""" - clear_term() - options_map = { - 1: self._download, - 2: self._retry_failed_download, - 3: self._scan_and_create_hashes, - 4: self._sort_files, - 5: self._edit_urls, - 6: self._change_config, - 7: self._manage_configs, - 8: self._check_updates, - 9: self._view_changelog, - } - - answer = user_prompts.main_prompt(self.manager) - result = self._process_answer(answer, options_map) - return_to_main = result and result != DONE_CHOICE - if return_to_main: - clear_term() - return return_to_main - - def _download(self) -> bool: - """Starts download process.""" - return True - - def _retry_failed_download(self) -> bool: - """Sets retry failed and starts download 
process.""" - self.manager.parsed_args.cli_only_args.retry_failed = True - return True - - def _scan_and_create_hashes(self) -> None: - """Scans a folder and creates hashes for all of its files.""" - path = ask_dir_path("Select the directory to scan", default=str(self.manager.path_manager.download_folder)) - hash_directory_scanner(self.manager, path) - - def _sort_files(self) -> None: - """Sort files in download folder""" - sorter = Sorter(self.manager) - asyncio.run(sorter.run()) - - def _check_updates(self) -> None: - """Checks Cyberdrop-DL updates.""" - check_latest_pypi(logging="CONSOLE") - enter_to_continue() - - def _change_config(self) -> None: - configs = self.manager.config_manager.get_configs() - selected_config = user_prompts.select_config(configs) - self.manager.config_manager.change_config(selected_config) - if user_prompts.switch_default_config_to(self.manager, selected_config): - self.manager.config_manager.change_default_config(selected_config) - self.manager.config_manager.change_config(selected_config) - - def _view_changelog(self) -> None: - clear_term() - changelog_content = self._get_changelog() - if not changelog_content: - return - with console.pager(links=True): - console.print(Markdown(changelog_content, justify="left")) - - @repeat_until_done - def _manage_configs(self) -> Choice | None: - options_map = { - 1: self._change_default_config, - 2: self._create_new_config, - 3: self._delete_config, - 4: self._edit_config, - 5: self._edit_auth_config, - 6: self._edit_global_config, - 7: self._edit_auto_cookies_extration, - 8: self._import_cookies_now, - 9: self._clear_cookies, - 10: self._clear_cache, - } - answer = user_prompts.manage_configs(self.manager) - return self._process_answer(answer, options_map) - - def _clear_cookies(self) -> None: - domains, _ = user_prompts.domains_prompt(domain_message="Select site(s) to clear cookies for:") - clear_cookies(self.manager, domains) - console.print("Finished clearing cookies", style="green") - enter_to_continue() - - def _clear_cache(self) -> None: - domains, _ = user_prompts.domains_prompt(domain_message="Select site(s) to clear cache for:") - if not domains: - console.print("No domains selected", style="red") - enter_to_continue() - return - urls = user_prompts.filter_cache_urls(self.manager, domains) - for url in urls: - asyncio.run(self.manager.cache_manager.request_cache.delete_url(url)) - - console.print("\nExecuting database vacuum. This may take several minutes, please wait...") - try: - vacuum_database(self.manager.path_manager.cache_db) - except sqlite3.Error as e: - return self.print_error(f"Unable to clean request database. 
Database may be corrupted : {e!s}") - console.print("Finished clearing the cache", style="green") - enter_to_continue() - - def _edit_auth_config(self) -> None: - config_file = self.manager.config_manager.authentication_settings - self._open_in_text_editor(config_file) - - def _edit_global_config(self) -> None: - config_file = self.manager.config_manager.global_settings - self._open_in_text_editor(config_file) - - def _edit_config(self) -> None: - config_file = self.manager.config_manager.settings - self._open_in_text_editor(config_file) - - def _create_new_config(self) -> None: - config_name = user_prompts.create_new_config(self.manager) - if not config_name: - return - if user_prompts.switch_default_config_to(self.manager, config_name): - self.manager.config_manager.change_default_config(config_name) - self.manager.config_manager.change_config(config_name) - config_file = self.manager.config_manager.settings - self._open_in_text_editor(config_file) - - def _edit_urls(self) -> None: - self._open_in_text_editor(self.manager.path_manager.input_file, reload_config=False) - - def _change_default_config(self) -> None: - configs = self.manager.config_manager.get_configs() - selected_config = user_prompts.select_config(configs) - self.manager.config_manager.change_default_config(selected_config) - if user_prompts.activate_config(self.manager, selected_config) is not None: - self.manager.config_manager.change_config(selected_config) - - def _delete_config(self) -> None: - configs = self.manager.config_manager.get_configs() - if len(configs) == 1: - self.print_error("There is only one config") - return - - selected_config = user_prompts.select_config(configs) - if selected_config == self.manager.config_manager.loaded_config: - self.print_error("You cannot delete the currently active config") - return - - if self.manager.cache_manager.get("default_config") == selected_config: - self.print_error("You cannot delete the default config") - return - - self.manager.config_manager.delete_config(selected_config) - if user_prompts.switch_default_config(): - self._change_default_config() - - def _edit_auto_cookies_extration(self) -> None: - user_prompts.auto_cookie_extraction(self.manager) - - def _import_cookies_now(self) -> None: - try: - user_prompts.extract_cookies(self.manager) - except browser_cookie3.BrowserCookieError as e: - self.print_error(str(e)) - - def _place_holder(self) -> None: - self.print_error("Option temporarily disabled on this version") - - def _open_in_text_editor(self, file_path: Path, *, reload_config: bool = True): - try: - open_in_text_editor(file_path) - except ValueError as e: - self.print_error(str(e)) - return - if reload_config: - console.print("Revalidating config, please wait..") - self.manager.config_manager.change_config(self.manager.config_manager.loaded_config) - - def _process_answer(self, answer: Any, options_map: dict) -> Choice | None: - """Checks prompt answer and executes corresponding function.""" - if answer == EXIT_CHOICE.value: - asyncio.run(self.manager.cache_manager.close()) - sys.exit(0) - if answer == DONE_CHOICE.value: - return DONE_CHOICE - - function_to_call = options_map.get(answer) - if not function_to_call: - self.print_error("Something went wrong. Please report it to the developer", critical=True) - sys.exit(1) - - return function_to_call() - - def _get_changelog(self) -> str | None: - """Get latest changelog file from github. 
Returns its content.""" - path = self.manager.path_manager.config_folder.parent / "CHANGELOG.md" - url = "https://raw.githubusercontent.com/NTFSvolume/cdl/refs/heads/master/CHANGELOG.md" - _, latest_version = check_latest_pypi(logging="OFF") - if not latest_version: - self.print_error("UNABLE TO GET LATEST VERSION INFORMATION") - return None - - name = f"{path.stem}_{latest_version}{path.suffix}" - changelog = path.with_name(name) - if not changelog.is_file(): - changelog_pattern = f"{path.stem}*{path.suffix}" - for old_changelog in path.parent.glob(changelog_pattern): - old_changelog.unlink() - try: - with request("GET", url, timeout=15) as response: - response.raise_for_status() - with changelog.open("wb") as f: - f.write(response.content) - except Exception: - self.print_error("UNABLE TO GET CHANGELOG INFORMATION") - return None - - lines = changelog.read_text(encoding="utf8").splitlines() - # remove keep_a_changelog disclaimer - return "\n".join(lines[:21] + lines[25:]) - - -def vacuum_database(db_path: Path) -> None: - if not db_path.is_file(): - return - conn = None - try: - conn = sqlite3.connect(db_path) - conn.execute("VACUUM") - conn.commit() - finally: - if conn: - conn.close() diff --git a/cyberdrop_dl/ui/progress/__init__.py b/cyberdrop_dl/ui/progress/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/cyberdrop_dl/ui/progress/deque_progress.py b/cyberdrop_dl/ui/progress/deque_progress.py deleted file mode 100644 index ec1645501..000000000 --- a/cyberdrop_dl/ui/progress/deque_progress.py +++ /dev/null @@ -1,118 +0,0 @@ -from __future__ import annotations - -from abc import ABC, abstractmethod -from collections import deque -from itertools import islice -from typing import TYPE_CHECKING - -from rich.console import Group -from rich.panel import Panel -from rich.progress import Progress, TaskID - -if TYPE_CHECKING: - from collections.abc import Sequence - - -def adjust_title(s: str, length: int = 40, placeholder: str = "...") -> str: - """Collapse and truncate or pad the given string to fit in the given length.""" - return f"{s[: length - len(placeholder)]}{placeholder}" if len(s) >= length else s.ljust(length) - - -class DequeProgress(ABC): - _progress: Progress - type_str: str = "files" - color = "plum3" - progress_str = "[{color}]{description}" - overflow_str = "[{color}]... and {number:,} other {type_str}" - queue_str = "[{color}]... and {number:,} {type_str} in {title} queue" - - def __init__(self, title: str, visible_tasks_limit: int) -> None: - self.title = title - self.title_lower = title.lower() - self._overflow = Progress("[progress.description]{task.description}") - self._queue = Progress("[progress.description]{task.description}") - self._progress_group = Group(self._progress, self._overflow, self._queue) - - self._overflow_task_id = self._overflow.add_task( - self.overflow_str.format(color=self.color, number=0, type_str=self.type_str), - visible=False, - ) - self._queue_task_id = self._queue.add_task( - self.queue_str.format(color=self.color, number=0, type_str=self.type_str, title=self.title_lower), - visible=False, - ) - self._tasks: deque[TaskID] = deque() - self._tasks_visibility_limit = visible_tasks_limit - - @abstractmethod - def get_queue_length(self) -> int: ... 
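# Aside (illustrative, not part of the diff): DequeProgress, removed here, and
# its replacement UIPanel/OverFlow pair in progress/scrape.py share the same
# idea: render at most N tasks and collapse the rest into an
# "... and X other <unit>" row. A standalone sketch of that arithmetic,
# assuming a fixed visibility limit:
def split_visible(task_ids: list[int], limit: int) -> tuple[list[int], int]:
    """Return the tasks to render and the count of hidden (overflow) tasks."""
    return task_ids[:limit], max(0, len(task_ids) - limit)

visible, hidden = split_visible(list(range(8)), limit=5)
assert visible == [0, 1, 2, 3, 4]
assert hidden == 3  # rendered as "... and 3 other files"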
- - @property - def visible_tasks(self) -> Sequence[TaskID]: - if len(self._tasks) > self._tasks_visibility_limit: - return [self._tasks[i] for i in range(self._tasks_visibility_limit)] - return self._tasks - - @property - def invisible_tasks(self) -> Sequence[TaskID]: - return list(islice(self._tasks, self._tasks_visibility_limit, None)) - - @property - def invisible_tasks_len(self) -> int: - """Faster to compute than `len(self.invisible_tasks)`""" - return max(0, len(self._tasks) - self._tasks_visibility_limit) - - def has_visible_capacity(self) -> bool: - return len(self._tasks) < self._tasks_visibility_limit - - def get_renderable(self) -> Panel: - """Returns the progress bar.""" - return Panel(self._progress_group, title=self.title, border_style="green", padding=(1, 1)) - - def add_task(self, description: str, total: float | None = None) -> TaskID: - """Adds a new task to the progress bar.""" - task_id = self._progress.add_task( - self.progress_str.format(color=self.color, description=description), - total=total, - visible=self.has_visible_capacity(), - ) - self._tasks.append(task_id) - self.redraw() - return task_id - - def remove_task(self, task_id: TaskID) -> None: - """Removes a task from the progress bar.""" - if task_id not in self._tasks: - msg = "Task ID not found" - raise ValueError(msg) - - self._tasks.remove(task_id) - self._progress.remove_task(task_id) - self.redraw() - - def redraw(self) -> None: - """Redraws the progress bar.""" - for task in self.visible_tasks: - self._progress.update(task, visible=True) - - invisible_tasks_len = self.invisible_tasks_len - - self._overflow.update( - self._overflow_task_id, - description=self.overflow_str.format( - color=self.color, - number=invisible_tasks_len, - type_str=self.type_str, - ), - visible=invisible_tasks_len > 0, - ) - - queue_length = self.get_queue_length() - - self._queue.update( - self._queue_task_id, - description=self.queue_str.format( - color=self.color, number=queue_length, type_str=self.type_str, title=self.title_lower - ), - visible=queue_length > 0, - ) diff --git a/cyberdrop_dl/ui/progress/downloads_progress.py b/cyberdrop_dl/ui/progress/downloads_progress.py deleted file mode 100644 index 25e83989a..000000000 --- a/cyberdrop_dl/ui/progress/downloads_progress.py +++ /dev/null @@ -1,101 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from rich.console import Group -from rich.panel import Panel -from rich.progress import BarColumn, Progress - -if TYPE_CHECKING: - from cyberdrop_dl.managers.manager import Manager - - -class DownloadsProgress: - """Class that keeps track of completed, skipped and failed files.""" - - def __init__(self, manager: Manager) -> None: - self.manager = manager - self.progress = Progress( - "[progress.description]{task.description}", - BarColumn(bar_width=None), - "[progress.percentage]{task.percentage:>6.2f}%", - "━", - "{task.completed:,}", - ) - self.progress_group = Group(self.progress) - - self.total_files = 0 - self.completed_files_task_id = self.progress.add_task("[green]Completed", total=0) - self.completed_files = 0 - self.previously_completed_files_task_id = self.progress.add_task("[yellow]Previously Downloaded", total=0) - self.previously_completed_files = 0 - self.skipped_files_task_id = self.progress.add_task("[yellow]Skipped By Configuration", total=0) - self.skipped_files = 0 - self.queued_files_task_id = self.progress.add_task("[cyan]Queued", total=0) - self.queued_files = 0 - self.failed_files_task_id = 
self.progress.add_task("[red]Failed", total=0) - self.failed_files = 0 - self.panel = Panel( - self.progress_group, - title=f"Config: {self.manager.config_manager.loaded_config}", - border_style="green", - padding=(1, 1), - subtitle=f"Total Files: [white]{self.total_files:,}", - ) - self.simple_progress = Progress( - "[progress.description]{task.description}", - BarColumn(bar_width=None), - "[progress.percentage]{task.percentage:>6.2f}%", - "━", - "{task.completed:,}", - ) - self.simple_progress_task_id = self.simple_progress.add_task("Completed", total=0) - - @property - def simple_completed(self): - return self.total_files - self.queued_files - - def get_progress(self) -> Panel: - """Returns the progress bar.""" - return self.panel - - def update_total(self, increase_total: bool = True) -> None: - """Updates the total number of files to be downloaded.""" - if increase_total: - self.total_files = self.total_files + 1 - self.progress.update(self.completed_files_task_id, total=self.total_files) - self.progress.update(self.previously_completed_files_task_id, total=self.total_files) - self.progress.update(self.skipped_files_task_id, total=self.total_files) - self.progress.update(self.failed_files_task_id, total=self.total_files) - self.progress.update(self.queued_files_task_id, total=self.total_files) - self.simple_progress.update( - self.simple_progress_task_id, total=self.total_files, completed=self.simple_completed - ) - self.panel.subtitle = f"Total Files: [white]{self.total_files:,}" - - def add_completed(self) -> None: - """Adds a completed file to the progress bar.""" - self.progress.advance(self.completed_files_task_id, 1) - self.completed_files += 1 - - def add_previously_completed(self, increase_total: bool = True) -> None: - """Adds a previously completed file to the progress bar.""" - if increase_total: - self.update_total() - self.previously_completed_files += 1 - self.progress.advance(self.previously_completed_files_task_id, 1) - - def add_skipped(self) -> None: - """Adds a skipped file to the progress bar.""" - self.progress.advance(self.skipped_files_task_id, 1) - self.skipped_files += 1 - - def add_failed(self) -> None: - """Adds a failed file to the progress bar.""" - self.progress.advance(self.failed_files_task_id, 1) - self.failed_files += 1 - - def update_queued(self, number: int) -> None: - """Adds a queed file to the progress bar.""" - self.queued_files = number - self.progress.update(self.queued_files_task_id, completed=self.queued_files) diff --git a/cyberdrop_dl/ui/progress/file_progress.py b/cyberdrop_dl/ui/progress/file_progress.py deleted file mode 100644 index d2b17ed3e..000000000 --- a/cyberdrop_dl/ui/progress/file_progress.py +++ /dev/null @@ -1,78 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from rich.markup import escape -from rich.progress import ( - BarColumn, - DownloadColumn, - Progress, - SpinnerColumn, - TaskID, - TimeRemainingColumn, - TransferSpeedColumn, -) - -from cyberdrop_dl.ui.progress.deque_progress import DequeProgress, adjust_title - -if TYPE_CHECKING: - from cyberdrop_dl.managers.manager import Manager - - -class FileProgress(DequeProgress): - """Class that manages the download progress of individual files.""" - - def __init__(self, manager: Manager) -> None: - self.manager = manager - progress_colums = (SpinnerColumn(), "[progress.description]{task.description}", BarColumn(bar_width=None)) - visible_tasks_limit: int = 10 - horizontal_columns = ( - *progress_colums, - 
"[progress.percentage]{task.percentage:>6.2f}%", - "━", - DownloadColumn(), - "━", - TransferSpeedColumn(), - "━", - TimeRemainingColumn(), - ) - vertical_columns = (*progress_colums, DownloadColumn(), "━", TransferSpeedColumn()) - use_columns = horizontal_columns - if manager.parsed_args.cli_only_args.portrait: - use_columns = vertical_columns - self._progress = Progress(*use_columns) - super().__init__("Downloads", visible_tasks_limit) - - def get_queue_length(self) -> int: - """Returns the number of tasks in the downloader queue.""" - total = 0 - unique_crawler_ids = set() - for crawler in self.manager.scrape_mapper.existing_crawlers.values(): - crawler_id = id(crawler) # Only count each instance of the crawler once - if crawler_id in unique_crawler_ids: - continue - unique_crawler_ids.add(crawler_id) - total += getattr(crawler.downloader, "waiting_items", 0) - - return total - - def add_task(self, *, domain: str, filename: str, expected_size: int | None = None) -> TaskID: # type: ignore[reportIncompatibleMethodOverride] - """Adds a new task to the progress bar.""" - filename = filename.split("/")[-1].encode("ascii", "ignore").decode().strip() - description = escape(adjust_title(filename, length=40)) - if not self.manager.progress_manager.portrait: - description = f"({domain.upper()}) {description}" - return super().add_task(description, expected_size) - - def advance_file(self, task_id: TaskID, amount: int) -> None: - """Advances the progress of the given task by the given amount.""" - self.manager.storage_manager.total_data_written += amount - self._progress.advance(task_id, amount) - - def get_speed(self, task_id: TaskID) -> float: - if task_id not in self._tasks: - msg = "Task ID not found" - raise ValueError(msg) - - task = self._progress._tasks[task_id] - return task.finished_speed or task.speed or 0 diff --git a/cyberdrop_dl/ui/progress/hash_progress.py b/cyberdrop_dl/ui/progress/hash_progress.py deleted file mode 100644 index 252dfe74f..000000000 --- a/cyberdrop_dl/ui/progress/hash_progress.py +++ /dev/null @@ -1,125 +0,0 @@ -from __future__ import annotations - -import contextlib -from pathlib import Path -from typing import TYPE_CHECKING - -from pydantic import ByteSize -from rich.console import Group -from rich.markup import escape -from rich.panel import Panel -from rich.progress import BarColumn, Progress, TaskID - -if TYPE_CHECKING: - from cyberdrop_dl.managers.manager import Manager - - -def _generic_progress() -> Progress: - return Progress("[progress.description]{task.description}", BarColumn(bar_width=None), "{task.completed:,}") - - -class HashProgress: - """Class that keeps track of hashed files.""" - - def __init__(self, manager: Manager) -> None: - self.manager = manager - self._hash_progress = _generic_progress() - self._remove_progress = _generic_progress() - self._match_progress = _generic_progress() - self._file_info = Progress("{task.description}") - self._base_dir: Path | None = None - - # hashing - self._computed_hashes = self._prev_hashed = 0 - self.hash_progress_group = Group(self._file_info, self._hash_progress) - - self._tasks: dict[str, TaskID] = {} - - def add_hashed_task(hash_type: str) -> None: - desc = "[green]Hashed " + escape(f"[{hash_type}]") - self._tasks[hash_type] = self._hash_progress.add_task(desc, total=None) - - add_hashed_task("xxh128") - if manager.config.dupe_cleanup_options.add_md5_hash: - add_hashed_task("md5") - if manager.config.dupe_cleanup_options.add_sha256_hash: - add_hashed_task("sha256") - - self.prev_hashed_files_task_id = 
self._hash_progress.add_task("[green]Previously Hashed", total=None) - - self._base_dir_task_id = self._file_info.add_task("") - self._file_task_id = self._file_info.add_task("") - - # remove - self.removed_files = 0 - self.removed_progress_group = Group(self._match_progress, self._remove_progress) - self.removed_files_task_id = self._remove_progress.add_task( - "[green]Removed From Downloaded Files", - total=None, - ) - - @property - def hashed_files(self) -> int: - return int(self._computed_hashes / len(self._tasks)) - - @property - def prev_hashed_files(self) -> int: - return int(self._prev_hashed / len(self._tasks)) - - def get_renderable(self) -> Panel: - """Returns the progress bar.""" - return Panel( - self.hash_progress_group, - title=f"Config: {self.manager.config_manager.loaded_config}", - border_style="green", - padding=(1, 1), - ) - - def get_removed_progress(self) -> Panel: - """Returns the progress bar.""" - return Panel(self.removed_progress_group, border_style="green", padding=(1, 1)) - - @contextlib.contextmanager - def currently_hashing_dir(self, path: Path): - self._base_dir = path - desc = "[green]Base dir: [blue]" + escape(f"{self._base_dir}") - self._file_info.update(self._base_dir_task_id, description=desc) - try: - yield - finally: - self._base_dir = None - self._file_info.update(self._base_dir_task_id, description="") - - def update_currently_hashing(self, file: Path) -> None: - if not self._base_dir: - return - file_size = ByteSize(Path(file).stat().st_size) - size_text = file_size.human_readable(decimal=True) - path = file.relative_to(self._base_dir) - desc = "[green]Current file: [blue]" + escape(f"{path}") + f" [green]({size_text})" - self._file_info.update(self._file_task_id, description=desc) - - def add_new_completed_hash(self, hash_type: str) -> None: - """Adds a completed file to the progress bar.""" - self._hash_progress.advance(self._tasks[hash_type], 1) - self._computed_hashes += 1 - - def add_prev_hash(self) -> None: - """Adds a completed file to the progress bar.""" - self._hash_progress.advance(self.prev_hashed_files_task_id, 1) - self._prev_hashed += 1 - - def add_removed_file(self) -> None: - """Adds a removed file to the progress bar.""" - self._remove_progress.advance(self.removed_files_task_id, 1) - self.removed_files += 1 - - def reset(self): - """Resets the progress bar.""" - for task in self._tasks.values(): - self._hash_progress.reset(task) - self._hash_progress.reset(self.prev_hashed_files_task_id) - self._computed_hashes = self._prev_hashed = 0 - - self._remove_progress.reset(self.removed_files_task_id) - self.removed_files = 0 diff --git a/cyberdrop_dl/ui/progress/scraping_progress.py b/cyberdrop_dl/ui/progress/scraping_progress.py deleted file mode 100644 index 9139c65e6..000000000 --- a/cyberdrop_dl/ui/progress/scraping_progress.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from rich.progress import Progress, SpinnerColumn, TaskID - -from cyberdrop_dl.ui.progress.deque_progress import DequeProgress - -if TYPE_CHECKING: - from yarl import URL - - from cyberdrop_dl.managers.manager import Manager - - -class ScrapingProgress(DequeProgress): - """Class that manages the download progress of individual files.""" - - type_str = "URLs" - - def __init__(self, manager: Manager) -> None: - self.manager = manager - self._progress = Progress(SpinnerColumn(), "[progress.description]{task.description}") - visible_tasks_limit: int = 5 - super().__init__("Scraping", visible_tasks_limit) - - 
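# Aside (illustrative, not part of the diff): the removed get_queue_length()
# implementations here and in the old FileProgress deduplicate crawlers via
# id(), because several domains can map to the same crawler instance. A
# standalone sketch of that counting pattern, with a hypothetical stand-in:
class _FakeCrawler:
    waiting_items = 2

_shared = _FakeCrawler()
_crawlers = {"a.example": _shared, "b.example": _shared, "c.example": _FakeCrawler()}

_seen: set[int] = set()
_total = 0
for _crawler in _crawlers.values():
    if id(_crawler) in _seen:  # count each instance only once
        continue
    _seen.add(id(_crawler))
    _total += _crawler.waiting_items

assert _total == 4  # the shared instance contributes once, not twice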
def get_queue_length(self) -> int: - """Returns the number of tasks in the scraper queue.""" - total = 0 - unique_crawler_ids = set() - for crawler in self.manager.scrape_mapper.existing_crawlers.values(): - crawler_id = id(crawler) # Only count each instance of the crawler once - if crawler_id in unique_crawler_ids: - continue - unique_crawler_ids.add(crawler_id) - total += crawler.waiting_items - - return total - - def redraw(self, passed: bool = False) -> None: - super().redraw() - if not passed: - self.manager.progress_manager.file_progress.redraw() - - def add_task(self, url: URL) -> TaskID: # type: ignore[reportIncompatibleMethodOverride] - """Adds a new task to the progress bar.""" - return super().add_task(str(url)) diff --git a/cyberdrop_dl/ui/progress/sort_progress.py b/cyberdrop_dl/ui/progress/sort_progress.py deleted file mode 100644 index e57684285..000000000 --- a/cyberdrop_dl/ui/progress/sort_progress.py +++ /dev/null @@ -1,74 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from rich.markup import escape -from rich.panel import Panel -from rich.progress import BarColumn, Progress, SpinnerColumn, TaskID - -from cyberdrop_dl.ui.progress.deque_progress import DequeProgress, adjust_title - -if TYPE_CHECKING: - from cyberdrop_dl.managers.manager import Manager - - -class SortProgress(DequeProgress): - """Class that keeps track of sorted files.""" - - type_str = "Folders" - - def __init__(self, visible_tasks_limit: int, manager: Manager) -> None: - """Sorter to track the progress of folders being sorted. - - Should work similar to the file_progress but for folders, with a percentage and progress bar for the files within the folders""" - self.manager = manager - self._progress = Progress( - SpinnerColumn(), - "[progress.description]{task.description}", - BarColumn(bar_width=None), - "[progress.percentage]{task.percentage:>6.2f}%", - "━", - "{task.completed}/{task.total} files", - ) - super().__init__("Sort", visible_tasks_limit) - - # counts - self.queue_length = self.audio_count = self.video_count = self.image_count = self.other_count = 0 - - def get_queue_length(self) -> int: - return self.queue_length - - def get_renderable(self) -> Panel: - """Returns the progress bar.""" - return Panel( - self._progress_group, - title=f"Sorting Downloads ━ Config: {self.manager.config_manager.loaded_config}", - border_style="green", - padding=(1, 1), - ) - - def set_queue_length(self, length: int) -> None: - self.queue_length = length - - def add_task(self, folder: str, expected_size: int | None) -> TaskID: - """Adds a new task to the progress bar.""" - # description = f'Sorting {folder}' - description = folder.encode("ascii", "ignore").decode().strip() - description = escape(adjust_title(description)) - return super().add_task(description, expected_size) - - def advance_folder(self, task_id: TaskID, amount: int = 1) -> None: - """Advances the progress of the given task by the given amount.""" - self._progress.advance(task_id, amount) - - def increment_audio(self) -> None: - self.audio_count += 1 - - def increment_video(self) -> None: - self.video_count += 1 - - def increment_image(self) -> None: - self.image_count += 1 - - def increment_other(self) -> None: - self.other_count += 1 diff --git a/cyberdrop_dl/ui/progress/statistic_progress.py b/cyberdrop_dl/ui/progress/statistic_progress.py deleted file mode 100644 index f31965aef..000000000 --- a/cyberdrop_dl/ui/progress/statistic_progress.py +++ /dev/null @@ -1,186 +0,0 @@ -from __future__ import annotations 
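# Aside (illustrative, not part of the diff): UiFailureTotal.from_pair,
# removed below, and the new UIFailure class earlier in this diff split a
# failure string the same way: a leading integer is treated as an HTTP status
# code, anything else stays a plain message. A minimal sketch of that parsing:
def parse_failure(full_msg: str) -> tuple[int | None, str]:
    parts = full_msg.split(" ", 1)
    if len(parts) > 1 and parts[0].isdigit():
        return int(parts[0]), parts[1]
    return None, full_msg

assert parse_failure("404 Not Found") == (404, "Not Found")
assert parse_failure("Server Disconnected") == (None, "Server Disconnected")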
- -import contextlib -import functools -from typing import NamedTuple - -from rich.console import Group -from rich.panel import Panel -from rich.progress import BarColumn, Progress, TaskID - -FAILURE_OVERRIDES = { - "ClientConnectorCertificateError": "Client Connector Certificate Error", - "ClientConnectorDNSError": "Client Connector DNS Error", - "ClientConnectorError": "Client Connector Error", - "ClientConnectorSSLError": "Client Connector SSL Error", - "ClientHttpProxyError": "Client HTTP Proxy Error", - "ClientPayloadError": "Client Payload Error", - "ClientProxyConnectionError": "Client Proxy Connection Error", - "ConnectionTimeoutError": "Connection Timeout", - "ContentTypeError": "Content Type Error", - "InvalidURL": "Invalid URL", - "InvalidUrlClientError": "Invalid URL Client Error", - "InvalidUrlRedirectClientError": "Invalid URL Redirect", - "NonHttpUrlRedirectClientError": "Non HTTP URL Redirect", - "RedirectClientError": "Redirect Error", - "ServerConnectionError": "Server Connection Error", - "ServerDisconnectedError": "Server Disconnected", - "ServerFingerprintMismatch": "Server Fingerprint Mismatch", - "ServerTimeoutError": "Server Timeout Error", - "SocketTimeoutError": "Socket Timeout Error", -} - - -class TaskInfo(NamedTuple): - id: TaskID - description: str - completed: float - total: float | None - progress: float - - -class UiFailureTotal(NamedTuple): - full_msg: str - total: int - error_code: int | None - msg: str - - @classmethod - def from_pair(cls, full_msg: str, total: int) -> UiFailureTotal: - parts = full_msg.split(" ", 1) - if len(parts) > 1 and parts[0].isdigit(): - error_code, msg = parts - return cls(full_msg, total, int(error_code), msg) - return cls(full_msg, total, None, full_msg) - - -def get_tasks_info_sorted(progress: Progress) -> tuple[list[TaskInfo], bool]: - tasks = [ - TaskInfo( - id=task.id, - description=task.description, - completed=task.completed, - total=task.total, - progress=(task.completed / task.total if task.total else 0), - ) - for task in progress.tasks - ] - - tasks_sorted = sorted(tasks, key=lambda x: x.completed, reverse=True) - were_sorted = tasks == tasks_sorted - return tasks_sorted, were_sorted - - -class StatsProgress: - """Base class that keeps track of failures and reasons.""" - - title = "Download Failures" - - def __init__(self) -> None: - self.progress = Progress( - "[progress.description]{task.description}", - BarColumn(bar_width=None), - "[progress.percentage]{task.percentage:>6.2f}%", - "━", - "{task.completed:,}", - ) - self.progress_group = Group(self.progress) - self.failure_types: dict[str, TaskID] = {} - self.failed_files = 0 - self.panel = Panel( - self.progress_group, - title=self.title, - border_style="green", - padding=(1, 1), - subtitle=self.subtitle, - ) - - @property - def subtitle(self) -> str: - return f"Total {self.title}: [white]{self.failed_files:,}" - - def get_progress(self) -> Panel: - """Returns the progress bar.""" - return self.panel - - def update_total(self, total: int) -> None: - """Updates the total number download failures.""" - self.panel.subtitle = self.subtitle - for key in self.failure_types: - self.progress.update(self.failure_types[key], total=total) - - tasks_sorted, were_sorted = get_tasks_info_sorted(self.progress) - if not were_sorted: - self.sort_tasks(tasks_sorted) - - def sort_tasks(self, tasks_sorted: list[TaskInfo]) -> None: - for task_id in [task.id for task in tasks_sorted]: - self.progress.remove_task(task_id) - - for task in tasks_sorted: - 
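Assuming the `UiFailureTotal` definition above, `from_pair` splits a leading integer off the failure message as an HTTP-style status code; messages without one keep `error_code=None`. Illustrative values:

UiFailureTotal.from_pair("404 Not Found", 3)
# -> UiFailureTotal(full_msg='404 Not Found', total=3, error_code=404, msg='Not Found')
UiFailureTotal.from_pair("Connection Timeout", 1)
# -> UiFailureTotal(full_msg='Connection Timeout', total=1, error_code=None, msg='Connection Timeout')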
self.failure_types[task.description] = self.progress.add_task( - task.description, - total=task.total, - completed=task.completed, # type: ignore - ) - - def add_failure(self, failure: str) -> None: - """Adds a failed file to the progress bar.""" - self.failed_files += 1 - key = get_pretty_failure(failure) - task_id = self.failure_types.get(key) - if task_id is not None: - self.progress.advance(task_id) - else: - self.failure_types[key] = self.progress.add_task(key, total=self.failed_files, completed=1) - self.update_total(self.failed_files) - - def return_totals(self) -> list[UiFailureTotal]: - """Returns the total number of failed sites and reasons.""" - failures = {} - for key, task_id in self.failure_types.items(): - task = next(task for task in self.progress.tasks if task.id == task_id) - failures[key] = task.completed - return sorted(UiFailureTotal.from_pair(*f) for f in failures.items()) - - -class DownloadStatsProgress(StatsProgress): - """Class that keeps track of download failures and reasons.""" - - -class ScrapeStatsProgress(StatsProgress): - """Class that keeps track of scraping failures and reasons.""" - - title = "Scrape Failures" - - def __init__(self) -> None: - super().__init__() - self.unsupported_urls = 0 - self.sent_to_jdownloader = 0 - self.unsupported_urls_skipped = 0 - - def add_unsupported(self, sent_to_jdownloader: bool = False) -> None: - """Adds an unsupported url to the progress bar.""" - self.unsupported_urls += 1 - if sent_to_jdownloader: - self.sent_to_jdownloader += 1 - else: - self.unsupported_urls_skipped += 1 - - -@functools.lru_cache -def get_pretty_failure(failure: str) -> str: - with contextlib.suppress(KeyError): - return FAILURE_OVERRIDES[failure] - return capitalize_words(failure) - - -def capitalize_words(text: str) -> str: - """Capitalize first letter of each word - - Unlike `str.capwords()`, this only caps the first letter of each word without modifying the rest of the word""" - return " ".join([capitalize_first_letter(word) for word in text.split()]) - - -def capitalize_first_letter(word: str) -> str: - return word[0].capitalize() + word[1:] diff --git a/cyberdrop_dl/ui/prompts/__init__.py b/cyberdrop_dl/ui/prompts/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/cyberdrop_dl/ui/prompts/basic_prompts.py b/cyberdrop_dl/ui/prompts/basic_prompts.py deleted file mode 100644 index 86462a157..000000000 --- a/cyberdrop_dl/ui/prompts/basic_prompts.py +++ /dev/null @@ -1,111 +0,0 @@ -# type: ignore[reportPrivateImportUsage] -import sys -from pathlib import Path - -from InquirerPy import inquirer -from InquirerPy.base.control import Choice -from InquirerPy.separator import Separator -from InquirerPy.validator import EmptyInputValidator, PathValidator - -from cyberdrop_dl.ui.prompts.defaults import DEFAULT_OPTIONS, DONE_CHOICE - - -def ask_text(message: str, validate_empty: bool = True, **kwargs): - options = DEFAULT_OPTIONS | kwargs - return inquirer.text( - message=message, - validate=EmptyInputValidator("Input should not be empty") if validate_empty else None, - **options, - ).execute() - - -def ask_choice(choices: list[Choice], *, message: str = "What would you like to do:", **kwargs): - options = DEFAULT_OPTIONS | kwargs - return inquirer.select(message=message, choices=choices, **options).execute() - - -def ask_multi_choice(choices: list[Choice], *, message: str = "What would you like to do:", **kwargs): - return ask_choice(choices, message=message, multiselect=True, **kwargs) - - -def ask_checkbox(choices: list[Choice], 
*, message: str = "Select multiple options:", **kwargs): - options = DEFAULT_OPTIONS | {"long_instruction": "ARROW KEYS: Navigate | SPACE: Select | ENTER: Confirm"} | kwargs - return inquirer.checkbox(message=message, choices=choices, **options).execute() - - -def ask_choice_fuzzy(choices: list[Choice], message: str, validate_empty: bool = True, **kwargs): - options = ( - DEFAULT_OPTIONS - | {"long_instruction": "ARROW KEYS: Navigate | TYPE: Filter | TAB: select, ENTER: Finish Selection"} - | kwargs - ) - custom_validate = options.pop("validate", None) - validate = ( - EmptyInputValidator("Input should not be empty") - if validate_empty and custom_validate is None - else custom_validate - ) - return inquirer.fuzzy( - message=message, - choices=choices, - validate=validate, - **options, - ).execute() - - -def ask_path(message: str = "Select path", *, validator_options: dict | None = None, **kwargs) -> Path: - options = DEFAULT_OPTIONS | {"default": str(Path.home())} | kwargs - return Path( - inquirer.filepath(message=message, validate=PathValidator(**(validator_options or {})), **options).execute() - ) - - -def ask_file_path(message: str = "Select file path", **kwargs) -> Path: - options = DEFAULT_OPTIONS | kwargs - validator_options = {"is_file": True, "message": "Input is not a file"} - return ask_path(message, validator_options=validator_options, **options) - - -def ask_dir_path(message: str = "Select dir path", **kwargs) -> Path: - options = DEFAULT_OPTIONS | kwargs - validator_options = {"is_dir": True, "message": "Input is not a directory"} - return ask_path(message, validator_options=validator_options, **options) - - -def ask_toggle(message: str = "enable", **kwargs): - options = DEFAULT_OPTIONS | {"long_instruction": "Y: Yes | N: No"} | kwargs - return inquirer.confirm(message=message, **options).execute() - - -def enter_to_continue(message: str = "Press to continue", **kwargs): - if "pytest" in sys.modules: - return - options = DEFAULT_OPTIONS | {"long_instruction": "ENTER: continue"} | kwargs - msg = f"\n{message}" - return inquirer.confirm(message=msg, qmark="", **options).execute() - - -def create_choices( - options_groups: list[list[str]] | dict[str, list[list[str]]], - append_last: Choice = DONE_CHOICE, - *, - disabled_choices: list[str] | None = None, -): - if isinstance(options_groups, dict): - options_groups = list(options_groups.values()) - disabled_choices = disabled_choices or [] - options = [option for group in options_groups for option in group] - choices = [] - for index, option in enumerate(options, 1): - enabled = option not in disabled_choices - choices.append(Choice(index, option, enabled)) - choices.append(append_last) - - separator_indexes = [] - for group in options_groups: - separator_indexes.append(len(group) + (separator_indexes[-1] if separator_indexes else 0)) - - for count, index in enumerate(separator_indexes): - choices.insert(index + count, Separator()) - - return choices diff --git a/cyberdrop_dl/ui/prompts/defaults.py b/cyberdrop_dl/ui/prompts/defaults.py deleted file mode 100644 index bc5e4b993..000000000 --- a/cyberdrop_dl/ui/prompts/defaults.py +++ /dev/null @@ -1,7 +0,0 @@ -from InquirerPy.base.control import Choice - -EXIT_CHOICE = Choice("Exit") -DONE_CHOICE = Choice("Done") -ALL_CHOICE = Choice("All of the above") - -DEFAULT_OPTIONS = {"long_instruction": "ARROW KEYS: Navigate | ENTER: Select", "vi_mode": False} diff --git a/cyberdrop_dl/ui/prompts/user_prompts.py b/cyberdrop_dl/ui/prompts/user_prompts.py deleted file mode 100644 index 
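The removed `create_choices` above numbers the flattened options and then splices a `Separator()` after each group, using cumulative group sizes as insertion points. A hypothetical call with groups of sizes 2 and 1 would yield, in order:

# create_choices([["Download", "Retry failed downloads"], ["Edit configs"]])
#   Choice(1, "Download")
#   Choice(2, "Retry failed downloads")
#   Separator()
#   Choice(3, "Edit configs")
#   Separator()
#   DONE_CHOICE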
1d76d1593..000000000 --- a/cyberdrop_dl/ui/prompts/user_prompts.py +++ /dev/null @@ -1,309 +0,0 @@ -# type: ignore[reportPrivateImportUsage] -from __future__ import annotations - -import asyncio -from enum import IntEnum -from platform import system -from typing import TYPE_CHECKING - -from InquirerPy import get_style -from InquirerPy.base.control import Choice -from InquirerPy.enum import ( - INQUIRERPY_EMPTY_CIRCLE_SEQUENCE, - INQUIRERPY_FILL_CIRCLE_SEQUENCE, -) -from rich.console import Console - -from cyberdrop_dl import __version__ -from cyberdrop_dl.constants import BROWSERS, RESERVED_CONFIG_NAMES -from cyberdrop_dl.ui.prompts import basic_prompts -from cyberdrop_dl.ui.prompts.defaults import ALL_CHOICE, DONE_CHOICE, EXIT_CHOICE -from cyberdrop_dl.utils.cookie_management import get_cookies_from_browsers -from cyberdrop_dl.utils.utilities import clear_term - -if TYPE_CHECKING: - from pathlib import Path - - from yarl import URL - - from cyberdrop_dl.managers.manager import Manager - -console = Console() - - -def main_prompt(manager: Manager) -> int: - """Main prompt for the program.""" - prompt_header(manager) - OPTIONS = { - "group_1": ["Download", "Retry failed downloads", "Create file hashes", "Sort files in download folder"], - "group_2": ["Edit URLs.txt", "Change config", "Edit configs"], - "group_3": ["Check for updates", "View changelog"], - } - - choices = basic_prompts.create_choices(OPTIONS, append_last=EXIT_CHOICE) - - return basic_prompts.ask_choice(choices) - - -""" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MANAGE CONFIG PROMPTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - - -def manage_configs(manager: Manager) -> int: - """Manage Configs Prompt.""" - prompt_header(manager) - OPTIONS = { - "group_1": [ - "Change default config", - "Create a new config", - "Delete a config", - ], - "group_2": [ - "Edit current config", - "Edit authentication config", - "Edit global config", - ], - "group_3": ["Edit auto cookie extraction settings", "Import cookies now", "Clear cookies"], - "group_4": [ - "Clear cache", - ], - } - choices = basic_prompts.create_choices(OPTIONS) - return basic_prompts.ask_choice(choices) - - -def create_new_config(manager: Manager, *, title: str = "Create a new config file") -> str | None: - """Asks the user for a new config name. 
Returns `None` if the config name is invalid.""" - clear_term() - console.print(title) - answer: str = basic_prompts.ask_text("Enter the name of the config:") - return _check_valid_new_config_name(answer, manager) - - -def select_config(configs: list) -> str: - """Asks the user to select an existing config name.""" - return basic_prompts.ask_choice_fuzzy( - choices=configs, - message="Select a config file:", - validate_empty=True, - long_instruction="ARROW KEYS: Navigate | TYPE: Filter | TAB: select, ENTER: Finish Selection", - invalid_message="Need to select a config.", - ) - - -def switch_default_config_to(manager: Manager, config_name: str) -> str: - """Asks the user if they want to switch the default config to the provided config""" - if manager.config_manager.get_default_config() == config_name: - return - return basic_prompts.ask_toggle( - message=f"Do you want to switch the default config to {config_name}?", - ) - - -def switch_default_config() -> str: - """Asks the user if they want to switch the default config""" - return basic_prompts.ask_toggle( - message="Do you want to switch the default config?", - ) - - -def activate_config(manager: Manager, config) -> str: - """Asks the user if they want to activate the provided config""" - if manager.config_manager.get_loaded_config() == config: - return - return basic_prompts.ask_toggle(message=f"Do also want to activate the {config} config?") - - -def _check_valid_new_config_name(answer: str, manager: Manager) -> str | None: - """Check if the provided config name if. Returns `None` if the config name is invalid.""" - msg = None - if answer.casefold() in RESERVED_CONFIG_NAMES: - msg = f"[bold red]ERROR:[/bold red] Config name '{answer}' is a reserved internal name" - - elif manager.path_manager.config_folder.joinpath(answer).is_dir(): - msg = f"[bold red]ERROR:[/bold red] Config with name '{answer}' already exists!" 
- if msg: - console.print(msg) - basic_prompts.enter_to_continue() - return None - - return answer - - -""" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ AUTHENTICATION PROMPTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - - -def auto_cookie_extraction(manager: Manager): - answer = basic_prompts.ask_toggle("Enable auto cookies import:") - manager.config_manager.settings_data.browser_cookies.auto_import = answer - if answer: - extract_cookies(manager, dry_run=True) - manager.config_manager.write_updated_settings_config() - - -class DomainType(IntEnum): - WEBSITE = 0 - FORUM = 1 - - -def domains_prompt(*, domain_message: str = "Select site(s):") -> tuple[list[str], list[str]]: - """Asks the user to select website(s) for cookie actions and cache actions.""" - from cyberdrop_dl.supported_domains import SUPPORTED_FORUMS, SUPPORTED_SITES_DOMAINS, SUPPORTED_WEBSITES - - OPTIONS = [["Forum", "File Host"], ["All Supported Websites"]] - choices = basic_prompts.create_choices(OPTIONS) - domain_type = basic_prompts.ask_choice(choices, message="Select category:") - - if domain_type == DONE_CHOICE.value: - return [], [] - - if domain_type == 3: - return SUPPORTED_SITES_DOMAINS, SUPPORTED_SITES_DOMAINS - - all_domains = list(SUPPORTED_FORUMS.values() if domain_type == DomainType.FORUM else SUPPORTED_WEBSITES.values()) - domain_choices = [Choice(site) for site in all_domains] + [ALL_CHOICE] - - domains = basic_prompts.ask_choice_fuzzy( - choices=domain_choices, - message=domain_message, - validate_empty=True, - multiselect=True, - marker_pl=f" {INQUIRERPY_EMPTY_CIRCLE_SEQUENCE} ", - marker=f" {INQUIRERPY_FILL_CIRCLE_SEQUENCE} ", - style=get_style( - { - "marker": "#98c379", - "questionmark": "#e5c07b", - "pointer": "#61afef", - "long_instruction": "#abb2bf", - "fuzzy_prompt": "#c678dd", - "fuzzy_info": "#abb2bf", - "fuzzy_border": "#4b5263", - "fuzzy_match": "#c678dd", - } - ), - ) - if ALL_CHOICE.value in domains: - domains = all_domains - return domains, all_domains - - -def extract_cookies(manager: Manager, *, dry_run: bool = False) -> None: - """Asks the user to select browser(s) and domains(s) to import cookies from.""" - from cyberdrop_dl.supported_domains import SUPPORTED_FORUMS, SUPPORTED_SITES_DOMAINS, SUPPORTED_WEBSITES - - supported_forums, supported_websites = list(SUPPORTED_FORUMS.values()), list(SUPPORTED_WEBSITES.values()) - domains, all_domains = domains_prompt(domain_message="Select site(s) to import cookies from:") - if domains == []: - return - browser = BROWSERS(browser_prompt()) - - if dry_run: - manager.config_manager.settings_data.browser_cookies.browser = browser - current_sites = set(manager.config_manager.settings_data.browser_cookies.sites) - new_sites = current_sites - set(all_domains) - if domains == supported_forums: - new_sites -= {"all"} - new_sites.add("all_forums") - elif domains == supported_websites: - new_sites -= {"all"} - new_sites.add("all_file_hosts") - elif domains == SUPPORTED_SITES_DOMAINS: - new_sites -= {"all_forums", "all_file_hosts"} - new_sites.add("all") - else: - new_sites -= {"all", "all_forums", "all_file_hosts"} - new_sites.update(domains) - if "all_forums" in new_sites and "all_file_hosts" in new_sites: - new_sites -= {"all_forums", "all_file_hosts"} - new_sites.add("all") - manager.config_manager.settings_data.browser_cookies.sites = sorted(new_sites) - return - - get_cookies_from_browsers(manager, browser=browser, domains=domains) - console.print("Import finished", style="green") - basic_prompts.enter_to_continue() - - -def browser_prompt() -> 
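The dry-run branch of `extract_cookies` above folds concrete domains into the sentinels "all", "all_forums" and "all_file_hosts". A reduced sketch of those rules, using a hypothetical `normalize_sites` helper that is not in the codebase and skips the all-supported-sites case:

def normalize_sites(current: set[str], picked: list[str],
                    forums: list[str], hosts: list[str]) -> list[str]:
    sites = current - set(forums) - set(hosts)
    if picked == forums:
        sites = sites - {"all"} | {"all_forums"}
    elif picked == hosts:
        sites = sites - {"all"} | {"all_file_hosts"}
    else:
        sites = sites - {"all", "all_forums", "all_file_hosts"} | set(picked)
    if {"all_forums", "all_file_hosts"} <= sites:  # both sentinels collapse to "all"
        sites = sites - {"all_forums", "all_file_hosts"} | {"all"}
    return sorted(sites)

assert normalize_sites({"all_file_hosts"}, ["f1", "f2"], ["f1", "f2"], ["h1"]) == ["all"]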
str: - """Asks the user to select browser(s) for cookie extraction.""" - unsupported_browsers = { - "Windows": { - "arc", - "brave", - "chrome", - "chromium", - "edge", - "lynx", - "opera", - "opera_gx", - "safari", - "vivaldi", - "w3m", - }, - "Linux": {"arc", "opera_gx", "safari"}, - "Darwin": {"lynx", "w3m"}, - }.get(system(), set()) - choices = [ - Choice(browser, browser.capitalize() if browser != "opera_gx" else "Opera GX") - for browser in BROWSERS - if browser not in unsupported_browsers - ] - return basic_prompts.ask_choice(choices, message="Select the browser(s) for extraction:") - - -""" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CACHE PROMPTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - - -async def _get_urls(manager: Manager) -> set[URL]: - urls = set() - async for url in manager.cache_manager.request_cache.get_urls(): - urls.add(url) - return urls - - -def filter_cache_urls(manager: Manager, domains: list) -> set[URL]: - urls_to_remove = set() - cached_urls = asyncio.run(_get_urls(manager)) - cached_urls_copy = cached_urls.copy() - for domain in domains: - cached_urls = cached_urls_copy.copy() - cached_urls_copy = cached_urls.copy() - for url in cached_urls: - if url.host == domain: - urls_to_remove.add(url) - cached_urls_copy.remove(url) - return urls_to_remove - - -""" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ V4 IMPORT PROMPTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - - -def import_cyberdrop_v4_items_prompt(manager: Manager) -> int: - """Import Cyberdrop_V4 Items.""" - prompt_header(manager) - OPTIONS = [["Import config", "Import download_history.sql"]] - choices = basic_prompts.create_choices(OPTIONS) - console.print("V4 Import Menu") - return basic_prompts.ask_choice(choices) - - -def import_v4_config_prompt(manager: Manager) -> tuple[str, Path] | None: - """Asks the user for the name and path of the config to import. 
Returns `None` if the config name is invalid.""" - new_config = create_new_config(manager, title="What should this config be called:") - if not new_config: - return None - return new_config, basic_prompts.ask_file_path("Select the config file to import:") - - -def import_v4_download_history_prompt() -> Path: - return basic_prompts.ask_file_path("Select the download_history.sql file to import:") - - -""" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ OTHERS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - - -def prompt_header(manager: Manager, title: str | None = None) -> None: - clear_term() - title = title or f"[bold]Cyberdrop Downloader ([blue]V{__version__!s}[/blue])[/bold]" - console.print(title) - console.print(f"[bold]Current config:[/bold] [blue]{manager.config_manager.loaded_config}[/blue]") diff --git a/cyberdrop_dl/utils/apprise.py b/cyberdrop_dl/utils/apprise.py index 654d3c0ee..78283fb8f 100644 --- a/cyberdrop_dl/utils/apprise.py +++ b/cyberdrop_dl/utils/apprise.py @@ -15,14 +15,14 @@ from pydantic import ValidationError from rich.text import Text -from cyberdrop_dl import constants +from cyberdrop_dl import config, constants from cyberdrop_dl.dependencies import apprise from cyberdrop_dl.models import AppriseURLModel from cyberdrop_dl.utils.logger import log, log_debug, log_spacer -from cyberdrop_dl.utils.yaml import handle_validation_error +from cyberdrop_dl.utils.yaml import format_validation_error if TYPE_CHECKING: - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager @dataclass @@ -89,7 +89,7 @@ def get_apprise_urls(*, file: Path | None = None, urls: list[str] | None = None) return _simplify_urls([AppriseURLModel.model_validate({"url": url}) for url in set(urls)]) except ValidationError as e: - handle_validation_error(e, title="Apprise", file=file) + format_validation_error(e, title="Apprise", file=file) sys.exit(1) @@ -193,7 +193,7 @@ async def send_apprise_notifications(manager: Manager) -> tuple[constants.Notifi for apprise_url in apprise_urls: apprise_obj.add(apprise_url.url, tag=list(apprise_url.tags)) - main_log = manager.path_manager.main_log + main_log = config.get().logs.main_log all_urls = [x.raw_url for x in apprise_urls] log_lines = [] diff --git a/cyberdrop_dl/utils/dates.py b/cyberdrop_dl/utils/dates.py index c77771a24..c5cf291ac 100644 --- a/cyberdrop_dl/utils/dates.py +++ b/cyberdrop_dl/utils/dates.py @@ -1,7 +1,11 @@ from __future__ import annotations +import asyncio import datetime import email.utils +import shutil +import subprocess +import sys import warnings from functools import lru_cache from typing import TYPE_CHECKING, Literal, NewType, ParamSpec, TypeAlias, TypeVar @@ -10,6 +14,94 @@ if TYPE_CHECKING: from collections.abc import Callable + from pathlib import Path + +try: + import tzlocal + +except (ImportError, LookupError): + tzlocal = None + +_TIMEZONE = tzlocal.get_localzone() if tzlocal else None + + +if sys.platform == "win32": + # Try to import win32con for Windows constants, falling back to hardcoded values if unavailable + try: + import win32con # type: ignore[reportMissingModuleSource] # pyright: ignore[reportMissingModuleSource] + + except ImportError: + win32con = None + + FILE_WRITE_ATTRIBUTES = 256 + OPEN_EXISTING = win32con.OPEN_EXISTING if win32con else 3 + FILE_ATTRIBUTE_NORMAL = win32con.FILE_ATTRIBUTE_NORMAL if win32con else 128 + FILE_FLAG_BACKUP_SEMANTICS = win32con.FILE_FLAG_BACKUP_SEMANTICS if win32con else 33554432 + + # Windows epoch is January 1, 1601.
Unix epoch is January 1, 1970 + WIN_EPOCH_OFFSET = 116444736e9 + + from ctypes import byref, windll, wintypes + + def _set_win_time(file: Path, datetime: float) -> None: + nano_ts: float = datetime * 1e7 # Windows FILETIME counts time in 100-nanosecond intervals + timestamp = int(nano_ts + WIN_EPOCH_OFFSET) + + # Windows dates are 64-bit values, split into two 32-bit unsigned ints (dwLowDateTime, dwHighDateTime) + # Mask to keep the low 32 bits (dwLowDateTime), then shift right to get the high 32 bits (dwHighDateTime) + ctime = wintypes.FILETIME(timestamp & 0xFFFFFFFF, timestamp >> 32) + access_mode = FILE_WRITE_ATTRIBUTES + sharing_mode = 0 # Exclusive access + security_mode = None # Use default security attributes + creation_disposition = OPEN_EXISTING + + # FILE_FLAG_BACKUP_SEMANTICS allows access to directories + flags = FILE_ATTRIBUTE_NORMAL | FILE_FLAG_BACKUP_SEMANTICS + template_file = None + + params = ( + access_mode, + sharing_mode, + security_mode, + creation_disposition, + flags, + template_file, + ) + + handle = windll.kernel32.CreateFileW(str(file), *params) + windll.kernel32.SetFileTime( + handle, + byref(ctime), # Creation time + None, # Access time + None, # Modification time + ) + windll.kernel32.CloseHandle(handle) + + async def set_creation_time(file: Path, timestamp: float) -> None: + return await asyncio.to_thread(_set_win_time, file, timestamp) + + +elif sys.platform == "darwin": + # SetFile is not standard on macOS; only users with Xcode installed will have it + MAC_OS_SET_FILE = shutil.which("SetFile") + + async def set_creation_time(file: Path, timestamp: float) -> None: + if MAC_OS_SET_FILE: + time_string = datetime.datetime.fromtimestamp(timestamp).strftime("%m/%d/%Y %H:%M:%S") + process = await asyncio.subprocess.create_subprocess_exec( + MAC_OS_SET_FILE, + "-d", + time_string, + file, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + _ = await process.wait() + +else: + + async def set_creation_time(file: Path, timestamp: float) -> None: ... + TimeStamp = NewType("TimeStamp", int) DateOrder: TypeAlias = Literal["DMY", "DYM", "MDY", "MYD", "YDM", "YMD"] @@ -22,13 +114,6 @@ _DEFAULT_PARSERS: list[ParserKind] = ["relative-time", "custom-formats", "absolute-time", "no-spaces-time"] _DEFAULT_DATE_ORDER = "MDY" -try: - from tzlocal import get_localzone - - _TIMEZONE = get_localzone() -except (ImportError, LookupError): - _TIMEZONE = None - def _coerce_to_list(value: _S | set[_S] | list[_S] | tuple[_S, ...]
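A worked example of the FILETIME arithmetic in `_set_win_time` above (pure integer math, runnable on any OS): seconds are rescaled to 100-nanosecond ticks, offset from the 1601 epoch, and split into the two 32-bit halves that `wintypes.FILETIME` expects, low word first.

WIN_EPOCH_OFFSET = 116_444_736_000_000_000  # 11_644_473_600 s between the epochs, in 100 ns ticks

unix_ts = 1_700_000_000.0                   # 2023-11-14T22:13:20Z
filetime = int(unix_ts * 1e7 + WIN_EPOCH_OFFSET)

low, high = filetime & 0xFFFFFFFF, filetime >> 32
assert (high << 32) | low == filetime       # the two DWORDs recombine to the 64-bit value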
| None) -> list[_S]: if value is None: diff --git a/cyberdrop_dl/utils/json.py b/cyberdrop_dl/utils/json.py index a218160a4..20f4adc34 100644 --- a/cyberdrop_dl/utils/json.py +++ b/cyberdrop_dl/utils/json.py @@ -1,6 +1,5 @@ from __future__ import annotations -import asyncio import base64 import dataclasses import datetime @@ -151,14 +150,11 @@ def dumps(obj: object, /, *, sort_keys: bool = False, indent: int | None = None) return encoder.encode(obj) -async def dump_jsonl(data: Iterable[dict[str, Any]], /, file: Path, *, append: bool = True) -> None: - def dump(): - with file.open(mode="a" if append else "w", encoding="utf8") as f: - for item in data: - f.writelines(_DEFAULT_ENCODER.iterencode(item)) - f.write("\n") - - await asyncio.to_thread(dump) +def dump_jsonl(data: Iterable[dict[str, Any]], /, file: Path, *, append: bool = True) -> None: + with file.open(mode="a" if append else "w", encoding="utf8") as f: + for item in data: + f.writelines(_DEFAULT_ENCODER.iterencode(item)) + f.write("\n") loads = _verbose_decode_error_msg(json.loads) diff --git a/cyberdrop_dl/utils/logger.py b/cyberdrop_dl/utils/logger.py index 662f2f213..b24fa0664 100644 --- a/cyberdrop_dl/utils/logger.py +++ b/cyberdrop_dl/utils/logger.py @@ -12,7 +12,6 @@ from rich._log_render import LogRender from rich.console import Console, Group -from rich.containers import Lines, Renderables from rich.logging import RichHandler from rich.measure import Measurement from rich.padding import Padding @@ -36,8 +35,9 @@ from datetime import datetime from rich.console import ConsoleRenderable + from rich.containers import Lines - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager _P = ParamSpec("_P") _ExitCode = str | int | None @@ -57,7 +57,7 @@ def getMessage(self) -> str: # noqa: N802 msg = str(self._proccess_msg(self.msg)) if self.args: - args = map(self._proccess_msg, self.args) + args = tuple(map(self._proccess_msg, self.args)) try: return msg.format(*args) except Exception: @@ -75,12 +75,14 @@ def _proccess_msg(msg: object) -> object: logging.setLogRecordFactory(JsonLogRecord) +DEFAULT_CONSOLE_WIDTH = 240 + class LogHandler(RichHandler): """Rich Handler with default settings, automatic console creation and custom log render to remove padding in files.""" def __init__( - self, level: int = 10, file: IO[str] | None = None, width: int | None = None, debug: bool = False, **kwargs + self, level: int = 10, file: IO[str] | None = None, width: int | None = None, debug: bool = False ) -> None: is_file: bool = file is not None redacted: bool = is_file and not debug @@ -89,9 +91,17 @@ def __init__( console = _DEFAULT_CONSOLE else: console = console_cls(file=file, width=width) - options = constants.RICH_HANDLER_DEBUG_CONFIG if debug else constants.RICH_HANDLER_CONFIG - options = options | kwargs - super().__init__(level, console, show_time=is_file, **options) + + super().__init__( + level, + console, + show_time=is_file, + rich_tracebacks=True, + tracebacks_show_locals=True, + locals_max_string=DEFAULT_CONSOLE_WIDTH, + tracebacks_extra_lines=2, + locals_max_length=20, + ) if is_file: self._log_render = NoPaddingLogRender(show_level=True) @@ -136,7 +146,7 @@ class NoPaddingLogRender(LogRender): cdl_padding: int = 0 EXCLUDE_PATH_LOGGING_FROM: tuple[str, ...] 
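Hypothetical usage of the now-synchronous `dump_jsonl` above (module path as in this diff): each mapping becomes one JSON object per line (JSON Lines), and `append=True` is the default.

from pathlib import Path

from cyberdrop_dl.utils.json import dump_jsonl

dump_jsonl([{"url": "https://example.com/a"}, {"url": "https://example.com/b"}], file=Path("results.jsonl"))
# results.jsonl gains two lines like:
#   {"url": "https://example.com/a"}
#   {"url": "https://example.com/b"}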
= "logger.py", "base.py", "session.py", "cache_control.py" - def __call__( # type: ignore[reportIncompatibleMethodOverride] + def __call__( # type: ignore[reportIncompatibleMethodOverride] # pyright: ignore[reportIncompatibleMethodOverride] self, console: Console, renderables: Iterable[ConsoleRenderable], @@ -163,6 +173,7 @@ def __call__( # type: ignore[reportIncompatibleMethodOverride] output.append(log_time_display) output.pad_right(1) self._last_time = log_time_display + if self.show_level: output.append(level) output.pad_right(1) @@ -184,12 +195,13 @@ def __call__( # type: ignore[reportIncompatibleMethodOverride] padded_lines: list[ConsoleRenderable] = [] - for renderable in Renderables(renderables): # type: ignore + for renderable in renderables: if isinstance(renderable, Text): renderable = _indent_text(renderable, console, self.cdl_padding) renderable.stylize("log.message") - output.append(renderable) + _ = output.append(renderable) continue + padded_lines.append(Padding(renderable, (0, 0, 0, self.cdl_padding), expand=False)) return Group(output, *padded_lines) @@ -244,7 +256,7 @@ def log_with_color(message: Text | str, style: str, level: int = 20, show_in_sta def log_spacer(level: int, char: str = "-", *, log_to_console: bool = True, log_to_file: bool = True) -> None: - spacer = char * min(int(constants.DEFAULT_CONSOLE_WIDTH / 2), 50) + spacer = char * min(int(DEFAULT_CONSOLE_WIDTH / 2), 50) if log_to_file: log(spacer, level) if log_to_console and constants.CONSOLE_LEVEL >= 50: @@ -280,7 +292,7 @@ def _setup_startup_logger() -> Generator[None]: file_handler = LogHandler( level=10, file=file.open("w", encoding="utf8"), - width=constants.DEFAULT_CONSOLE_WIDTH, + width=DEFAULT_CONSOLE_WIDTH, ) startup_logger.addHandler(file_handler) except OSError: diff --git a/cyberdrop_dl/utils/sorting.py b/cyberdrop_dl/utils/sorting.py index f817f44f8..4f3b4ea4b 100644 --- a/cyberdrop_dl/utils/sorting.py +++ b/cyberdrop_dl/utils/sorting.py @@ -9,18 +9,17 @@ import imagesize -from cyberdrop_dl import constants -from cyberdrop_dl.constants import FILE_FORMATS +from cyberdrop_dl import config, constants from cyberdrop_dl.utils import strings from cyberdrop_dl.utils.ffmpeg import probe from cyberdrop_dl.utils.logger import log, log_with_color from cyberdrop_dl.utils.utilities import purge_dir_tree if TYPE_CHECKING: - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager -async def get_modified_date(file: Path) -> datetime: +async def _get_modified_date(file: Path) -> datetime: stat = await asyncio.to_thread(file.stat) return datetime.fromtimestamp(stat.st_mtime).replace(microsecond=0) @@ -28,12 +27,11 @@ async def get_modified_date(file: Path) -> datetime: class Sorter: def __init__(self, manager: Manager) -> None: self.manager = manager - self.download_folder = manager.path_manager.scan_folder or manager.path_manager.download_folder - self.sorted_folder = manager.path_manager.sorted_folder - self.incrementer_format: str = manager.config_manager.settings_data.sorting.sort_incrementer_format - self.db_manager = manager.db_manager + self.download_folder = config.get().sorting.scan_folder or config.get().files.download_folder + self.sorted_folder = config.get().sorting.sort_folder + self.incrementer_format: str = config.get().sorting.sort_incrementer_format - settings = manager.config_manager.settings_data.sorting + settings = config.get().sorting self.audio_format: str | None = settings.sorted_audio self.image_format: str | None = settings.sorted_image 
self.video_format: str | None = settings.sorted_video @@ -49,7 +47,7 @@ def _move_file(self, old_path: Path, new_path: Path) -> bool: new_path.parent.mkdir(parents=True, exist_ok=True) try: - old_path.rename(new_path) + _ = old_path.rename(new_path) except FileExistsError: if old_path.stat().st_size == new_path.stat().st_size: old_path.unlink() @@ -58,7 +56,7 @@ def _move_file(self, old_path: Path, new_path: Path) -> bool: new_filename = f"{new_path.stem}{self.incrementer_format.format(i=auto_index)}{new_path.suffix}" possible_new_path = new_path.parent / new_filename try: - old_path.rename(possible_new_path) + _ = old_path.rename(possible_new_path) break except FileExistsError: continue @@ -88,11 +86,8 @@ async def run(self) -> None: _ = purge_dir_tree(self.download_folder) async def _sort_files(self, files_to_sort: dict[str, list[Path]]) -> None: - queue_length = len(files_to_sort) - self.manager.progress_manager.sort_progress.set_queue_length(queue_length) - for folder_name, files in files_to_sort.items(): - task_id = self.manager.progress_manager.sort_progress.add_task(folder_name, len(files)) + task_id = self.manager.progress_manager.sorting.new_task(folder_name, len(files)) for file in files: ext = file.suffix.lower() @@ -100,20 +95,18 @@ async def _sort_files(self, files_to_sort: dict[str, list[Path]]) -> None: if ext in constants.TempExt: continue - if ext in FILE_FORMATS["Audio"]: + if ext in constants.FileFormats.AUDIO: await self.sort_audio(file, folder_name) - elif ext in FILE_FORMATS["Images"]: + elif ext in constants.FileFormats.IMAGE: await self.sort_image(file, folder_name) - elif ext in FILE_FORMATS["Videos"]: + elif ext in constants.FileFormats.VIDEO: await self.sort_video(file, folder_name) else: await self.sort_other(file, folder_name) - self.manager.progress_manager.sort_progress.advance_folder(task_id) + self.manager.progress_manager.sorting.advance_folder(task_id) - self.manager.progress_manager.sort_progress.remove_task(task_id) - queue_length -= 1 - self.manager.progress_manager.sort_progress.set_queue_length(queue_length) + self.manager.progress_manager.sorting.remove_task(task_id) async def sort_audio(self, file: Path, base_name: str) -> None: """Sorts an audio file into the sorted audio folder.""" @@ -140,7 +133,7 @@ async def sort_audio(self, file: Path, base_name: str) -> None: length=duration, sample_rate=sample_rate, ): - self.manager.progress_manager.sort_progress.increment_audio() + self.manager.progress_manager.sorting.increment_audio() async def sort_image(self, file: Path, base_name: str) -> None: """Sorts an image file into the sorted image folder.""" @@ -166,7 +159,7 @@ async def sort_image(self, file: Path, base_name: str) -> None: resolution=resolution, width=width, ): - self.manager.progress_manager.sort_progress.increment_image() + self.manager.progress_manager.sorting.increment_image() async def sort_video(self, file: Path, base_name: str) -> None: """Sorts a video file into the sorted video folder.""" @@ -199,7 +192,7 @@ async def sort_video(self, file: Path, base_name: str) -> None: resolution=resolution, width=width, ): - self.manager.progress_manager.sort_progress.increment_video() + self.manager.progress_manager.sorting.increment_video() async def sort_other(self, file: Path, base_name: str) -> None: """Sorts an other file into the sorted other folder.""" @@ -207,10 +200,10 @@ async def sort_other(self, file: Path, base_name: str) -> None: return if await self._process_file_move(file, base_name, self.other_format): - 
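The `FileExistsError` loop in `_move_file` above retries with an incrementer suffix until a free name is found. A simplified standalone sketch with a hypothetical `next_free_name` helper; the real format string comes from `sort_incrementer_format`, " ({i})" is only an assumed default:

from pathlib import Path

def next_free_name(new_path: Path, incrementer_format: str = " ({i})") -> Path:
    if not new_path.exists():
        return new_path
    for i in range(1, 1000):  # rising index until the name is free
        candidate = new_path.with_name(f"{new_path.stem}{incrementer_format.format(i=i)}{new_path.suffix}")
        if not candidate.exists():
            return candidate
    raise FileExistsError(new_path)

# "video.mp4" taken -> "video (1).mp4", then "video (2).mp4", ...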
self.manager.progress_manager.sort_progress.increment_other() + self.manager.progress_manager.sorting.increment_other() async def _process_file_move(self, file: Path, base_name: str, format_str: str, **kwargs: Any) -> bool: - file_date = await get_modified_date(file) + file_date = await _get_modified_date(file) file_date_us = file_date.strftime("%Y-%d-%m") file_date_iso = file_date.strftime("%Y-%m-%d") @@ -232,7 +225,7 @@ async def _process_file_move(self, file: Path, base_name: str, format_str: str, ) new_file = Path(file_path) - return self._move_file(file, new_file) + return await asyncio.to_thread(self._move_file, file, new_file) def _subfolders(directory: Path) -> list[Path]: diff --git a/cyberdrop_dl/utils/utilities.py b/cyberdrop_dl/utils/utilities.py index 4e985db0f..0788697bc 100644 --- a/cyberdrop_dl/utils/utilities.py +++ b/cyberdrop_dl/utils/utilities.py @@ -35,7 +35,7 @@ from pydantic import ValidationError from yarl import URL -from cyberdrop_dl import constants +from cyberdrop_dl import config, constants from cyberdrop_dl.data_structures import AbsoluteHttpURL from cyberdrop_dl.exceptions import ( CDLBaseError, @@ -56,7 +56,7 @@ from cyberdrop_dl.crawlers import Crawler from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL, MediaItem, ScrapeItem from cyberdrop_dl.downloader.downloader import Downloader - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager CrawerOrDownloader = TypeVar("CrawerOrDownloader", bound=Crawler | Downloader) Origin = TypeVar("Origin", bound=ScrapeItem | MediaItem | URL) @@ -138,8 +138,8 @@ def error_handling_context(self: Crawler | Downloader, item: ScrapeItem | MediaI return log(f"Scrape Failed: {link_to_show} ({error_log_msg.main_log_msg})", 40, exc_info=exc_info) - self.manager.log_manager.write_scrape_error_log(link_to_show, error_log_msg.csv_log_msg, origin) - self.manager.progress_manager.scrape_stats_progress.add_failure(error_log_msg.ui_failure) + self.manager.logs.write_scrape_error_log(link_to_show, error_log_msg.csv_log_msg, origin) + self.manager.progress_manager.scrape_errors.add_failure(error_log_msg.ui_failure) @overload @@ -201,7 +201,7 @@ def sanitize_folder(title: str) -> str: title = title.replace("\n", "").strip() title = title.replace("\t", "").strip() - title = re.sub(" +", " ", title) + title = re.sub(r" +", " ", title) title = sanitize_filename(title, "-") title = re.sub(r"\.{2,}", ".", title) title = title.rstrip(".").strip() @@ -256,15 +256,15 @@ def get_filename_and_ext(filename: str, forum: bool = False, mime_type: str | No def get_download_path(manager: Manager, scrape_item: ScrapeItem, domain: str) -> Path: """Returns the path to the download folder.""" - download_dir = manager.path_manager.download_folder + download_dir = config.get().files.download_folder return download_dir / scrape_item.create_download_path(domain) -def remove_file_id(manager: Manager, filename: str, ext: str) -> tuple[str, str]: +def remove_file_id(filename: str, ext: str) -> tuple[str, str]: """Removes the additional string some websites adds to the end of every filename.""" original_filename = filename - if not manager.config_manager.settings_data.download_options.remove_generated_id_from_filenames: + if not config.get().download_options.remove_generated_id_from_filenames: return original_filename, filename filename = filename.rsplit(ext, 1)[0] @@ -275,7 +275,7 @@ def remove_file_id(manager: Manager, filename: str, ext: str) -> tuple[str, str] if re.match(constants.RAR_MULTIPART_PATTERN, 
tail_no_dot) and ext == ".rar" and "-" in filename: filename, part = filename.rsplit("-", 1) filename = f"{filename}.{part}" - elif ext_no_dot.isdigit() and tail in constants.FILE_FORMATS["7z"] and "-" in filename: + elif ext_no_dot.isdigit() and tail in constants.FileFormats._7Z and "-" in filename: filename, _7z_ext = filename.rsplit("-", 1) filename = f"{filename}.{_7z_ext}" if not filename.endswith(ext): @@ -286,11 +286,11 @@ def remove_file_id(manager: Manager, filename: str, ext: str) -> tuple[str, str] """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" -def clear_term(): - os.system("cls" if os.name == "nt" else "clear") +def clear_term() -> None: + _ = os.system("cls" if os.name == "nt" else "clear") -def get_size(path: os.DirEntry) -> int | None: +def get_size(path: os.DirEntry[str]) -> int | None: try: return path.stat(follow_symlinks=False).st_size except (OSError, ValueError): @@ -332,7 +332,7 @@ def purge_dir_tree(dirname: Path | str) -> bool: def check_partials_and_empty_folders(manager: Manager) -> None: """Checks for partial downloads, deletes partial files and empty folders.""" - settings = manager.config_manager.settings_data.runtime_options + settings = config.get().runtime_options if settings.delete_partial_files: delete_partial_files(manager) if not settings.skip_check_for_partial_files: @@ -361,25 +361,25 @@ def _partial_files(dir: Path | str) -> Generator[Path]: def delete_partial_files(manager: Manager) -> None: """Deletes partial download files recursively.""" log_red("Deleting partial downloads...") - for file in _partial_files(manager.path_manager.download_folder): + for file in _partial_files(config.get().files.download_folder): file.unlink(missing_ok=True) def check_for_partial_files(manager: Manager) -> None: """Checks if there are partial downloads in any subdirectory and logs if found.""" log_yellow("Checking for partial downloads...") - has_partial_files = next(_partial_files(manager.path_manager.download_folder), None) + has_partial_files = next(_partial_files(config.get().files.download_folder), None) if has_partial_files: log_yellow("There are partial downloads in the downloads folder") -def delete_empty_folders(manager: Manager): +def delete_empty_folders(manager: Manager) -> None: """Deletes empty folders efficiently.""" log_yellow("Checking for empty folders...") - purge_dir_tree(manager.path_manager.download_folder) + purge_dir_tree(config.get().files.download_folder) - sorted_folder = manager.path_manager.sorted_folder - if sorted_folder and manager.config_manager.settings_data.sorting.sort_downloads: + sorted_folder = config.get().sorting.sort_folder + if sorted_folder and config.get().sorting.sort_downloads: purge_dir_tree(sorted_folder) diff --git a/cyberdrop_dl/utils/webhook.py b/cyberdrop_dl/utils/webhook.py index a80197348..2615d2cca 100644 --- a/cyberdrop_dl/utils/webhook.py +++ b/cyberdrop_dl/utils/webhook.py @@ -7,7 +7,7 @@ import rich from aiohttp import FormData -from cyberdrop_dl import constants +from cyberdrop_dl import config, constants from cyberdrop_dl.utils import aio from cyberdrop_dl.utils.logger import log, log_debug, log_spacer @@ -15,8 +15,8 @@ from pathlib import Path from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL - from cyberdrop_dl.managers.manager import Manager - from cyberdrop_dl.models.base_models import HttpAppriseURL + from cyberdrop_dl.managers import Manager + from cyberdrop_dl.models.base import HttpAppriseURL 
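A minimal sketch of the partial-download cleanup flow in `delete_partial_files` above, assuming incomplete downloads carry a ".part" suffix (stand-alone, no Manager required):

from pathlib import Path

def partial_files(root: Path):
    yield from root.rglob("*.part")  # recurse into every subdirectory

for file in partial_files(Path("Downloads")):
    file.unlink(missing_ok=True)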
_DEFAULT_DIFF_LINE_FORMAT: str = "{}" @@ -64,14 +64,14 @@ async def _prepare_form(webhook: HttpAppriseURL, main_log: Path) -> FormData: async def send_webhook_message(manager: Manager) -> None: """Outputs the stats to a code block for webhook messages.""" - webhook = manager.config_manager.settings_data.logs.webhook + webhook = config.get().logs.webhook if not webhook: return rich.print("\nSending Webhook Notifications.. ") url = cast("AbsoluteHttpURL", webhook.url.get_secret_value()) - form = await _prepare_form(webhook, manager.path_manager.main_log) + form = await _prepare_form(webhook, config.get().logs.main_log) logger = log result = constants.NotificationResult.FAILED.value diff --git a/cyberdrop_dl/utils/yaml.py b/cyberdrop_dl/utils/yaml.py index eefc4b527..785362328 100644 --- a/cyberdrop_dl/utils/yaml.py +++ b/cyberdrop_dl/utils/yaml.py @@ -1,7 +1,5 @@ from __future__ import annotations -import logging -import sys from datetime import date, timedelta from enum import Enum from pathlib import Path, PurePath @@ -11,7 +9,6 @@ from pydantic import BaseModel, ValidationError from yarl import URL -from cyberdrop_dl.constants import CLI_VALIDATION_ERROR_FOOTER, VALIDATION_ERROR_FOOTER from cyberdrop_dl.exceptions import InvalidYamlError if TYPE_CHECKING: @@ -70,10 +67,9 @@ def load(file: Path, *, create: bool = False) -> dict[str, Any]: raise InvalidYamlError(file, e) from None -def handle_validation_error(e: ValidationError, *, title: str = "", file: Path | None = None): +def format_validation_error(e: ValidationError, *, title: str = "", file: Path | None = None): - """Logs the validation error details and exits the program.""" + """Formats the validation error details into a printable message.""" - startup_logger = logging.getLogger("cyberdrop_dl_startup") error_count = e.error_count() msg = "" if file: @@ -82,15 +78,16 @@ def handle_validation_error(e: ValidationError, *, title: str = "", file: Path | show_title = title or e.title msg += f"Found {error_count} error{'s' if error_count > 1 else ''} [{show_title}]:" from_cli = title == "CLI arguments" - footer = CLI_VALIDATION_ERROR_FOOTER if from_cli else VALIDATION_ERROR_FOOTER + for error in e.errors(include_url=False): option_name = get_field_name(error, from_cli) msg += f"\n\nOption '{option_name}' with value '{error['input']}' is invalid:\n" msg += f" {error['msg']}" - msg += "\n\n" + footer - startup_logger.error(msg) - sys.exit(1) + if not from_cli: + msg += "\n\n" + "Please delete the file or fix the errors." + + return msg def get_field_name(error: ErrorDetails, from_cli: bool = False) -> str: diff --git a/pyproject.toml b/pyproject.toml index fa2fbac93..9e6536d5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ dependencies = [ "beautifulsoup4 >=4.14.3", "certifi >=2026.1.4", "curl-cffi >=0.13,<0.14; implementation_name == 'cpython' ", + "cyclopts>=4.5.4", "dateparser >=1.2.2", "imagesize >=1.4.1", "inquirerpy >=0.3.4", diff --git a/tests/conftest.py b/tests/conftest.py index a4a8a40f4..7a73f6e1c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,10 +4,10 @@ from typing import TYPE_CHECKING import pytest - -from cyberdrop_dl.managers.manager import Manager from cyberdrop_dl.scraper import scrape_mapper + +from cyberdrop_dl.managers import Manager + if TYPE_CHECKING: from collections.abc import AsyncGenerator from pathlib import Path @@ -69,9 +69,6 @@ def post_startup_manager(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Man downloads = str(tmp_path / "Downloads") monkeypatch.chdir(tmp_path) bare_manager = Manager(("--appdata-folder", appdata, "-d", downloads,
"--download-tiktok-audios")) - bare_manager.startup() - bare_manager.path_manager.startup() - bare_manager.log_manager.startup() return bare_manager diff --git a/tests/crawlers/test_crawlers.py b/tests/crawlers/test_crawlers.py index 7fa246d26..244a0f939 100644 --- a/tests/crawlers/test_crawlers.py +++ b/tests/crawlers/test_crawlers.py @@ -14,12 +14,12 @@ from cyberdrop_dl.data_structures import AbsoluteHttpURL from cyberdrop_dl.data_structures.url_objects import MediaItem, ScrapeItem -from cyberdrop_dl.scraper.scrape_mapper import ScrapeMapper +from cyberdrop_dl.scrape_mapper import ScrapeMapper from cyberdrop_dl.utils.utilities import parse_url if TYPE_CHECKING: from cyberdrop_dl.crawlers.crawler import Crawler - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager def _crawler_mock(func: str = "handle_media_item") -> mock._patch[mock.AsyncMock]: diff --git a/tests/crawlers/test_xenforo.py b/tests/crawlers/test_xenforo.py index 1fd3f48b1..b882545f4 100644 --- a/tests/crawlers/test_xenforo.py +++ b/tests/crawlers/test_xenforo.py @@ -11,7 +11,7 @@ from cyberdrop_dl.crawlers.xenforo import xenforo from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL, ScrapeItem from cyberdrop_dl.exceptions import ScrapeError -from cyberdrop_dl.managers.manager import Manager +from cyberdrop_dl.managers import Manager if TYPE_CHECKING: from collections.abc import AsyncGenerator @@ -74,9 +74,6 @@ async def post_startup_manager(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) downloads = str(tmp_path / "Downloads") monkeypatch.chdir(tmp_path) manager = Manager(("--appdata-folder", appdata, "-d", downloads)) - manager.startup() - manager.path_manager.startup() - manager.log_manager.startup() await manager.async_startup() yield manager await manager.async_db_close() diff --git a/tests/fake_classes/managers.py b/tests/fake_classes/managers.py index 90dbb1fdb..dba1fef1f 100644 --- a/tests/fake_classes/managers.py +++ b/tests/fake_classes/managers.py @@ -1,9 +1,9 @@ from typing import Literal -from cyberdrop_dl.managers.cache_manager import CacheManager +from cyberdrop_dl.managers.cache_manager import Cache -class FakeCacheManager(CacheManager): +class FakeCacheManager(Cache): def get(self, _: str) -> Literal[True]: return True diff --git a/tests/test_apprise.py b/tests/test_apprise.py index fe815b27f..53912ef16 100644 --- a/tests/test_apprise.py +++ b/tests/test_apprise.py @@ -4,13 +4,12 @@ from pathlib import Path import pytest +from cyberdrop_dl.managers.config_manager import ConfigManager from rich.text import Text from cyberdrop_dl import constants from cyberdrop_dl.constants import NotificationResult -from cyberdrop_dl.managers.config_manager import ConfigManager -from cyberdrop_dl.managers.manager import Manager -from cyberdrop_dl.managers.path_manager import PathManager +from cyberdrop_dl.managers import Manager from cyberdrop_dl.utils import apprise from tests.fake_classes.managers import FakeCacheManager @@ -81,11 +80,6 @@ def test_get_apprise_urls() -> None: async def send_notification(test_case: AppriseTestCase) -> None: - FAKE_MANAGER.config_manager.apprise_urls = [] - if test_case.urls and any(test_case.urls): - FAKE_MANAGER.config_manager.apprise_urls = apprise.get_apprise_urls(urls=test_case.urls) - FAKE_MANAGER.path_manager = PathManager(FAKE_MANAGER) - FAKE_MANAGER.path_manager.main_log = test_case.file or TEST_FILES_PATH / "valid_single_url.txt" constants.LOG_OUTPUT_TEXT = Text(test_case.name) result, logs = await 
apprise.send_apprise_notifications(FAKE_MANAGER) assert result.value == test_case.result.value, f"Result for this case should be {test_case.result.value}" diff --git a/tests/test_cli.py b/tests/test_cli.py index 6fc36026f..c8428ddd1 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -4,6 +4,7 @@ import pytest from pydantic import ValidationError +from cyberdrop_dl import appdata, config from cyberdrop_dl.cli import parse_args from cyberdrop_dl.main import _create_director, run @@ -34,11 +35,11 @@ def test_startup_logger_should_not_be_created_on_invalid_cookies(tmp_cwd: Path) from cyberdrop_dl.utils.logger import catch_exceptions director = _create_director("--download") - cookies_file = director.manager.path_manager.cookies_dir / "cookies.txt" + cookies_file = appdata.get().cookies_dir / "cookies.txt" cookies_file.write_text("Not a cookie file", encoding="utf8") catch_exceptions(director.run)() - logs = director.manager.path_manager.main_log.read_text(encoding="utf8") + logs = config.get().logs.main_log.read_text(encoding="utf8") assert "does not look like a Netscape format cookies file" in logs startup_file = tmp_cwd / "startup.log" diff --git a/tests/test_flaresolverr.py b/tests/test_flaresolverr.py index 612df2856..261222159 100644 --- a/tests/test_flaresolverr.py +++ b/tests/test_flaresolverr.py @@ -4,8 +4,8 @@ from cyberdrop_dl.clients.flaresolverr import FlareSolverr, _Command from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL -from cyberdrop_dl.managers.manager import Manager -from cyberdrop_dl.scraper.scrape_mapper import ScrapeMapper +from cyberdrop_dl.managers import Manager +from cyberdrop_dl.scrape_mapper import ScrapeMapper ENV_NAME = "CDL_FLARESOLVERR" FLARESOLVER_URL = os.environ.get(ENV_NAME, "") # or "http://localhost:8191" diff --git a/tests/test_hashing.py b/tests/test_hashing.py index 0ae9498de..0bab7bf4e 100644 --- a/tests/test_hashing.py +++ b/tests/test_hashing.py @@ -6,12 +6,13 @@ import pytest +from cyberdrop_dl import appdata, config from cyberdrop_dl.clients.hash_client import hash_directory_scanner if TYPE_CHECKING: from pathlib import Path - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager def get_hashes(path: Path) -> set[tuple[str, str]]: @@ -54,15 +55,15 @@ def test_hash_directory_scanner(manager: Manager, expected_results: set[tuple[st n_files = max(count.values()) algos = count.keys() assert len(expected_results) == len(algos) * n_files - manager.config.dupe_cleanup_options.add_md5_hash = "md5" in algos - manager.config.dupe_cleanup_options.add_sha256_hash = "sha256" in algos + config.get().dupe_cleanup_options.add_md5_hash = "md5" in algos + config.get().dupe_cleanup_options.add_sha256_hash = "sha256" in algos - manager.path_manager.download_folder.mkdir(parents=True) - db_path = manager.path_manager.history_db - hash_directory_scanner(manager, manager.path_manager.download_folder) + config.get().files.download_folder.mkdir(parents=True) + db_path = appdata.get().db_file + hash_directory_scanner(manager, config.get().files.download_folder) assert not get_hashes(db_path) - create_files(manager.path_manager.download_folder, n_files) - hash_directory_scanner(manager, manager.path_manager.download_folder) + create_files(config.get().files.download_folder, n_files) + hash_directory_scanner(manager, config.get().files.download_folder) results = get_hashes(db_path) assert len(results) == len(expected_results) assert results == expected_results diff --git a/tests/test_manager.py 
b/tests/test_manager.py index 42fdd84aa..21b8750d7 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -5,7 +5,9 @@ import pytest -from cyberdrop_dl.managers.manager import Manager, merge_dicts +from cyberdrop_dl import config +from cyberdrop_dl.managers import Manager, log_app_state +from cyberdrop_dl.models import merge_dicts if TYPE_CHECKING: from pydantic import BaseModel @@ -105,9 +107,10 @@ def test_value_should_not_overwrite_dict(self) -> None: def test_args_logging_should_censor_webhook( running_manager: Manager, logs: pytest.LogCaptureFixture, webhook: str, output: str ) -> None: - logs_model = running_manager.config_manager.settings_data.logs - running_manager.config_manager.settings_data.logs = update_model(logs_model, webhook=webhook) - running_manager.args_logging() + logs_model = config.get().logs + config.get().logs = update_model(logs_model, webhook=webhook) + log_app_state() + assert logs.messages assert "Starting Cyberdrop-DL Process" in logs.text assert webhook not in logs.text @@ -121,7 +124,7 @@ async def test_async_db_close(running_manager: Manager) -> None: await running_manager.async_startup() assert not isinstance(running_manager.db_manager, Field) assert not isinstance(running_manager.hash_manager, Field) - assert "overwrite" not in str(running_manager.log_manager.main_log) + assert "overwrite" not in str(running_manager.logs.main_log) await running_manager.async_db_close() assert isinstance(running_manager.db_manager, Field) assert isinstance(running_manager.hash_manager, Field) diff --git a/tests/test_scrape_mapper.py b/tests/test_scrape_mapper.py index 71523b318..7680fea7a 100644 --- a/tests/test_scrape_mapper.py +++ b/tests/test_scrape_mapper.py @@ -9,7 +9,7 @@ from cyberdrop_dl.scraper import scrape_mapper if TYPE_CHECKING: - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager TEST_BASE_CRAWLER = next(iter(crawlers.GENERIC_CRAWLERS)) diff --git a/tests/test_startup.py b/tests/test_startup.py index bd1a434bc..30ae20b69 100644 --- a/tests/test_startup.py +++ b/tests/test_startup.py @@ -1,9 +1,9 @@ from pathlib import Path import pytest +from cyberdrop_dl.ui.program_ui import ProgramUI from cyberdrop_dl.main import run -from cyberdrop_dl.ui.program_ui import ProgramUI def test_startup(tmp_cwd: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None: diff --git a/tests/test_storage.py b/tests/test_storage.py index 0e6badec1..885ba8799 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -6,7 +6,7 @@ import pytest -from cyberdrop_dl.managers.manager import Manager +from cyberdrop_dl.managers import Manager from cyberdrop_dl.managers.storage_manager import StorageManager diff --git a/uv.lock b/uv.lock index c1c1de2ba..179ae2423 100644 --- a/uv.lock +++ b/uv.lock @@ -730,6 +730,7 @@ dependencies = [ { name = "beautifulsoup4" }, { name = "certifi" }, { name = "curl-cffi", marker = "implementation_name == 'cpython'" }, + { name = "cyclopts" }, { name = "dateparser" }, { name = "imagesize" }, { name = "inquirerpy" }, @@ -775,6 +776,7 @@ requires-dist = [ { name = "beautifulsoup4", specifier = ">=4.14.3" }, { name = "certifi", specifier = ">=2026.1.4" }, { name = "curl-cffi", marker = "implementation_name == 'cpython'", specifier = ">=0.13,<0.14" }, + { name = "cyclopts", specifier = ">=4.5.4" }, { name = "dateparser", specifier = ">=1.2.2" }, { name = "imagesize", specifier = ">=1.4.1" }, { name = "inquirerpy", specifier = ">=0.3.4" }, @@ -807,6 +809,21 @@ dev = [ ] extras = 
[{ name = "apprise", specifier = ">=1.9.7" }] +[[package]] +name = "cyclopts" +version = "4.5.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "docstring-parser" }, + { name = "rich" }, + { name = "rich-rst" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3b/d2/f37df900b163f51b4faacdb01bf4895c198906d67c5b2a85c2522de85459/cyclopts-4.5.4.tar.gz", hash = "sha256:eed4d6c76d4391aa796d8fcaabd50e5aad7793261792beb19285f62c5c456c8b", size = 162438, upload-time = "2026-02-20T00:58:46.161Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/0f/119fa63fa93e0a331fbedcb27162d8f88d3ba2f38eba1567e3e44307b857/cyclopts-4.5.4-py3-none-any.whl", hash = "sha256:ad001986ec403ca1dc1ed20375c439d62ac796295ea32b451dfe25d6696bc71a", size = 200225, upload-time = "2026-02-20T00:58:47.275Z" }, +] + [[package]] name = "dateparser" version = "1.3.0" @@ -831,6 +848,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, ] +[[package]] +name = "docstring-parser" +version = "0.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/9d/c3b43da9515bd270df0f80548d9944e389870713cc1fe2b8fb35fe2bcefd/docstring_parser-0.17.0.tar.gz", hash = "sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912", size = 27442, upload-time = "2025-07-21T07:35:01.868Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896, upload-time = "2025-07-21T07:35:00.684Z" }, +] + +[[package]] +name = "docutils" +version = "0.22.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ae/b6/03bb70946330e88ffec97aefd3ea75ba575cb2e762061e0e62a213befee8/docutils-0.22.4.tar.gz", hash = "sha256:4db53b1fde9abecbb74d91230d32ab626d94f6badfc575d6db9194a49df29968", size = 2291750, upload-time = "2025-12-18T19:00:26.443Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/10/5da547df7a391dcde17f59520a231527b8571e6f46fc8efb02ccb370ab12/docutils-0.22.4-py3-none-any.whl", hash = "sha256:d0013f540772d1420576855455d050a2180186c91c15779301ac2ccb3eeb68de", size = 633196, upload-time = "2025-12-18T19:00:18.077Z" }, +] + [[package]] name = "filelock" version = "3.24.3" @@ -1897,6 +1932,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" }, ] +[[package]] +name = "rich-rst" +version = "1.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "docutils" }, + { name = "rich" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bc/6d/a506aaa4a9eaa945ed8ab2b7347859f53593864289853c5d6d62b77246e0/rich_rst-1.3.2.tar.gz", hash = "sha256:a1196fdddf1e364b02ec68a05e8ff8f6914fee10fbca2e6b6735f166bb0da8d4", size = 14936, upload-time = "2025-10-14T16:49:45.332Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/13/2f/b4530fbf948867702d0a3f27de4a6aab1d156f406d72852ab902c4d04de9/rich_rst-1.3.2-py3-none-any.whl", hash = "sha256:a99b4907cbe118cf9d18b0b44de272efa61f15117c61e39ebdc431baf5df722a", size = 12567, upload-time = "2025-10-14T16:49:42.953Z" }, +] + [[package]] name = "ruff" version = "0.15.2"