From f31a882b4cd11bf6f0d44ea809a7318ceea8e353 Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Sun, 22 Feb 2026 01:41:08 -0500 Subject: [PATCH 01/23] refactor: move to cyclopts --- cyberdrop_dl/cli/__init__.py | 62 +++++++++++++++++++++-------- cyberdrop_dl/cli/model.py | 4 ++ cyberdrop_dl/config/_common.py | 18 +++++++-- cyberdrop_dl/config/auth_model.py | 4 +- cyberdrop_dl/config/config_model.py | 29 +++++++------- cyberdrop_dl/config/global_model.py | 14 +++---- cyberdrop_dl/models/base_models.py | 4 +- pyproject.toml | 1 + uv.lock | 48 ++++++++++++++++++++++ 9 files changed, 140 insertions(+), 44 deletions(-) diff --git a/cyberdrop_dl/cli/__init__.py b/cyberdrop_dl/cli/__init__.py index d7d1af189..851a762e6 100644 --- a/cyberdrop_dl/cli/__init__.py +++ b/cyberdrop_dl/cli/__init__.py @@ -4,8 +4,10 @@ import sys from argparse import SUPPRESS, ArgumentParser, RawDescriptionHelpFormatter from shutil import get_terminal_size -from typing import TYPE_CHECKING, Any, Final, NoReturn +from typing import TYPE_CHECKING, Annotated, Any, Final, NoReturn +from cyclopts import App, Parameter +from cyclopts.bind import normalize_tokens from pydantic import BaseModel, ValidationError from cyberdrop_dl import __version__, env @@ -17,6 +19,8 @@ from argparse import _ArgumentGroup as ArgGroup # pyright: ignore[reportPrivateUsage] from collections.abc import Sequence + from cyberdrop_dl.models.types import HttpURL + def is_terminal_in_portrait() -> bool: """Check if CDL is being run in portrait mode based on a few conditions.""" @@ -102,32 +106,58 @@ def make_parser() -> CLIParser: return CLIParser(parser, groups) +app = App(result_action="return_value", version=f"{__version__}NTFS") + + +@app.command() +def download( + links: Annotated[ + list[HttpURL], + Parameter( + name="links", + consume_multiple=True, + negative=[], + help="link(s) to content to download (passing multiple links is supported)", + ), + ] = [], # noqa: B006 + /, + *, + parsed_settings: ParsedArgs = ParsedArgs(), # pyright: ignore[reportCallInDefaultInitializer] # noqa: B008 +) -> ParsedArgs: + return parsed_settings + + +@app.command() +def show_supported_sites() -> NoReturn: + from cyberdrop_dl.utils.markdown import get_crawlers_info_as_rich_table + + table = get_crawlers_info_as_rich_table() + app.console.print(table) + sys.exit(0) + + def parse_args(args: Sequence[str] | None = None) -> ParsedArgs: """Parses the command line arguments passed into the program.""" + from cyberdrop_dl.utils.yaml import handle_validation_error - parsed_args = make_parser().parse_args(args) + args = normalize_tokens(args) + # if not args or args[0] != "download": + # args = ["download", *args] + try: - model = ParsedArgs.model_validate(parsed_args, extra="forbid") + command, bound, _ = app.parse_args(args, print_error=False, exit_on_error=False) + # assert command is download + settings: ParsedArgs = command(*bound.args, **bound.kwargs) except ValidationError as e: handle_validation_error(e, title="CLI arguments") sys.exit(1) - if model.cli_only_args.show_supported_sites: - show_supported_sites() + # if settings.cli_only_args.show_supported_sites: + # show_supported_sites() - return model - - -def show_supported_sites() -> NoReturn: - from rich import print - - from cyberdrop_dl.utils.markdown import get_crawlers_info_as_rich_table - - table = get_crawlers_info_as_rich_table() - print(table) - sys.exit(0) + return settings def _unflatten_nested_args(data: dict[str, Any]) -> dict[str, Any]: diff --git a/cyberdrop_dl/cli/model.py b/cyberdrop_dl/cli/model.py index 59a1ad8b5..e7a6174d7 100644 --- a/cyberdrop_dl/cli/model.py +++ b/cyberdrop_dl/cli/model.py @@ -4,6 +4,7 @@ from pathlib import Path from typing import Annotated, Any, Literal, Self +from cyclopts import Parameter from pydantic import BaseModel, Field, computed_field, field_validator, model_validator from cyberdrop_dl.cli.arguments import ArgumentParams @@ -18,10 +19,12 @@ class UIOptions(StrEnum): FULLSCREEN = auto() +@Parameter(name="*") class CLIargs(BaseModel): links: Annotated[ list[HttpURL], ArgumentParams(positional_only=True, metavar="LINK(s)"), + Parameter(show=False), ] = Field( default=[], description="link(s) to content to download (passing multiple links is supported)", @@ -135,6 +138,7 @@ def _check_mutually_exclusive(group: Iterable[Any], msg: str) -> None: raise ValueError(msg) +@Parameter(name="*") class ParsedArgs(BaseModel): cli_only_args: CLIargs = CLIargs() config_settings: ConfigSettings = ConfigSettings() diff --git a/cyberdrop_dl/config/_common.py b/cyberdrop_dl/config/_common.py index 997bb9b7e..cfffe48cc 100755 --- a/cyberdrop_dl/config/_common.py +++ b/cyberdrop_dl/config/_common.py @@ -1,14 +1,26 @@ from pathlib import Path -from typing import Self +from typing import Self, Unpack -from pydantic import BaseModel +from cyclopts import Parameter +from pydantic import BaseModel, ConfigDict from cyberdrop_dl.exceptions import InvalidYamlError from cyberdrop_dl.models import AliasModel, get_model_fields from cyberdrop_dl.utils import yaml -class ConfigModel(AliasModel): +@Parameter(name="*") +class FlatCLIParams: ... + + +class Settings(FlatCLIParams, AliasModel): ... + + +class ConfigGroup(Settings): + def __init_subclass__(cls, name: str | None = None, **kwargs: Unpack[ConfigDict]) -> None: + _ = Parameter(group=name or cls.__name__)(cls) + return super().__init_subclass__(**kwargs) + @classmethod def load_file(cls, file: Path, update_if_has_string: str) -> Self: default = cls() diff --git a/cyberdrop_dl/config/auth_model.py b/cyberdrop_dl/config/auth_model.py index af9cb8d78..bd918df0f 100755 --- a/cyberdrop_dl/config/auth_model.py +++ b/cyberdrop_dl/config/auth_model.py @@ -1,6 +1,6 @@ from pydantic import BaseModel -from cyberdrop_dl.config._common import ConfigModel +from cyberdrop_dl.config._common import ConfigGroup from cyberdrop_dl.models import AliasModel @@ -39,7 +39,7 @@ class RealDebridAuth(AliasModel): api_key: str = "" -class AuthSettings(ConfigModel): +class AuthSettings(ConfigGroup): coomer: CoomerAuth = CoomerAuth() gofile: GoFileAuth = GoFileAuth() imgur: ImgurAuth = ImgurAuth() diff --git a/cyberdrop_dl/config/config_model.py b/cyberdrop_dl/config/config_model.py index 0b33aa67e..8c131b664 100755 --- a/cyberdrop_dl/config/config_model.py +++ b/cyberdrop_dl/config/config_model.py @@ -1,14 +1,15 @@ +# ruff: noqa: RUF012 import itertools import re from datetime import date, datetime, timedelta from logging import DEBUG from pathlib import Path -from pydantic import BaseModel, ByteSize, Field, NonNegativeInt, field_serializer, field_validator +from pydantic import ByteSize, Field, NonNegativeInt, field_serializer, field_validator from cyberdrop_dl import constants from cyberdrop_dl.constants import BROWSERS, DEFAULT_APP_STORAGE, DEFAULT_DOWNLOAD_STORAGE, Hashing -from cyberdrop_dl.models import AliasModel, HttpAppriseURL +from cyberdrop_dl.models import HttpAppriseURL from cyberdrop_dl.models.types import ( ByteSizeSerilized, ListNonEmptyStr, @@ -24,7 +25,7 @@ from cyberdrop_dl.utils.strings import validate_format_string from cyberdrop_dl.utils.utilities import purge_dir_tree -from ._common import ConfigModel +from ._common import ConfigGroup, Settings ALL_SUPPORTED_SITES = ["<>"] _SORTING_COMMON_FIELDS = { @@ -39,7 +40,7 @@ } -class DownloadOptions(BaseModel): +class DownloadOptions(Settings): block_download_sub_folders: bool = False disable_download_attempt_limit: bool = False disable_file_timestamps: bool = False @@ -63,14 +64,14 @@ def valid_format(cls, value: str) -> str: return value -class Files(AliasModel): +class Files(Settings): download_folder: Path = Field(default=DEFAULT_DOWNLOAD_STORAGE, validation_alias="d") dump_json: bool = Field(default=False, validation_alias="j") input_file: Path = Field(default=DEFAULT_APP_STORAGE / "Configs/{config}/URLs.txt", validation_alias="i") save_pages_html: bool = False -class Logs(AliasModel): +class Logs(Settings): download_error_urls: LogPath = Path("Download_Error_URLs.csv") last_forum_post: LogPath = Path("Last_Scraped_Forum_Posts.csv") log_folder: Path = DEFAULT_APP_STORAGE / "Configs/{config}/Logs" @@ -117,7 +118,7 @@ def _delete_old_logs_and_folders(self, now: datetime | None = None) -> None: purge_dir_tree(self.log_folder) -class FileSizeLimits(BaseModel): +class FileSizeLimits(Settings): maximum_image_size: ByteSizeSerilized = ByteSize(0) maximum_other_size: ByteSizeSerilized = ByteSize(0) maximum_video_size: ByteSizeSerilized = ByteSize(0) @@ -126,7 +127,7 @@ class FileSizeLimits(BaseModel): minimum_video_size: ByteSizeSerilized = ByteSize(0) -class MediaDurationLimits(BaseModel): +class MediaDurationLimits(Settings): maximum_video_duration: timedelta = timedelta(seconds=0) maximum_audio_duration: timedelta = timedelta(seconds=0) minimum_video_duration: timedelta = timedelta(seconds=0) @@ -146,7 +147,7 @@ def parse_runtime_duration(input_date: timedelta | str | int | None) -> timedelt return to_timedelta(input_date) -class IgnoreOptions(BaseModel): +class IgnoreOptions(Settings): exclude_audio: bool = False exclude_images: bool = False exclude_other: bool = False @@ -172,7 +173,7 @@ def is_valid_regex(cls, value: str | None) -> str | None: return value -class RuntimeOptions(BaseModel): +class RuntimeOptions(Settings): console_log_level: NonNegativeInt = 100 deep_scrape: bool = False delete_partial_files: bool = False @@ -188,7 +189,7 @@ class RuntimeOptions(BaseModel): update_last_forum_post: bool = True -class Sorting(BaseModel): +class Sorting(Settings): scan_folder: PathOrNone = None sort_downloads: bool = False sort_folder: Path = DEFAULT_DOWNLOAD_STORAGE / "Cyberdrop-DL Sorted Downloads" @@ -247,7 +248,7 @@ def valid_sorted_video(cls, value: str | None) -> str | None: return value -class BrowserCookies(BaseModel): +class BrowserCookies(Settings): auto_import: bool = False browser: BROWSERS | None = BROWSERS.firefox sites: list[NonEmptyStr] = SUPPORTED_SITES_DOMAINS @@ -273,7 +274,7 @@ def use_placeholder(self, values: list[str]) -> list[str]: return values -class DupeCleanup(BaseModel): +class DupeCleanup(Settings): add_md5_hash: bool = False add_sha256_hash: bool = False auto_dedupe: bool = True @@ -281,7 +282,7 @@ class DupeCleanup(BaseModel): send_deleted_to_trash: bool = True -class ConfigSettings(ConfigModel): +class ConfigSettings(ConfigGroup): browser_cookies: BrowserCookies = BrowserCookies() download_options: DownloadOptions = DownloadOptions() dupe_cleanup_options: DupeCleanup = DupeCleanup() diff --git a/cyberdrop_dl/config/global_model.py b/cyberdrop_dl/config/global_model.py index a69fd25b0..616280eff 100755 --- a/cyberdrop_dl/config/global_model.py +++ b/cyberdrop_dl/config/global_model.py @@ -1,9 +1,9 @@ +# ruff: noqa: RUF012 import random from typing import Literal import aiohttp from pydantic import ( - BaseModel, ByteSize, NonNegativeFloat, PositiveFloat, @@ -13,7 +13,7 @@ ) from yarl import URL -from cyberdrop_dl.config._common import ConfigModel +from cyberdrop_dl.config._common import ConfigGroup, Settings from cyberdrop_dl.models.types import ByteSizeSerilized, HttpURL, ListNonEmptyStr, ListPydanticURL, NonEmptyStr from cyberdrop_dl.models.validators import falsy_as, falsy_as_none, to_bytesize @@ -21,7 +21,7 @@ DEFAULT_REQUIRED_FREE_SPACE = to_bytesize("5GB") -class General(BaseModel): +class General(Settings): ssl_context: Literal["truststore", "certifi", "truststore+certifi"] | None = "truststore+certifi" disable_crawlers: ListNonEmptyStr = [] flaresolverr: HttpURL | None = None @@ -58,7 +58,7 @@ def override_min(cls, value: ByteSize) -> ByteSize: return max(value, MIN_REQUIRED_FREE_SPACE) -class RateLimiting(BaseModel): +class RateLimiting(Settings): download_attempts: PositiveInt = 2 download_delay: NonNegativeFloat = 0.0 download_speed_limit: ByteSizeSerilized = ByteSize(0) @@ -95,18 +95,18 @@ def get_jitter(self) -> NonNegativeFloat: return random.uniform(0, self.jitter) -class UIOptions(BaseModel): +class UIOptions(Settings): refresh_rate: PositiveInt = 10 -class GenericCrawlerInstances(BaseModel): +class GenericCrawlerInstances(Settings): wordpress_media: ListPydanticURL = [] wordpress_html: ListPydanticURL = [] discourse: ListPydanticURL = [] chevereto: ListPydanticURL = [] -class GlobalSettings(ConfigModel): +class GlobalSettings(ConfigGroup): general: General = General() rate_limiting_options: RateLimiting = RateLimiting() ui_options: UIOptions = UIOptions() diff --git a/cyberdrop_dl/models/base_models.py b/cyberdrop_dl/models/base_models.py index af03d43a3..14f493ce5 100755 --- a/cyberdrop_dl/models/base_models.py +++ b/cyberdrop_dl/models/base_models.py @@ -22,11 +22,11 @@ class AliasModel(BaseModel): - model_config = ConfigDict(populate_by_name=True) + model_config = ConfigDict(populate_by_name=True, defer_build=True) class FrozenModel(BaseModel): - model_config = ConfigDict(frozen=True) + model_config = ConfigDict(frozen=True, defer_build=True) class AppriseURLModel(FrozenModel): diff --git a/pyproject.toml b/pyproject.toml index fa2fbac93..9e6536d5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ dependencies = [ "beautifulsoup4 >=4.14.3", "certifi >=2026.1.4", "curl-cffi >=0.13,<0.14; implementation_name == 'cpython' ", + "cyclopts>=4.5.4", "dateparser >=1.2.2", "imagesize >=1.4.1", "inquirerpy >=0.3.4", diff --git a/uv.lock b/uv.lock index c1c1de2ba..179ae2423 100644 --- a/uv.lock +++ b/uv.lock @@ -730,6 +730,7 @@ dependencies = [ { name = "beautifulsoup4" }, { name = "certifi" }, { name = "curl-cffi", marker = "implementation_name == 'cpython'" }, + { name = "cyclopts" }, { name = "dateparser" }, { name = "imagesize" }, { name = "inquirerpy" }, @@ -775,6 +776,7 @@ requires-dist = [ { name = "beautifulsoup4", specifier = ">=4.14.3" }, { name = "certifi", specifier = ">=2026.1.4" }, { name = "curl-cffi", marker = "implementation_name == 'cpython'", specifier = ">=0.13,<0.14" }, + { name = "cyclopts", specifier = ">=4.5.4" }, { name = "dateparser", specifier = ">=1.2.2" }, { name = "imagesize", specifier = ">=1.4.1" }, { name = "inquirerpy", specifier = ">=0.3.4" }, @@ -807,6 +809,21 @@ dev = [ ] extras = [{ name = "apprise", specifier = ">=1.9.7" }] +[[package]] +name = "cyclopts" +version = "4.5.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "docstring-parser" }, + { name = "rich" }, + { name = "rich-rst" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3b/d2/f37df900b163f51b4faacdb01bf4895c198906d67c5b2a85c2522de85459/cyclopts-4.5.4.tar.gz", hash = "sha256:eed4d6c76d4391aa796d8fcaabd50e5aad7793261792beb19285f62c5c456c8b", size = 162438, upload-time = "2026-02-20T00:58:46.161Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/0f/119fa63fa93e0a331fbedcb27162d8f88d3ba2f38eba1567e3e44307b857/cyclopts-4.5.4-py3-none-any.whl", hash = "sha256:ad001986ec403ca1dc1ed20375c439d62ac796295ea32b451dfe25d6696bc71a", size = 200225, upload-time = "2026-02-20T00:58:47.275Z" }, +] + [[package]] name = "dateparser" version = "1.3.0" @@ -831,6 +848,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, ] +[[package]] +name = "docstring-parser" +version = "0.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/9d/c3b43da9515bd270df0f80548d9944e389870713cc1fe2b8fb35fe2bcefd/docstring_parser-0.17.0.tar.gz", hash = "sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912", size = 27442, upload-time = "2025-07-21T07:35:01.868Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896, upload-time = "2025-07-21T07:35:00.684Z" }, +] + +[[package]] +name = "docutils" +version = "0.22.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ae/b6/03bb70946330e88ffec97aefd3ea75ba575cb2e762061e0e62a213befee8/docutils-0.22.4.tar.gz", hash = "sha256:4db53b1fde9abecbb74d91230d32ab626d94f6badfc575d6db9194a49df29968", size = 2291750, upload-time = "2025-12-18T19:00:26.443Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/10/5da547df7a391dcde17f59520a231527b8571e6f46fc8efb02ccb370ab12/docutils-0.22.4-py3-none-any.whl", hash = "sha256:d0013f540772d1420576855455d050a2180186c91c15779301ac2ccb3eeb68de", size = 633196, upload-time = "2025-12-18T19:00:18.077Z" }, +] + [[package]] name = "filelock" version = "3.24.3" @@ -1897,6 +1932,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" }, ] +[[package]] +name = "rich-rst" +version = "1.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "docutils" }, + { name = "rich" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bc/6d/a506aaa4a9eaa945ed8ab2b7347859f53593864289853c5d6d62b77246e0/rich_rst-1.3.2.tar.gz", hash = "sha256:a1196fdddf1e364b02ec68a05e8ff8f6914fee10fbca2e6b6735f166bb0da8d4", size = 14936, upload-time = "2025-10-14T16:49:45.332Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/2f/b4530fbf948867702d0a3f27de4a6aab1d156f406d72852ab902c4d04de9/rich_rst-1.3.2-py3-none-any.whl", hash = "sha256:a99b4907cbe118cf9d18b0b44de272efa61f15117c61e39ebdc431baf5df722a", size = 12567, upload-time = "2025-10-14T16:49:42.953Z" }, +] + [[package]] name = "ruff" version = "0.15.2" From 657cffc5858645ae6ef6afa9475e184086785e06 Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Sun, 22 Feb 2026 12:47:04 -0500 Subject: [PATCH 02/23] refactor: update app removed `sites` option from cookies --- cyberdrop_dl/cli/__init__.py | 185 +++++----------------------- cyberdrop_dl/cli/model.py | 106 ++++------------ cyberdrop_dl/clients/hash_client.py | 4 +- cyberdrop_dl/config/_common.py | 8 +- cyberdrop_dl/config/auth_model.py | 4 +- cyberdrop_dl/config/config_model.py | 39 ++---- cyberdrop_dl/config/global_model.py | 4 +- cyberdrop_dl/constants.py | 6 +- cyberdrop_dl/director.py | 4 +- cyberdrop_dl/utils/apprise.py | 4 +- cyberdrop_dl/utils/yaml.py | 15 +-- 11 files changed, 88 insertions(+), 291 deletions(-) diff --git a/cyberdrop_dl/cli/__init__.py b/cyberdrop_dl/cli/__init__.py index 851a762e6..3de623021 100644 --- a/cyberdrop_dl/cli/__init__.py +++ b/cyberdrop_dl/cli/__init__.py @@ -1,25 +1,13 @@ -from __future__ import annotations +import shutil +from typing import Annotated -import dataclasses -import sys -from argparse import SUPPRESS, ArgumentParser, RawDescriptionHelpFormatter -from shutil import get_terminal_size -from typing import TYPE_CHECKING, Annotated, Any, Final, NoReturn +import cyclopts +import pydantic +from cyclopts import Parameter -from cyclopts import App, Parameter -from cyclopts.bind import normalize_tokens -from pydantic import BaseModel, ValidationError - -from cyberdrop_dl import __version__, env -from cyberdrop_dl.cli import arguments +from cyberdrop_dl import __version__, env, signature from cyberdrop_dl.cli.model import CLIargs, ParsedArgs -from cyberdrop_dl.config import ConfigSettings, GlobalSettings - -if TYPE_CHECKING: - from argparse import _ArgumentGroup as ArgGroup # pyright: ignore[reportPrivateUsage] - from collections.abc import Sequence - - from cyberdrop_dl.models.types import HttpURL +from cyberdrop_dl.models.types import HttpURL def is_terminal_in_portrait() -> bool: @@ -28,7 +16,7 @@ def is_terminal_in_portrait() -> bool: if env.PORTRAIT_MODE: return True - terminal_size = get_terminal_size() + terminal_size = shutil.get_terminal_size() width, height = terminal_size.columns, terminal_size.lines aspect_ratio = width / height @@ -37,165 +25,58 @@ def is_terminal_in_portrait() -> bool: return False # Check for mobile device in portrait mode - if (aspect_ratio < 1.5 and height >= 40) or (width <= 85 and aspect_ratio < 2.3): + if (aspect_ratio < 1.5 and height >= 40) or (aspect_ratio < 2.3 and width <= 85): return True # Assume landscape mode for other cases return False -class CustomHelpFormatter(RawDescriptionHelpFormatter): - MAX_HELP_POS: Final = 80 - INDENT_INCREMENT: Final = 2 - - def __init__(self, prog: str, width: int | None = None) -> None: - super().__init__(prog, self.INDENT_INCREMENT, self.MAX_HELP_POS, width) - - def _get_help_string(self, action) -> str | None: - if action.help: - return action.help.replace("program's", "CDL") # The ' messes up the markdown formatting - return action.help - - -@dataclasses.dataclass(slots=True) -class CLIParser: - parser: ArgumentParser - groups: dict[str, list[ArgGroup]] - - def parse_args(self, args: Sequence[str] | None = None) -> dict[str, dict[str, Any]]: - return self._unflatten(self._parse_args(args)) - - def _parse_args(self, args: Sequence[str] | None = None) -> dict[str, Any]: - return dict(sorted(vars(self.parser.parse_intermixed_args(args)).items())) +class App(cyclopts.App): + @signature.copy(cyclopts.App._parse_known_args) + def _parse_known_args(self, *args, **kwargs): + from cyberdrop_dl.utils.yaml import format_validation_error - def _unflatten(self, namespace: dict[str, Any]) -> dict[str, dict[str, Any]]: - parsed_args: dict[str, dict[str, Any]] = {} + try: + return super()._parse_known_args(*args, **kwargs) + except cyclopts.ValidationError as e: + if isinstance(e.__cause__, pydantic.ValidationError): + e.exception_message = format_validation_error(e.__cause__, title="CLI arguments") + raise - for name, groups in self.groups.items(): - parsed_args[name] = {} - for group in groups: - group_dict = {arg.dest: v for arg in group._group_actions if (v := namespace.get(arg.dest)) is not None} - if group_dict: - assert group.title - parsed_args[name][group.title] = _unflatten_nested_args(group_dict) - parsed_args["cli_only_args"] = parsed_args["cli_only_args"]["CLI-only options"] - return parsed_args - - -def make_parser() -> CLIParser: - kwargs: dict[str, Any] = {"color": True} if sys.version_info > (3, 14) else {} - parser = ArgumentParser( - description="Bulk asynchronous downloader for multiple file hosts", - usage="cyberdrop-dl [OPTIONS] URL [URL...]", - allow_abbrev=False, - formatter_class=CustomHelpFormatter, - **kwargs, - ) - _ = parser.add_argument("-V", "--version", action="version", version=f"%(prog)s {__version__}") - - cli_only = parser.add_argument_group("CLI-only options") - _add_args_from_model(cli_only, CLIargs) - - groups = { - "config_settings": _create_groups_from_nested_models(parser, ConfigSettings), - "global_settings": _create_groups_from_nested_models(parser, GlobalSettings), - "cli_only_args": [cli_only], - } - - return CLIParser(parser, groups) - - -app = App(result_action="return_value", version=f"{__version__}NTFS") +app = App( + help="Bulk asynchronous downloader for multiple file hosts", + version=f"{__version__}.NTFS", + default_parameter=Parameter(negative_iterable=[]), +) @app.command() def download( links: Annotated[ - list[HttpURL], + list[HttpURL] | None, Parameter( name="links", - consume_multiple=True, negative=[], - help="link(s) to content to download (passing multiple links is supported)", + help="link(s) to content to download", ), - ] = [], # noqa: B006 + ] = None, /, *, + cli_args: CLIargs = CLIargs(), # noqa: B008 # pyright: ignore[reportCallInDefaultInitializer] parsed_settings: ParsedArgs = ParsedArgs(), # pyright: ignore[reportCallInDefaultInitializer] # noqa: B008 -) -> ParsedArgs: - return parsed_settings +): + return links, cli_args, parsed_settings @app.command() -def show_supported_sites() -> NoReturn: +def show_supported_sites() -> None: from cyberdrop_dl.utils.markdown import get_crawlers_info_as_rich_table table = get_crawlers_info_as_rich_table() app.console.print(table) - sys.exit(0) - - -def parse_args(args: Sequence[str] | None = None) -> ParsedArgs: - """Parses the command line arguments passed into the program.""" - - from cyberdrop_dl.utils.yaml import handle_validation_error - - args = normalize_tokens(args) - # if not args or args[0] != "download": - # args = ["download", *args] - - try: - command, bound, _ = app.parse_args(args, print_error=False, exit_on_error=False) - # assert command is download - settings: ParsedArgs = command(*bound.args, **bound.kwargs) - - except ValidationError as e: - handle_validation_error(e, title="CLI arguments") - sys.exit(1) - - # if settings.cli_only_args.show_supported_sites: - # show_supported_sites() - - return settings - - -def _unflatten_nested_args(data: dict[str, Any]) -> dict[str, Any]: - result: dict[str, Any] = {} - - for command_name, value in data.items(): - inner_names = command_name.split(".") - current_level = result - for index, key in enumerate(inner_names): - if index < len(inner_names) - 1: - if key not in current_level: - current_level[key] = {} - current_level = current_level[key] - else: - current_level[key] = value - return result - - -def _add_args_from_model(parser: ArgumentParser | ArgGroup, model: type[BaseModel]) -> None: - cli_args = model is CLIargs - - for arg in arguments.parse(model): - options = arg.compose_options() - - if cli_args and arg.arg_type is bool and not (arg.cli_name == "portrait" and env.RUNNING_IN_TERMUX): - default = arg.default if cli_args else SUPPRESS - options["action"] = "store_false" if default else "store_true" - - _ = parser.add_argument(*arg.name_or_flags, **options) - -def _create_groups_from_nested_models(parser: ArgumentParser, model: type[BaseModel]) -> list[ArgGroup]: - groups: list[ArgGroup] = [] - for name, field in model.model_fields.items(): - submodel = field.annotation - assert submodel and issubclass(submodel, BaseModel) - submodel_group = parser.add_argument_group(name) - _add_args_from_model(submodel_group, submodel) - groups.append(submodel_group) - return groups +if __name__ == "__main__": + app() diff --git a/cyberdrop_dl/cli/model.py b/cyberdrop_dl/cli/model.py index e7a6174d7..038b1535f 100644 --- a/cyberdrop_dl/cli/model.py +++ b/cyberdrop_dl/cli/model.py @@ -1,15 +1,13 @@ import datetime -from collections.abc import Iterable from enum import StrEnum, auto from pathlib import Path -from typing import Annotated, Any, Literal, Self +from typing import Annotated, Literal from cyclopts import Parameter -from pydantic import BaseModel, Field, computed_field, field_validator, model_validator +from pydantic import BaseModel, Field from cyberdrop_dl.cli.arguments import ArgumentParams from cyberdrop_dl.config import ConfigSettings, GlobalSettings -from cyberdrop_dl.models.types import HttpURL class UIOptions(StrEnum): @@ -19,46 +17,18 @@ class UIOptions(StrEnum): FULLSCREEN = auto() -@Parameter(name="*") +@Parameter(name="*", negative_bool=[]) class CLIargs(BaseModel): - links: Annotated[ - list[HttpURL], - ArgumentParams(positional_only=True, metavar="LINK(s)"), - Parameter(show=False), - ] = Field( - default=[], - description="link(s) to content to download (passing multiple links is supported)", - ) appdata_folder: Path | None = Field( default=None, description="AppData folder path", ) - completed_after: datetime.date | None = Field( - default=None, - description="only retry downloads that were completed on or after this date", - ) - completed_before: datetime.date | None = Field( - default=None, - description="only retry downloads that were completed on or before this date", - ) config_file: Path | None = Field( default=None, description="path to the CDL settings.yaml file to load", ) - download: bool = Field( - default=False, - description="skips UI, start download immediately", - ) - download_tiktok_audios: bool = Field( - default=False, - description="download TikTok audios from posts and save them as separate files", - ) - download_tiktok_src_quality_videos: bool = Field( - default=False, - description="download TikTok videos in source quality", - ) impersonate: Annotated[ Literal[ "chrome", @@ -68,17 +38,14 @@ class CLIargs(BaseModel): "chrome_android", "firefox", ] - | bool | None, ArgumentParams(nargs="?", const=True), + Parameter(), ] = Field( default=None, description="Use this target as impersonation for all scrape requests", ) - max_items_retry: int = Field( - default=0, - description="max number of links to retry", - ) + portrait: bool = Field( default=False, description="force CDL to run with a vertical layout", @@ -87,6 +54,24 @@ class CLIargs(BaseModel): default=True, description="show stats report at the end of a run", ) + + +@Parameter(name="*") +class RetryArgs(BaseModel): + completed_after: datetime.date | None = Field( + default=None, + description="only retry downloads that were completed on or after this date", + ) + completed_before: datetime.date | None = Field( + default=None, + description="only retry downloads that were completed on or before this date", + ) + + max_items_retry: int = Field( + default=0, + description="max number of links to retry", + ) + retry_all: bool = Field( default=False, description="retry all downloads", @@ -99,59 +84,14 @@ class CLIargs(BaseModel): default=False, description="retry download of maintenance files (bunkr). Requires files to be hashed", ) - show_supported_sites: bool = Field( - default=False, - description="shows a list of supported sites and exits", - ) - ui: UIOptions = Field( - default=UIOptions.FULLSCREEN, - description="DISABLED, ACTIVITY, SIMPLE or FULLSCREEN", - ) @property def retry_any(self) -> bool: return any((self.retry_all, self.retry_failed, self.retry_maintenance)) - @property - def fullscreen_ui(self) -> bool: - return self.ui == UIOptions.FULLSCREEN - - @computed_field - def __computed__(self) -> dict[str, bool]: - return {"retry_any": self.retry_any, "fullscreen_ui": self.fullscreen_ui} - - @model_validator(mode="after") - def mutually_exclusive(self) -> Self: - group1 = [self.links, self.retry_all, self.retry_failed, self.retry_maintenance] - msg1 = "`--links`, '--retry-all', '--retry-maintenace' and '--retry-failed' are mutually exclusive" - _check_mutually_exclusive(group1, msg1) - return self - - @field_validator("ui", mode="before") - @classmethod - def lower(cls, value: str) -> str: - return value.lower() - - -def _check_mutually_exclusive(group: Iterable[Any], msg: str) -> None: - if sum(1 for value in group if value) >= 2: - raise ValueError(msg) - @Parameter(name="*") class ParsedArgs(BaseModel): cli_only_args: CLIargs = CLIargs() config_settings: ConfigSettings = ConfigSettings() global_settings: GlobalSettings = GlobalSettings() - - def model_post_init(self, *_) -> None: - if self.cli_only_args.retry_all or self.cli_only_args.retry_maintenance: - self.config_settings.runtime_options.ignore_history = True - - if ( - not self.cli_only_args.fullscreen_ui - or self.cli_only_args.retry_any - or self.cli_only_args.config_file - or self.config_settings.sorting.sort_downloads - ): - self.cli_only_args.download = True diff --git a/cyberdrop_dl/clients/hash_client.py b/cyberdrop_dl/clients/hash_client.py index 689a660d0..57c22b507 100644 --- a/cyberdrop_dl/clients/hash_client.py +++ b/cyberdrop_dl/clients/hash_client.py @@ -16,7 +16,7 @@ if TYPE_CHECKING: from yarl import URL - from cyberdrop_dl.config.config_model import DupeCleanup + from cyberdrop_dl.config.config_model import Dedupe from cyberdrop_dl.data_structures.url_objects import MediaItem from cyberdrop_dl.managers.manager import Manager @@ -54,7 +54,7 @@ def _deleted_file_suffix(self) -> Literal["Sent to trash", "Permanently deleted" return "Sent to trash" if self._to_trash else "Permanently deleted" @property - def dupe_cleanup_options(self) -> DupeCleanup: + def dupe_cleanup_options(self) -> Dedupe: return self.manager.config.dupe_cleanup_options async def hash_directory(self, path: Path) -> None: diff --git a/cyberdrop_dl/config/_common.py b/cyberdrop_dl/config/_common.py index cfffe48cc..27a4d9a9d 100755 --- a/cyberdrop_dl/config/_common.py +++ b/cyberdrop_dl/config/_common.py @@ -10,17 +10,19 @@ @Parameter(name="*") -class FlatCLIParams: ... +class FlatNamespace: ... -class Settings(FlatCLIParams, AliasModel): ... +class _Settings(FlatNamespace, AliasModel): ... -class ConfigGroup(Settings): +class Settings(_Settings): def __init_subclass__(cls, name: str | None = None, **kwargs: Unpack[ConfigDict]) -> None: _ = Parameter(group=name or cls.__name__)(cls) return super().__init_subclass__(**kwargs) + +class ConfigFile(_Settings): @classmethod def load_file(cls, file: Path, update_if_has_string: str) -> Self: default = cls() diff --git a/cyberdrop_dl/config/auth_model.py b/cyberdrop_dl/config/auth_model.py index bd918df0f..f511b7f35 100755 --- a/cyberdrop_dl/config/auth_model.py +++ b/cyberdrop_dl/config/auth_model.py @@ -1,6 +1,6 @@ from pydantic import BaseModel -from cyberdrop_dl.config._common import ConfigGroup +from cyberdrop_dl.config._common import ConfigFile from cyberdrop_dl.models import AliasModel @@ -39,7 +39,7 @@ class RealDebridAuth(AliasModel): api_key: str = "" -class AuthSettings(ConfigGroup): +class AuthSettings(ConfigFile): coomer: CoomerAuth = CoomerAuth() gofile: GoFileAuth = GoFileAuth() imgur: ImgurAuth = ImgurAuth() diff --git a/cyberdrop_dl/config/config_model.py b/cyberdrop_dl/config/config_model.py index 8c131b664..e5ccab729 100755 --- a/cyberdrop_dl/config/config_model.py +++ b/cyberdrop_dl/config/config_model.py @@ -5,7 +5,7 @@ from logging import DEBUG from pathlib import Path -from pydantic import ByteSize, Field, NonNegativeInt, field_serializer, field_validator +from pydantic import ByteSize, Field, NonNegativeInt, field_validator from cyberdrop_dl import constants from cyberdrop_dl.constants import BROWSERS, DEFAULT_APP_STORAGE, DEFAULT_DOWNLOAD_STORAGE, Hashing @@ -21,13 +21,11 @@ PathOrNone, ) from cyberdrop_dl.models.validators import falsy_as, to_timedelta -from cyberdrop_dl.supported_domains import SUPPORTED_SITES_DOMAINS from cyberdrop_dl.utils.strings import validate_format_string from cyberdrop_dl.utils.utilities import purge_dir_tree -from ._common import ConfigGroup, Settings +from ._common import ConfigFile, Settings -ALL_SUPPORTED_SITES = ["<>"] _SORTING_COMMON_FIELDS = { "base_dir", "ext", @@ -173,7 +171,7 @@ def is_valid_regex(cls, value: str | None) -> str | None: return value -class RuntimeOptions(Settings): +class Runtime(Settings): console_log_level: NonNegativeInt = 100 deep_scrape: bool = False delete_partial_files: bool = False @@ -248,33 +246,16 @@ def valid_sorted_video(cls, value: str | None) -> str | None: return value -class BrowserCookies(Settings): +class Cookies(Settings): auto_import: bool = False browser: BROWSERS | None = BROWSERS.firefox - sites: list[NonEmptyStr] = SUPPORTED_SITES_DOMAINS def model_post_init(self, *_) -> None: if self.auto_import and not self.browser: raise ValueError("You need to provide a browser for auto_import to work") - @field_validator("sites", mode="before") - @classmethod - def handle_list(cls, values: list[str]) -> list[str]: - values = falsy_as(values, []) - if values == ALL_SUPPORTED_SITES: - return SUPPORTED_SITES_DOMAINS - if isinstance(values, list): - return sorted(str(value).lower() for value in values) - return values - - @field_serializer("sites", when_used="json-unless-none") - def use_placeholder(self, values: list[str]) -> list[str]: - if set(values) == set(SUPPORTED_SITES_DOMAINS): - return ALL_SUPPORTED_SITES - return values - - -class DupeCleanup(Settings): + +class Dedupe(Settings): add_md5_hash: bool = False add_sha256_hash: bool = False auto_dedupe: bool = True @@ -282,14 +263,14 @@ class DupeCleanup(Settings): send_deleted_to_trash: bool = True -class ConfigSettings(ConfigGroup): - browser_cookies: BrowserCookies = BrowserCookies() +class ConfigSettings(ConfigFile): + browser_cookies: Cookies = Cookies() download_options: DownloadOptions = DownloadOptions() - dupe_cleanup_options: DupeCleanup = DupeCleanup() + dupe_cleanup_options: Dedupe = Dedupe() file_size_limits: FileSizeLimits = FileSizeLimits() media_duration_limits: MediaDurationLimits = MediaDurationLimits() files: Files = Files() ignore_options: IgnoreOptions = IgnoreOptions() logs: Logs = Logs() - runtime_options: RuntimeOptions = RuntimeOptions() + runtime_options: Runtime = Runtime() sorting: Sorting = Sorting() diff --git a/cyberdrop_dl/config/global_model.py b/cyberdrop_dl/config/global_model.py index 616280eff..56aa76b43 100755 --- a/cyberdrop_dl/config/global_model.py +++ b/cyberdrop_dl/config/global_model.py @@ -13,7 +13,7 @@ ) from yarl import URL -from cyberdrop_dl.config._common import ConfigGroup, Settings +from cyberdrop_dl.config._common import ConfigFile, Settings from cyberdrop_dl.models.types import ByteSizeSerilized, HttpURL, ListNonEmptyStr, ListPydanticURL, NonEmptyStr from cyberdrop_dl.models.validators import falsy_as, falsy_as_none, to_bytesize @@ -106,7 +106,7 @@ class GenericCrawlerInstances(Settings): chevereto: ListPydanticURL = [] -class GlobalSettings(ConfigGroup): +class GlobalSettings(ConfigFile): general: General = General() rate_limiting_options: RateLimiting = RateLimiting() ui_options: UIOptions = UIOptions() diff --git a/cyberdrop_dl/constants.py b/cyberdrop_dl/constants.py index 49c8159d9..7f5af2c3f 100644 --- a/cyberdrop_dl/constants.py +++ b/cyberdrop_dl/constants.py @@ -38,13 +38,9 @@ "tracebacks_extra_lines": 2, "locals_max_length": 20, } -VALIDATION_ERROR_FOOTER = """Please delete the file or fix the errors. Read the documentation to learn what's the expected format and values: https://script-ware.gitbook.io/cyberdrop-dl/reference/configuration-options -\nThis is not a bug. Do not open issues related to this""" +VALIDATION_ERROR_FOOTER = """Please delete the file or fix the errors""" -CLI_VALIDATION_ERROR_FOOTER = """Please read the documentation to learn about the expected values: https://script-ware.gitbook.io/cyberdrop-dl/reference/configuration-options -\nThis is not a bug. Do not open issues related to this""" - # regex RAR_MULTIPART_PATTERN = re.compile(r"^part\d+") SANITIZE_FILENAME_PATTERN = re.compile(r'[<>:"/\\|?*\']') diff --git a/cyberdrop_dl/director.py b/cyberdrop_dl/director.py index b07a7b136..845f6f98f 100644 --- a/cyberdrop_dl/director.py +++ b/cyberdrop_dl/director.py @@ -29,7 +29,7 @@ from cyberdrop_dl.utils.updates import check_latest_pypi from cyberdrop_dl.utils.utilities import check_partials_and_empty_folders from cyberdrop_dl.utils.webhook import send_webhook_message -from cyberdrop_dl.utils.yaml import handle_validation_error +from cyberdrop_dl.utils.yaml import format_validation_error if TYPE_CHECKING: from collections.abc import Callable, Coroutine, Sequence @@ -207,7 +207,7 @@ def _setup_manager(args: Sequence[str] | None = None) -> Manager: "AuthSettings": manager.config_manager.authentication_settings, }.get(e.title) - handle_validation_error(e, file=file) + format_validation_error(e, file=file) sys.exit(_C.ERROR) return manager diff --git a/cyberdrop_dl/utils/apprise.py b/cyberdrop_dl/utils/apprise.py index 654d3c0ee..d5c6e2b5c 100644 --- a/cyberdrop_dl/utils/apprise.py +++ b/cyberdrop_dl/utils/apprise.py @@ -19,7 +19,7 @@ from cyberdrop_dl.dependencies import apprise from cyberdrop_dl.models import AppriseURLModel from cyberdrop_dl.utils.logger import log, log_debug, log_spacer -from cyberdrop_dl.utils.yaml import handle_validation_error +from cyberdrop_dl.utils.yaml import format_validation_error if TYPE_CHECKING: from cyberdrop_dl.managers.manager import Manager @@ -89,7 +89,7 @@ def get_apprise_urls(*, file: Path | None = None, urls: list[str] | None = None) return _simplify_urls([AppriseURLModel.model_validate({"url": url}) for url in set(urls)]) except ValidationError as e: - handle_validation_error(e, title="Apprise", file=file) + format_validation_error(e, title="Apprise", file=file) sys.exit(1) diff --git a/cyberdrop_dl/utils/yaml.py b/cyberdrop_dl/utils/yaml.py index eefc4b527..785362328 100644 --- a/cyberdrop_dl/utils/yaml.py +++ b/cyberdrop_dl/utils/yaml.py @@ -1,7 +1,5 @@ from __future__ import annotations -import logging -import sys from datetime import date, timedelta from enum import Enum from pathlib import Path, PurePath @@ -11,7 +9,6 @@ from pydantic import BaseModel, ValidationError from yarl import URL -from cyberdrop_dl.constants import CLI_VALIDATION_ERROR_FOOTER, VALIDATION_ERROR_FOOTER from cyberdrop_dl.exceptions import InvalidYamlError if TYPE_CHECKING: @@ -70,10 +67,9 @@ def load(file: Path, *, create: bool = False) -> dict[str, Any]: raise InvalidYamlError(file, e) from None -def handle_validation_error(e: ValidationError, *, title: str = "", file: Path | None = None): +def format_validation_error(e: ValidationError, *, title: str = "", file: Path | None = None): """Logs the validation error details and exits the program.""" - startup_logger = logging.getLogger("cyberdrop_dl_startup") error_count = e.error_count() msg = "" if file: @@ -82,15 +78,16 @@ def handle_validation_error(e: ValidationError, *, title: str = "", file: Path | show_title = title or e.title msg += f"Found {error_count} error{'s' if error_count > 1 else ''} [{show_title}]:" from_cli = title == "CLI arguments" - footer = CLI_VALIDATION_ERROR_FOOTER if from_cli else VALIDATION_ERROR_FOOTER + for error in e.errors(include_url=False): option_name = get_field_name(error, from_cli) msg += f"\n\nOption '{option_name}' with value '{error['input']}' is invalid:\n" msg += f" {error['msg']}" - msg += "\n\n" + footer - startup_logger.error(msg) - sys.exit(1) + if not from_cli: + msg += "\n\n" + """Please delete the file or fix the errors""" + + return msg def get_field_name(error: ErrorDetails, from_cli: bool = False) -> str: From b56f4233c3e1a9d8dad97fed57c4d6e348394440 Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Sun, 22 Feb 2026 13:13:26 -0500 Subject: [PATCH 03/23] refactor: rework retry --- cyberdrop_dl/cli/__init__.py | 16 +++-- cyberdrop_dl/cli/arguments.py | 122 ---------------------------------- cyberdrop_dl/cli/model.py | 30 ++------- 3 files changed, 17 insertions(+), 151 deletions(-) delete mode 100644 cyberdrop_dl/cli/arguments.py diff --git a/cyberdrop_dl/cli/__init__.py b/cyberdrop_dl/cli/__init__.py index 3de623021..6455b6d3b 100644 --- a/cyberdrop_dl/cli/__init__.py +++ b/cyberdrop_dl/cli/__init__.py @@ -1,13 +1,14 @@ import shutil -from typing import Annotated +from typing import Annotated, Literal import cyclopts import pydantic from cyclopts import Parameter from cyberdrop_dl import __version__, env, signature -from cyberdrop_dl.cli.model import CLIargs, ParsedArgs +from cyberdrop_dl.cli.model import CLIargs, ParsedArgs, RetryArgs from cyberdrop_dl.models.types import HttpURL +from cyberdrop_dl.utils.yaml import format_validation_error def is_terminal_in_portrait() -> bool: @@ -35,8 +36,6 @@ def is_terminal_in_portrait() -> bool: class App(cyclopts.App): @signature.copy(cyclopts.App._parse_known_args) def _parse_known_args(self, *args, **kwargs): - from cyberdrop_dl.utils.yaml import format_validation_error - try: return super()._parse_known_args(*args, **kwargs) except cyclopts.ValidationError as e: @@ -46,6 +45,7 @@ def _parse_known_args(self, *args, **kwargs): app = App( + name="cyberdrop-dl", help="Bulk asynchronous downloader for multiple file hosts", version=f"{__version__}.NTFS", default_parameter=Parameter(negative_iterable=[]), @@ -67,16 +67,24 @@ def download( cli_args: CLIargs = CLIargs(), # noqa: B008 # pyright: ignore[reportCallInDefaultInitializer] parsed_settings: ParsedArgs = ParsedArgs(), # pyright: ignore[reportCallInDefaultInitializer] # noqa: B008 ): + """Scrape and download files from a list of URLs (from a file or stdin)""" return links, cli_args, parsed_settings @app.command() def show_supported_sites() -> None: + """Show a list of all supported sites""" from cyberdrop_dl.utils.markdown import get_crawlers_info_as_rich_table table = get_crawlers_info_as_rich_table() app.console.print(table) +@app.command() +def retry(choice: Literal["all", "failed", "maintenance"], /, *, retry: RetryArgs | None = None): + """Retry failed downloads""" + return choice, retry or RetryArgs() + + if __name__ == "__main__": app() diff --git a/cyberdrop_dl/cli/arguments.py b/cyberdrop_dl/cli/arguments.py deleted file mode 100644 index 85536a4ee..000000000 --- a/cyberdrop_dl/cli/arguments.py +++ /dev/null @@ -1,122 +0,0 @@ -import dataclasses -from argparse import BooleanOptionalAction -from collections.abc import Generator, Iterable -from typing import Any, Literal, TypedDict - -from pydantic import BaseModel - -_NOT_SET: Any = object() - - -class _ArgumentParams(TypedDict, total=False): - action: str - nargs: int | str | None - const: Any - default: Any - choices: Iterable[Any] | None - required: bool - help: str | None - metavar: str | tuple[str, ...] | None - dest: str | None - - -@dataclasses.dataclass(slots=True, frozen=True, kw_only=True) -class ArgumentParams: - positional_only: bool = dataclasses.field(default=False, metadata={"exclude": True}) - nargs: Literal["?", "*", "+"] | None = _NOT_SET - const: Any = _NOT_SET - dest: str = _NOT_SET - choices: Iterable[Any] | None = _NOT_SET - metavar: str | tuple[str, ...] | None = _NOT_SET - - def as_dict(self) -> _ArgumentParams: - return {name: v for name in _params if (v := getattr(self, name)) is not _NOT_SET} # pyright: ignore[reportReturnType] - - -_params = tuple(f.name for f in dataclasses.fields(ArgumentParams) if not f.metadata.get("exclude")) - - -@dataclasses.dataclass(slots=True, kw_only=True) -class Argument: - name_or_flags: list[str] = dataclasses.field(init=False) - python_name: str - cli_name: str = dataclasses.field(init=False) - aliases: tuple[str, ...] - required: bool - default: Any - annotation: Any - help: str | None - metadata: list[Any] - positional_only: bool = dataclasses.field(init=False) - arg_type: type = dataclasses.field(init=False) - - def __post_init__(self) -> None: - self.cli_name = self.python_name.replace("_", "-") - self.arg_type = type(self.default) - - if self.arg_type not in (list, set, bool): - self.arg_type = str - - self.positional_only = override.positional_only if (override := self._overrides()) else False - cli_command = f"{'' if self.positional_only else '--'}{self.cli_name}" - self.name_or_flags = [cli_command] - - for alias in self.aliases: - if alias and len(alias) == 1: - self.name_or_flags.insert(0, f"-{alias}") - else: - self.name_or_flags.append(alias) - - def compose_options(self) -> _ArgumentParams: - options = self._options() - if override := self._overrides(): - return options | override.as_dict() - - return options - - def _overrides(self) -> ArgumentParams | None: - for meta in self.metadata: - if isinstance(meta, ArgumentParams): - return meta - - def _options(self) -> _ArgumentParams: - options = dict( # noqa: C408 - default=self.default, - help=self.help, - action="store", - ) - if not self.positional_only: - options["dest"] = self.python_name - - if self.arg_type is bool: - options["action"] = BooleanOptionalAction - - elif self.arg_type in (list, set): - options.update(nargs="*", action="extend") - - else: - options["type"] = self.arg_type - - return options # pyright: ignore[reportReturnType] - - -def parse(model: type[BaseModel]) -> Generator[Argument]: - for python_name, field in model.model_fields.items(): - aliases = filter( - None, - ( - field.alias, - field.validation_alias, - field.serialization_alias, - ), - ) - - yield Argument( - python_name=python_name, - aliases=tuple(map(str, aliases)), - annotation=field.annotation, - default=field.default, - required=field.is_required(), - metadata=field.metadata, - help=field.description or None, - ) diff --git a/cyberdrop_dl/cli/model.py b/cyberdrop_dl/cli/model.py index 038b1535f..426666f56 100644 --- a/cyberdrop_dl/cli/model.py +++ b/cyberdrop_dl/cli/model.py @@ -1,12 +1,11 @@ import datetime from enum import StrEnum, auto from pathlib import Path -from typing import Annotated, Literal +from typing import Literal from cyclopts import Parameter from pydantic import BaseModel, Field -from cyberdrop_dl.cli.arguments import ArgumentParams from cyberdrop_dl.config import ConfigSettings, GlobalSettings @@ -29,7 +28,7 @@ class CLIargs(BaseModel): description="path to the CDL settings.yaml file to load", ) - impersonate: Annotated[ + impersonate: ( Literal[ "chrome", "edge", @@ -38,10 +37,8 @@ class CLIargs(BaseModel): "chrome_android", "firefox", ] - | None, - ArgumentParams(nargs="?", const=True), - Parameter(), - ] = Field( + | None + ) = Field( default=None, description="Use this target as impersonation for all scrape requests", ) @@ -56,7 +53,7 @@ class CLIargs(BaseModel): ) -@Parameter(name="*") +@Parameter(name="*", negative_bool="") class RetryArgs(BaseModel): completed_after: datetime.date | None = Field( default=None, @@ -72,23 +69,6 @@ class RetryArgs(BaseModel): description="max number of links to retry", ) - retry_all: bool = Field( - default=False, - description="retry all downloads", - ) - retry_failed: bool = Field( - default=False, - description="retry failed downloads", - ) - retry_maintenance: bool = Field( - default=False, - description="retry download of maintenance files (bunkr). Requires files to be hashed", - ) - - @property - def retry_any(self) -> bool: - return any((self.retry_all, self.retry_failed, self.retry_maintenance)) - @Parameter(name="*") class ParsedArgs(BaseModel): From 42b828b1d295f5853efce598781bc5e70c2727f3 Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Sun, 22 Feb 2026 16:05:35 -0500 Subject: [PATCH 04/23] refactor: create config object --- cyberdrop_dl/cli/model.py | 15 +- cyberdrop_dl/clients/hash_client.py | 2 +- cyberdrop_dl/config/__init__.py | 208 ++++++------------ cyberdrop_dl/config/_common.py | 66 ------ cyberdrop_dl/config/auth.py | 47 ++++ cyberdrop_dl/config/auth_model.py | 50 ----- cyberdrop_dl/config/global_model.py | 113 ---------- .../config/{config_model.py => settings.py} | 168 ++++++++++++-- cyberdrop_dl/crawlers/wordpress/models.py | 2 +- cyberdrop_dl/managers/manager.py | 6 +- cyberdrop_dl/models/__init__.py | 13 +- .../models/{base_models.py => base.py} | 16 +- cyberdrop_dl/utils/webhook.py | 2 +- 13 files changed, 293 insertions(+), 415 deletions(-) delete mode 100755 cyberdrop_dl/config/_common.py create mode 100755 cyberdrop_dl/config/auth.py delete mode 100755 cyberdrop_dl/config/auth_model.py delete mode 100755 cyberdrop_dl/config/global_model.py rename cyberdrop_dl/config/{config_model.py => settings.py} (66%) rename cyberdrop_dl/models/{base_models.py => base.py} (80%) diff --git a/cyberdrop_dl/cli/model.py b/cyberdrop_dl/cli/model.py index 426666f56..22d70cfb5 100644 --- a/cyberdrop_dl/cli/model.py +++ b/cyberdrop_dl/cli/model.py @@ -4,9 +4,10 @@ from typing import Literal from cyclopts import Parameter -from pydantic import BaseModel, Field +from pydantic import Field -from cyberdrop_dl.config import ConfigSettings, GlobalSettings +from cyberdrop_dl.config import Config +from cyberdrop_dl.models import Settings class UIOptions(StrEnum): @@ -17,7 +18,7 @@ class UIOptions(StrEnum): @Parameter(name="*", negative_bool=[]) -class CLIargs(BaseModel): +class CLIargs(Settings): appdata_folder: Path | None = Field( default=None, description="AppData folder path", @@ -54,7 +55,7 @@ class CLIargs(BaseModel): @Parameter(name="*", negative_bool="") -class RetryArgs(BaseModel): +class RetryArgs(Settings): completed_after: datetime.date | None = Field( default=None, description="only retry downloads that were completed on or after this date", @@ -70,8 +71,6 @@ class RetryArgs(BaseModel): ) -@Parameter(name="*") -class ParsedArgs(BaseModel): +class ParsedArgs(Settings): cli_only_args: CLIargs = CLIargs() - config_settings: ConfigSettings = ConfigSettings() - global_settings: GlobalSettings = GlobalSettings() + config: Config = Config() diff --git a/cyberdrop_dl/clients/hash_client.py b/cyberdrop_dl/clients/hash_client.py index 57c22b507..b0f09a735 100644 --- a/cyberdrop_dl/clients/hash_client.py +++ b/cyberdrop_dl/clients/hash_client.py @@ -16,7 +16,7 @@ if TYPE_CHECKING: from yarl import URL - from cyberdrop_dl.config.config_model import Dedupe + from cyberdrop_dl.config.settings import Dedupe from cyberdrop_dl.data_structures.url_objects import MediaItem from cyberdrop_dl.managers.manager import Manager diff --git a/cyberdrop_dl/config/__init__.py b/cyberdrop_dl/config/__init__.py index c1d711864..964fa1462 100755 --- a/cyberdrop_dl/config/__init__.py +++ b/cyberdrop_dl/config/__init__.py @@ -1,177 +1,95 @@ from __future__ import annotations -import dataclasses -import shutil +from contextvars import ContextVar, Token from pathlib import Path -from time import sleep -from typing import TYPE_CHECKING +from typing import Self -from cyberdrop_dl import constants, env -from cyberdrop_dl.utils.apprise import get_apprise_urls +from pydantic import BaseModel -from .auth_model import AuthSettings -from .config_model import ConfigSettings -from .global_model import GlobalSettings +from cyberdrop_dl.config.auth import AuthSettings +from cyberdrop_dl.config.settings import ConfigSettings +from cyberdrop_dl.models import Settings, get_model_fields -if TYPE_CHECKING: - from cyberdrop_dl.cli import ParsedArgs - from cyberdrop_dl.utils.apprise import AppriseURL +_config: ContextVar[Config] = ContextVar("_config") +_appdata: ContextVar[AppData] = ContextVar("_appdata") -__all__ = [ - "AuthSettings", - "ConfigSettings", - "GlobalSettings", -] -deep_scrape: bool = False +class AppData: + def __init__(self, path: Path) -> None: + self.path: Path = path + self.cookies_dir: Path = path / "cookies" + self.cache_file: Path = path / "cache.yaml" + self.default_config: Path = path / "config.yaml" + self.db_file: Path = path / "cyberdrop.db" -current_config: Config -cli: ParsedArgs -appdata: AppData + def __fspath__(self) -> str: + return str(self) -# re-export current config values for easy access -auth: AuthSettings -settings: ConfigSettings -global_settings: GlobalSettings + def __str__(self) -> str: + return str(self.path) - -def startup() -> None: - global appdata, cli - from cyberdrop_dl.cli import parse_args - - cli = parse_args() - - if env.RUNNING_IN_IDE and Path.cwd().name == "cyberdrop_dl": - """This is for testing purposes only""" - constants.DEFAULT_APP_STORAGE = Path("../AppData") - constants.DEFAULT_DOWNLOAD_STORAGE = Path("../Downloads") - - appdata_path = cli.cli_only_args.appdata_folder or constants.DEFAULT_APP_STORAGE - appdata = AppData(appdata_path.resolve()) - appdata.mkdirs() - # cache.startup(appdata.cache_file) - load_config(get_default_config()) - settings.logs._delete_old_logs_and_folders(constants.STARTUP_TIME) - - -class AppData(Path): - def __init__(self, app_data_path: Path) -> None: - self.configs_dir = app_data_path / "Configs" - self.cache_dir = app_data_path / "Cache" - self.cookies_dir = app_data_path / "Cookies" - self.cache_file = self.cache_dir / "cache.yaml" - self.default_auth_config_file = self.configs_dir / "authentication.yaml" - self.global_config_file = self.configs_dir / "global_settings.yaml" - self.cache_db = self.cache_dir / "request_cache.db" - self.history_db = self.cache_dir / "cyberdrop.db" + def __repr__(self) -> str: + return f"{type(self).__name__}({vars(self)!r})" def mkdirs(self) -> None: - for dir in (self.configs_dir, self.cache_dir, self.cookies_dir): + for dir in (self.cookies_dir,): dir.mkdir(parents=True, exist_ok=True) -@dataclasses.dataclass(slots=True) -class Config: - """Helper class to group a single config, not necessarily the current config""" - - folder: Path - - apprise_file: Path - config_file: Path - - auth_config_file: Path - - auth: AuthSettings - settings: ConfigSettings - global_settings: GlobalSettings - apprise_urls: list[AppriseURL] - - def __init__(self, name: str) -> None: - self.apprise_urls = [] - self.folder = appdata.configs_dir / name - self.apprise_file = self.folder / "apprise.txt" - self.config_file = self.folder / "settings.yaml" - auth_override = self.folder / "authentication.yaml" - if auth_override.is_file(): - self.auth_config_file = auth_override - else: - self.auth_config_file = appdata.default_auth_config_file - - @staticmethod - def build(name: str, auth: AuthSettings, settings: ConfigSettings, global_settings: GlobalSettings) -> Config: - self = Config(name) - self.auth = auth - self.settings = settings - self.global_settings = global_settings - self.apprise_urls = get_apprise_urls(file=self.apprise_file) - return self - - @staticmethod - def new_empty_config(name: str) -> Config: - assert name not in get_all_configs() - self = Config(name) - self._load() - return self +class Config(Settings): + auth: AuthSettings = AuthSettings() + settings: ConfigSettings = ConfigSettings() - def _load(self) -> None: - """Read each config module from their respective files + _token: Token[Config] | None = None + _resolved: bool = False - If a files does not exists, uses the default config and creates it""" - self.auth = AuthSettings.load_file(self.auth_config_file, "socialmediagirls_username:") - self.settings = ConfigSettings.load_file(self.config_file, "download_error_urls_filename:") - self.global_settings = GlobalSettings.load_file(appdata.global_config_file, "Dupe_Cleanup_Options:") - self.apprise_urls = get_apprise_urls(file=self.apprise_file) - - def _resolve_all_paths(self) -> None: - self.auth.resolve_paths() - self.settings.resolve_paths() - self.global_settings.resolve_paths() - - def _all_settings(self) -> tuple[ConfigSettings, AuthSettings, GlobalSettings]: - return self.settings, self.auth, self.global_settings + def __enter__(self) -> Self: + self._token = _config.set(self) + return self - def write_updated_config(self) -> None: - """Writes config to disk.""" - self.auth.save_to_file(self.auth_config_file) - self.settings.save_to_file(self.config_file) - self.global_settings.save_to_file(appdata.global_config_file) + def __exit__(self, *_) -> None: + assert self._token is not None + _config.reset(self._token) + def save(self, file: Path) -> None: + from cyberdrop_dl.utils import yaml -def get_default_config() -> str: - ... - # return cache.get(cache.DEFAULT_CONFIG_KEY) or "Default" + yaml.save(file, self) + def resolve_paths(self) -> None: + if not self._resolved: + self._resolve_paths(self) + self._resolved = True -def get_all_configs() -> list: - return sorted(config.name for config in appdata.configs_dir.iterdir() if config.is_dir()) + @classmethod + def _resolve_paths(cls, model: BaseModel) -> None: + for name, value in vars(model).items(): + if isinstance(value, Path): + setattr(model, name, value.resolve()) + elif isinstance(value, BaseModel): + cls._resolve_paths(value) -def set_default_config(config_name: str) -> None: - ... - # cache.save(cache.DEFAULT_CONFIG_KEY, config_name) +def load(file: Path) -> Config: + from cyberdrop_dl.utils import yaml -def delete_config(config_name: str) -> None: - all_configs = get_all_configs() - assert config_name in all_configs - assert len(all_configs) > 1 - assert config_name != current_config.folder.name - all_configs.remove(config_name) + default = Config() + if not file.is_file(): + config = default + overwrite = True - # if cache.get(cache.DEFAULT_CONFIG_KEY) == config_name: - # set_default_config(all_configs[0]) + else: + all_fields = get_model_fields(default, exclude_unset=False) + config = Config.model_validate(yaml.load(file)) + set_fields = get_model_fields(config) + overwrite = all_fields != set_fields - config_path = appdata.configs_dir / config_name - shutil.rmtree(config_path) + if overwrite: + config.save(file) + return config -def load_config(config_name: str) -> None: - global current_config, auth, global_settings, settings - assert config_name - current_config = Config(config_name) - current_config._load() - current_config._resolve_all_paths() - settings, auth, global_settings = current_config._all_settings() - settings.logs._set_output_filenames(constants.STARTUP_TIME) - sleep(1) +def get() -> Config: + return _config.get() diff --git a/cyberdrop_dl/config/_common.py b/cyberdrop_dl/config/_common.py deleted file mode 100755 index 27a4d9a9d..000000000 --- a/cyberdrop_dl/config/_common.py +++ /dev/null @@ -1,66 +0,0 @@ -from pathlib import Path -from typing import Self, Unpack - -from cyclopts import Parameter -from pydantic import BaseModel, ConfigDict - -from cyberdrop_dl.exceptions import InvalidYamlError -from cyberdrop_dl.models import AliasModel, get_model_fields -from cyberdrop_dl.utils import yaml - - -@Parameter(name="*") -class FlatNamespace: ... - - -class _Settings(FlatNamespace, AliasModel): ... - - -class Settings(_Settings): - def __init_subclass__(cls, name: str | None = None, **kwargs: Unpack[ConfigDict]) -> None: - _ = Parameter(group=name or cls.__name__)(cls) - return super().__init_subclass__(**kwargs) - - -class ConfigFile(_Settings): - @classmethod - def load_file(cls, file: Path, update_if_has_string: str) -> Self: - default = cls() - if not file.is_file(): - config = default - needs_update = True - - else: - all_fields = get_model_fields(default, exclude_unset=False) - config = cls.model_validate(yaml.load(file)) - set_fields = get_model_fields(config) - needs_update = all_fields != set_fields or _is_in_file(update_if_has_string, file) - - if needs_update: - config.save_to_file(file) - - return config - - def save_to_file(self, file: Path) -> None: - yaml.save(file, self) - - def resolve_paths(self) -> None: - self._resolve_paths(self) - - @classmethod - def _resolve_paths(cls, model: BaseModel) -> None: - for name, value in vars(model).items(): - if isinstance(value, Path): - setattr(model, name, value.resolve()) - - elif isinstance(value, BaseModel): - cls._resolve_paths(value) - - -def _is_in_file(search_value: str, file: Path) -> bool: - try: - return search_value.casefold() in file.read_text().casefold() - except FileNotFoundError: - return False - except Exception as e: - raise InvalidYamlError(file, e) from e diff --git a/cyberdrop_dl/config/auth.py b/cyberdrop_dl/config/auth.py new file mode 100755 index 000000000..8d49ee95f --- /dev/null +++ b/cyberdrop_dl/config/auth.py @@ -0,0 +1,47 @@ +from cyberdrop_dl.models import AliasModel, Settings + + +class Coomer(AliasModel): + session: str = "" + + +class Imgur(AliasModel): + client_id: str = "" + + +class MegaNz(AliasModel): + email: str = "" + password: str = "" + + +class JDownloader(AliasModel): + username: str = "" + password: str = "" + device: str = "" + + +class Kemono(AliasModel): + session: str = "" + + +class GoFile(AliasModel): + api_key: str = "" + + +class Pixeldrain(AliasModel): + api_key: str = "" + + +class RealDebrid(AliasModel): + api_key: str = "" + + +class AuthSettings(Settings): + coomer: Coomer = Coomer() + gofile: GoFile = GoFile() + imgur: Imgur = Imgur() + jdownloader: JDownloader = JDownloader() + kemono: Kemono = Kemono() + meganz: MegaNz = MegaNz() + pixeldrain: Pixeldrain = Pixeldrain() + realdebrid: RealDebrid = RealDebrid() diff --git a/cyberdrop_dl/config/auth_model.py b/cyberdrop_dl/config/auth_model.py deleted file mode 100755 index f511b7f35..000000000 --- a/cyberdrop_dl/config/auth_model.py +++ /dev/null @@ -1,50 +0,0 @@ -from pydantic import BaseModel - -from cyberdrop_dl.config._common import ConfigFile -from cyberdrop_dl.models import AliasModel - - -class CoomerAuth(BaseModel): - session: str = "" - - -class ImgurAuth(AliasModel): - client_id: str = "" - - -class MegaNzAuth(AliasModel): - email: str = "" - password: str = "" - - -class JDownloaderAuth(AliasModel): - username: str = "" - password: str = "" - device: str = "" - - -class KemonoAuth(AliasModel): - session: str = "" - - -class GoFileAuth(AliasModel): - api_key: str = "" - - -class PixeldrainAuth(AliasModel): - api_key: str = "" - - -class RealDebridAuth(AliasModel): - api_key: str = "" - - -class AuthSettings(ConfigFile): - coomer: CoomerAuth = CoomerAuth() - gofile: GoFileAuth = GoFileAuth() - imgur: ImgurAuth = ImgurAuth() - jdownloader: JDownloaderAuth = JDownloaderAuth() - kemono: KemonoAuth = KemonoAuth() - meganz: MegaNzAuth = MegaNzAuth() - pixeldrain: PixeldrainAuth = PixeldrainAuth() - realdebrid: RealDebridAuth = RealDebridAuth() diff --git a/cyberdrop_dl/config/global_model.py b/cyberdrop_dl/config/global_model.py deleted file mode 100755 index 56aa76b43..000000000 --- a/cyberdrop_dl/config/global_model.py +++ /dev/null @@ -1,113 +0,0 @@ -# ruff: noqa: RUF012 -import random -from typing import Literal - -import aiohttp -from pydantic import ( - ByteSize, - NonNegativeFloat, - PositiveFloat, - PositiveInt, - field_serializer, - field_validator, -) -from yarl import URL - -from cyberdrop_dl.config._common import ConfigFile, Settings -from cyberdrop_dl.models.types import ByteSizeSerilized, HttpURL, ListNonEmptyStr, ListPydanticURL, NonEmptyStr -from cyberdrop_dl.models.validators import falsy_as, falsy_as_none, to_bytesize - -MIN_REQUIRED_FREE_SPACE = to_bytesize("512MB") -DEFAULT_REQUIRED_FREE_SPACE = to_bytesize("5GB") - - -class General(Settings): - ssl_context: Literal["truststore", "certifi", "truststore+certifi"] | None = "truststore+certifi" - disable_crawlers: ListNonEmptyStr = [] - flaresolverr: HttpURL | None = None - max_file_name_length: PositiveInt = 95 - max_folder_name_length: PositiveInt = 60 - proxy: HttpURL | None = None - required_free_space: ByteSizeSerilized = DEFAULT_REQUIRED_FREE_SPACE - user_agent: NonEmptyStr = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/142.0" - - @field_validator("ssl_context", mode="before") - @classmethod - def ssl(cls, value: str | None) -> str | None: - if isinstance(value, str): - value = value.lower().strip() - return falsy_as(value, None) - - @field_validator("disable_crawlers", mode="after") - @classmethod - def unique_list(cls, value: list[str]) -> list[str]: - return sorted(set(value)) - - @field_serializer("flaresolverr", "proxy") - def serialize(self, value: URL | str) -> str | None: - return falsy_as(value, None, str) - - @field_validator("flaresolverr", "proxy", mode="before") - @classmethod - def convert_to_str(cls, value: str) -> str | None: - return falsy_as(value, None, str) - - @field_validator("required_free_space", mode="after") - @classmethod - def override_min(cls, value: ByteSize) -> ByteSize: - return max(value, MIN_REQUIRED_FREE_SPACE) - - -class RateLimiting(Settings): - download_attempts: PositiveInt = 2 - download_delay: NonNegativeFloat = 0.0 - download_speed_limit: ByteSizeSerilized = ByteSize(0) - jitter: NonNegativeFloat = 0 - max_simultaneous_downloads_per_domain: PositiveInt = 5 - max_simultaneous_downloads: PositiveInt = 15 - rate_limit: PositiveFloat = 25 - - connection_timeout: PositiveFloat = 15 - read_timeout: PositiveFloat | None = 300 - - @field_validator("read_timeout", mode="before") - @classmethod - def parse_timeouts(cls, value: object) -> object | None: - return falsy_as_none(value) - - def model_post_init(self, *_) -> None: - self._curl_timeout = self.connection_timeout - if self.read_timeout is not None: - self._curl_timeout = self.connection_timeout, self.read_timeout - self._aiohttp_timeout: aiohttp.ClientTimeout = aiohttp.ClientTimeout( - total=None, - sock_connect=self.connection_timeout, - sock_read=self.read_timeout, - ) - - @property - def total_delay(self) -> NonNegativeFloat: - """download_delay + jitter""" - return self.download_delay + self.get_jitter() - - def get_jitter(self) -> NonNegativeFloat: - """Get a random number in the range [0, self.jitter]""" - return random.uniform(0, self.jitter) - - -class UIOptions(Settings): - refresh_rate: PositiveInt = 10 - - -class GenericCrawlerInstances(Settings): - wordpress_media: ListPydanticURL = [] - wordpress_html: ListPydanticURL = [] - discourse: ListPydanticURL = [] - chevereto: ListPydanticURL = [] - - -class GlobalSettings(ConfigFile): - general: General = General() - rate_limiting_options: RateLimiting = RateLimiting() - ui_options: UIOptions = UIOptions() - generic_crawlers_instances: GenericCrawlerInstances = GenericCrawlerInstances() diff --git a/cyberdrop_dl/config/config_model.py b/cyberdrop_dl/config/settings.py similarity index 66% rename from cyberdrop_dl/config/config_model.py rename to cyberdrop_dl/config/settings.py index e5ccab729..bf459a9e7 100755 --- a/cyberdrop_dl/config/config_model.py +++ b/cyberdrop_dl/config/settings.py @@ -1,30 +1,44 @@ # ruff: noqa: RUF012 import itertools +import random import re from datetime import date, datetime, timedelta from logging import DEBUG from pathlib import Path - -from pydantic import ByteSize, Field, NonNegativeInt, field_validator +from typing import Literal + +import aiohttp +from pydantic import ( + ByteSize, + Field, + NonNegativeFloat, + NonNegativeInt, + PositiveFloat, + PositiveInt, + field_serializer, + field_validator, +) +from yarl import URL from cyberdrop_dl import constants from cyberdrop_dl.constants import BROWSERS, DEFAULT_APP_STORAGE, DEFAULT_DOWNLOAD_STORAGE, Hashing -from cyberdrop_dl.models import HttpAppriseURL +from cyberdrop_dl.models import HttpAppriseURL, Settings, SettingsGroup from cyberdrop_dl.models.types import ( ByteSizeSerilized, + HttpURL, ListNonEmptyStr, ListNonNegativeInt, + ListPydanticURL, LogPath, MainLogPath, NonEmptyStr, NonEmptyStrOrNone, PathOrNone, ) -from cyberdrop_dl.models.validators import falsy_as, to_timedelta -from cyberdrop_dl.utils.strings import validate_format_string -from cyberdrop_dl.utils.utilities import purge_dir_tree +from cyberdrop_dl.models.validators import falsy_as, falsy_as_none, to_bytesize, to_timedelta -from ._common import ConfigFile, Settings +MIN_REQUIRED_FREE_SPACE = to_bytesize("512MB") +DEFAULT_REQUIRED_FREE_SPACE = to_bytesize("5GB") _SORTING_COMMON_FIELDS = { "base_dir", @@ -38,7 +52,15 @@ } -class DownloadOptions(Settings): +class FormatValidator: + @classmethod + def _validate_format(cls, value: str, valid_keys: set[str]) -> None: + from cyberdrop_dl.utils.strings import validate_format_string + + validate_format_string(value, valid_keys) + + +class DownloadOptions(SettingsGroup): block_download_sub_folders: bool = False disable_download_attempt_limit: bool = False disable_file_timestamps: bool = False @@ -57,19 +79,21 @@ class DownloadOptions(Settings): @field_validator("separate_posts_format", mode="after") @classmethod def valid_format(cls, value: str) -> str: + from cyberdrop_dl.utils.strings import validate_format_string + valid_keys = {"default", "title", "id", "number", "date"} validate_format_string(value, valid_keys) return value -class Files(Settings): +class Files(SettingsGroup): download_folder: Path = Field(default=DEFAULT_DOWNLOAD_STORAGE, validation_alias="d") dump_json: bool = Field(default=False, validation_alias="j") input_file: Path = Field(default=DEFAULT_APP_STORAGE / "Configs/{config}/URLs.txt", validation_alias="i") save_pages_html: bool = False -class Logs(Settings): +class Logs(SettingsGroup): download_error_urls: LogPath = Path("Download_Error_URLs.csv") last_forum_post: LogPath = Path("Last_Scraped_Forum_Posts.csv") log_folder: Path = DEFAULT_APP_STORAGE / "Configs/{config}/Logs" @@ -107,16 +131,19 @@ def _set_output_filenames(self, now: datetime) -> None: log_file.parent.mkdir(exist_ok=True, parents=True) def _delete_old_logs_and_folders(self, now: datetime | None = None) -> None: + from cyberdrop_dl.utils.utilities import purge_dir_tree + if now and self.logs_expire_after: for file in itertools.chain(self.log_folder.rglob("*.log"), self.log_folder.rglob("*.csv")): file_date = file.stat().st_ctime t_delta = now - datetime.fromtimestamp(file_date) if t_delta > self.logs_expire_after: file.unlink(missing_ok=True) + purge_dir_tree(self.log_folder) -class FileSizeLimits(Settings): +class FileSizeLimits(SettingsGroup): maximum_image_size: ByteSizeSerilized = ByteSize(0) maximum_other_size: ByteSizeSerilized = ByteSize(0) maximum_video_size: ByteSizeSerilized = ByteSize(0) @@ -125,7 +152,7 @@ class FileSizeLimits(Settings): minimum_video_size: ByteSizeSerilized = ByteSize(0) -class MediaDurationLimits(Settings): +class MediaDurationLimits(SettingsGroup): maximum_video_duration: timedelta = timedelta(seconds=0) maximum_audio_duration: timedelta = timedelta(seconds=0) minimum_video_duration: timedelta = timedelta(seconds=0) @@ -145,7 +172,7 @@ def parse_runtime_duration(input_date: timedelta | str | int | None) -> timedelt return to_timedelta(input_date) -class IgnoreOptions(Settings): +class IgnoreOptions(SettingsGroup): exclude_audio: bool = False exclude_images: bool = False exclude_other: bool = False @@ -171,7 +198,7 @@ def is_valid_regex(cls, value: str | None) -> str | None: return value -class Runtime(Settings): +class Runtime(SettingsGroup): console_log_level: NonNegativeInt = 100 deep_scrape: bool = False delete_partial_files: bool = False @@ -187,7 +214,7 @@ class Runtime(Settings): update_last_forum_post: bool = True -class Sorting(Settings): +class Sorting(FormatValidator, SettingsGroup): scan_folder: PathOrNone = None sort_downloads: bool = False sort_folder: Path = DEFAULT_DOWNLOAD_STORAGE / "Cyberdrop-DL Sorted Downloads" @@ -202,7 +229,7 @@ class Sorting(Settings): def valid_sort_incrementer_format(cls, value: str | None) -> str | None: if value is not None: valid_keys = {"i"} - validate_format_string(value, valid_keys) + cls._validate_format(value, valid_keys) return value @field_validator("sorted_audio", mode="after") @@ -210,7 +237,7 @@ def valid_sort_incrementer_format(cls, value: str | None) -> str | None: def valid_sorted_audio(cls, value: str | None) -> str | None: if value is not None: valid_keys = _SORTING_COMMON_FIELDS | {"bitrate", "duration", "length", "sample_rate"} - validate_format_string(value, valid_keys) + cls._validate_format(value, valid_keys) return value @field_validator("sorted_image", mode="after") @@ -218,7 +245,7 @@ def valid_sorted_audio(cls, value: str | None) -> str | None: def valid_sorted_image(cls, value: str | None) -> str | None: if value is not None: valid_keys = _SORTING_COMMON_FIELDS | {"height", "resolution", "width"} - validate_format_string(value, valid_keys) + cls._validate_format(value, valid_keys) return value @field_validator("sorted_other", mode="after") @@ -226,7 +253,7 @@ def valid_sorted_image(cls, value: str | None) -> str | None: def valid_sorted_other(cls, value: str | None) -> str | None: if value is not None: valid_keys = _SORTING_COMMON_FIELDS | {"bitrate", "duration", "length", "sample_rate"} - validate_format_string(value, valid_keys) + cls._validate_format(value, valid_keys) return value @field_validator("sorted_video", mode="after") @@ -242,11 +269,11 @@ def valid_sorted_video(cls, value: str | None) -> str | None: "resolution", "width", } - validate_format_string(value, valid_keys) + cls._validate_format(value, valid_keys) return value -class Cookies(Settings): +class Cookies(SettingsGroup): auto_import: bool = False browser: BROWSERS | None = BROWSERS.firefox @@ -255,7 +282,7 @@ def model_post_init(self, *_) -> None: raise ValueError("You need to provide a browser for auto_import to work") -class Dedupe(Settings): +class Dedupe(SettingsGroup): add_md5_hash: bool = False add_sha256_hash: bool = False auto_dedupe: bool = True @@ -263,14 +290,107 @@ class Dedupe(Settings): send_deleted_to_trash: bool = True -class ConfigSettings(ConfigFile): +# ruff: noqa: RUF012 + + +class General(SettingsGroup): + ssl_context: Literal["truststore", "certifi", "truststore+certifi"] | None = "truststore+certifi" + disable_crawlers: ListNonEmptyStr = [] + flaresolverr: HttpURL | None = None + max_file_name_length: PositiveInt = 95 + max_folder_name_length: PositiveInt = 60 + proxy: HttpURL | None = None + required_free_space: ByteSizeSerilized = DEFAULT_REQUIRED_FREE_SPACE + user_agent: NonEmptyStr = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/142.0" + + @field_validator("ssl_context", mode="before") + @classmethod + def ssl(cls, value: str | None) -> str | None: + if isinstance(value, str): + value = value.lower().strip() + return falsy_as(value, None) + + @field_validator("disable_crawlers", mode="after") + @classmethod + def unique_list(cls, value: list[str]) -> list[str]: + return sorted(set(value)) + + @field_serializer("flaresolverr", "proxy") + def serialize(self, value: URL | str) -> str | None: + return falsy_as(value, None, str) + + @field_validator("flaresolverr", "proxy", mode="before") + @classmethod + def convert_to_str(cls, value: str) -> str | None: + return falsy_as(value, None, str) + + @field_validator("required_free_space", mode="after") + @classmethod + def override_min(cls, value: ByteSize) -> ByteSize: + return max(value, MIN_REQUIRED_FREE_SPACE) + + +class RateLimiting(SettingsGroup): + download_attempts: PositiveInt = 2 + download_delay: NonNegativeFloat = 0.0 + download_speed_limit: ByteSizeSerilized = ByteSize(0) + jitter: NonNegativeFloat = 0 + max_simultaneous_downloads_per_domain: PositiveInt = 5 + max_simultaneous_downloads: PositiveInt = 15 + rate_limit: PositiveFloat = 25 + + connection_timeout: PositiveFloat = 15 + read_timeout: PositiveFloat | None = 300 + + @field_validator("read_timeout", mode="before") + @classmethod + def parse_timeouts(cls, value: object) -> object | None: + return falsy_as_none(value) + + def model_post_init(self, *_) -> None: + self._curl_timeout = self.connection_timeout + if self.read_timeout is not None: + self._curl_timeout = self.connection_timeout, self.read_timeout + + self._aiohttp_timeout: aiohttp.ClientTimeout = aiohttp.ClientTimeout( + total=None, + sock_connect=self.connection_timeout, + sock_read=self.read_timeout, + ) + + @property + def total_delay(self) -> NonNegativeFloat: + """download_delay + jitter""" + return self.download_delay + self.get_jitter() + + def get_jitter(self) -> NonNegativeFloat: + """Get a random number in the range [0, self.jitter]""" + return random.uniform(0, self.jitter) + + +class UIOptions(SettingsGroup): + refresh_rate: PositiveInt = 10 + + +class GenericCrawlerInstances(SettingsGroup): + wordpress_media: ListPydanticURL = [] + wordpress_html: ListPydanticURL = [] + discourse: ListPydanticURL = [] + chevereto: ListPydanticURL = [] + + +class ConfigSettings(Settings): browser_cookies: Cookies = Cookies() download_options: DownloadOptions = DownloadOptions() dupe_cleanup_options: Dedupe = Dedupe() file_size_limits: FileSizeLimits = FileSizeLimits() - media_duration_limits: MediaDurationLimits = MediaDurationLimits() files: Files = Files() + general: General = General() + generic_crawlers_instances: GenericCrawlerInstances = GenericCrawlerInstances() ignore_options: IgnoreOptions = IgnoreOptions() logs: Logs = Logs() + media_duration_limits: MediaDurationLimits = MediaDurationLimits() + rate_limiting_options: RateLimiting = RateLimiting() runtime_options: Runtime = Runtime() sorting: Sorting = Sorting() + ui_options: UIOptions = UIOptions() diff --git a/cyberdrop_dl/crawlers/wordpress/models.py b/cyberdrop_dl/crawlers/wordpress/models.py index 11f614d8e..b3b90172a 100644 --- a/cyberdrop_dl/crawlers/wordpress/models.py +++ b/cyberdrop_dl/crawlers/wordpress/models.py @@ -7,7 +7,7 @@ from pydantic import AfterValidator, AliasPath, BaseModel, Field from cyberdrop_dl.compat import StrEnum -from cyberdrop_dl.models.base_models import SequenceModel +from cyberdrop_dl.models.base import SequenceModel _ModelT = TypeVar("_ModelT", bound=BaseModel) diff --git a/cyberdrop_dl/managers/manager.py b/cyberdrop_dl/managers/manager.py index ca87a508c..fb83f44be 100644 --- a/cyberdrop_dl/managers/manager.py +++ b/cyberdrop_dl/managers/manager.py @@ -138,7 +138,7 @@ async def async_db_hash_startup(self) -> None: def process_additive_args(self) -> None: cli_general_options = self.parsed_args.global_settings.general - cli_ignore_options = self.parsed_args.config_settings.ignore_options + cli_ignore_options = self.parsed_args.config.ignore_options config_ignore_options = self.config_manager.settings_data.ignore_options config_general_options = self.config_manager.global_settings_data.general @@ -150,9 +150,9 @@ def args_consolidation(self) -> None: """Consolidates runtime arguments with config values.""" self.process_additive_args() - conf = merge_models(self.config_manager.settings_data, self.parsed_args.config_settings) + conf = merge_models(self.config_manager.settings_data, self.parsed_args.config) global_conf = merge_models(self.config_manager.global_settings_data, self.parsed_args.global_settings) - deep_scrape = self.parsed_args.config_settings.runtime_options.deep_scrape or self.config_manager.deep_scrape + deep_scrape = self.parsed_args.config.runtime_options.deep_scrape or self.config_manager.deep_scrape self.config_manager.settings_data = conf self.config_manager.global_settings_data = global_conf diff --git a/cyberdrop_dl/models/__init__.py b/cyberdrop_dl/models/__init__.py index f34b64ca4..5394af909 100755 --- a/cyberdrop_dl/models/__init__.py +++ b/cyberdrop_dl/models/__init__.py @@ -1,6 +1,6 @@ from pydantic import BaseModel -from .base_models import AliasModel, AppriseURLModel, FrozenModel, HttpAppriseURL +from .base import AliasModel, AppriseURLModel, FlatNamespace, FrozenModel, HttpAppriseURL, Settings, SettingsGroup def get_model_fields(model: BaseModel, *, exclude_unset: bool = True) -> set[str]: @@ -12,4 +12,13 @@ def get_model_fields(model: BaseModel, *, exclude_unset: bool = True) -> set[str return fields -__all__ = ["AliasModel", "AppriseURLModel", "FrozenModel", "HttpAppriseURL", "get_model_fields"] +__all__ = [ + "AliasModel", + "AppriseURLModel", + "FlatNamespace", + "FrozenModel", + "HttpAppriseURL", + "Settings", + "SettingsGroup", + "get_model_fields", +] diff --git a/cyberdrop_dl/models/base_models.py b/cyberdrop_dl/models/base.py similarity index 80% rename from cyberdrop_dl/models/base_models.py rename to cyberdrop_dl/models/base.py index 14f493ce5..f2381570b 100755 --- a/cyberdrop_dl/models/base_models.py +++ b/cyberdrop_dl/models/base.py @@ -1,9 +1,10 @@ """Pydantic models""" from collections.abc import Iterator, Mapping, Sequence -from typing import TypeVar +from typing import TypeVar, Unpack import yarl +from cyclopts import Parameter from pydantic import ( AnyUrl, BaseModel, @@ -29,6 +30,19 @@ class FrozenModel(BaseModel): model_config = ConfigDict(frozen=True, defer_build=True) +@Parameter(name="*") +class FlatNamespace: ... + + +class Settings(FlatNamespace, AliasModel): ... + + +class SettingsGroup(Settings): + def __init_subclass__(cls, name: str | None = None, **kwargs: Unpack[ConfigDict]) -> None: + _ = Parameter(group=name or cls.__name__)(cls) + return super().__init_subclass__(**kwargs) + + class AppriseURLModel(FrozenModel): url: Secret[AnyUrl] tags: set[str] = set() diff --git a/cyberdrop_dl/utils/webhook.py b/cyberdrop_dl/utils/webhook.py index a80197348..5c75d6f65 100644 --- a/cyberdrop_dl/utils/webhook.py +++ b/cyberdrop_dl/utils/webhook.py @@ -16,7 +16,7 @@ from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL from cyberdrop_dl.managers.manager import Manager - from cyberdrop_dl.models.base_models import HttpAppriseURL + from cyberdrop_dl.models.base import HttpAppriseURL _DEFAULT_DIFF_LINE_FORMAT: str = "{}" From 6b6cd6e09fe247a02ea86a2208ed58a620afdcca Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Sun, 22 Feb 2026 18:39:17 -0500 Subject: [PATCH 05/23] refactor: update usage --- cyberdrop_dl/appdata.py | 39 +++++ cyberdrop_dl/clients/download_client.py | 15 +- cyberdrop_dl/clients/flaresolverr.py | 14 +- cyberdrop_dl/clients/hash_client.py | 4 +- cyberdrop_dl/clients/scraper_client.py | 2 +- cyberdrop_dl/config/__init__.py | 27 ++- cyberdrop_dl/crawlers/__init__.py | 2 +- cyberdrop_dl/crawlers/_forum.py | 3 +- cyberdrop_dl/crawlers/crawler.py | 22 ++- cyberdrop_dl/crawlers/filester.py | 2 +- cyberdrop_dl/crawlers/kemono.py | 5 +- cyberdrop_dl/managers/client_manager.py | 69 ++++---- cyberdrop_dl/managers/manager.py | 209 +++++------------------ cyberdrop_dl/managers/path_manager.py | 8 +- cyberdrop_dl/managers/storage_manager.py | 3 +- cyberdrop_dl/models/__init__.py | 30 ++++ 16 files changed, 213 insertions(+), 241 deletions(-) create mode 100644 cyberdrop_dl/appdata.py diff --git a/cyberdrop_dl/appdata.py b/cyberdrop_dl/appdata.py new file mode 100644 index 000000000..462f8c1e0 --- /dev/null +++ b/cyberdrop_dl/appdata.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +import dataclasses +from contextvars import ContextVar +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pathlib import Path + +_appdata: ContextVar[AppData] = ContextVar("_appdata") + + +@dataclasses.dataclass(slots=True) +class AppData: + path: Path + cookies_dir: Path = dataclasses.field(init=False) + cache_file: Path = dataclasses.field(init=False) + default_config: Path = dataclasses.field(init=False) + db_file: Path = dataclasses.field(init=False) + + def __post_init__(self) -> None: + self.cookies_dir = self.path / "cookies" + self.cache_file = self.path / "cache.yaml" + self.default_config = self.path / "config.yaml" + self.db_file = self.path / "cyberdrop.db" + + def __fspath__(self) -> str: + return str(self) + + def __str__(self) -> str: + return str(self.path) + + def mkdirs(self) -> None: + for dir in (self.cookies_dir,): + dir.mkdir(parents=True, exist_ok=True) + + +def get() -> AppData: + return _appdata.get() diff --git a/cyberdrop_dl/clients/download_client.py b/cyberdrop_dl/clients/download_client.py index 738cea3ec..a6b69b20c 100644 --- a/cyberdrop_dl/clients/download_client.py +++ b/cyberdrop_dl/clients/download_client.py @@ -10,14 +10,14 @@ import aiofiles -from cyberdrop_dl import constants +from cyberdrop_dl import config, constants from cyberdrop_dl.clients.response import AbstractResponse from cyberdrop_dl.constants import FILE_FORMATS from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL from cyberdrop_dl.exceptions import DDOSGuardError, DownloadError, InvalidContentTypeError, SlowDownloadError from cyberdrop_dl.utils import aio, dates from cyberdrop_dl.utils.aio import WeakAsyncLocks -from cyberdrop_dl.utils.logger import log, log_debug +from cyberdrop_dl.utils.logger import log from cyberdrop_dl.utils.utilities import get_size_or_none if TYPE_CHECKING: @@ -67,7 +67,7 @@ async def _track_errors(self, domain: str): def _get_download_headers(self, domain: str, referer: AbsoluteHttpURL) -> dict[str, str]: download_headers = { - "User-Agent": self.manager.global_config.general.user_agent, + "User-Agent": config.get().general.user_agent, "Referer": str(referer), } auth_data = self.manager.config_manager.authentication_data @@ -296,7 +296,7 @@ def check_download_speed() -> None: async def download_file(self, domain: str, media_item: MediaItem) -> bool: """Starts a file.""" - if self.manager.config.download_options.skip_download_mark_completed and not media_item.is_segment: + if config.get().download_options.skip_download_mark_completed and not media_item.is_segment: log(f"Download Removed {media_item.url} due to mark completed option", 10) self.manager.progress_manager.download_progress.add_skipped() # set completed path @@ -307,7 +307,7 @@ async def download_file(self, domain: str, media_item: MediaItem) -> bool: downloaded = await self._download(domain, media_item) if downloaded: - await asyncio.to_thread(media_item.partial_file.rename, media_item.complete_file) + _ = await asyncio.to_thread(media_item.partial_file.rename, media_item.complete_file) if not media_item.is_segment: proceed = await self.client_manager.check_file_duration(media_item) await self.manager.db_manager.history_table.add_duration(domain, media_item) @@ -357,8 +357,6 @@ async def handle_media_item_completion(self, media_item: MediaItem, downloaded: def get_download_dir(self, media_item: MediaItem) -> Path: """Returns the download directory for the media item.""" download_folder = media_item.download_folder - if self.manager.parsed_args.cli_only_args.retry_any: - return download_folder if self.manager.config_manager.settings_data.download_options.block_download_sub_folders: while download_folder.parent != self.manager.path_manager.download_folder: @@ -380,9 +378,6 @@ async def get_final_file_info(self, media_item: MediaItem, domain: str) -> tuple proceed = True skip = False - if not TYPE_CHECKING: - log = log_debug if media_item.is_segment else globals()["log"] - while True: if expected_size and not media_item.is_segment: file_size_check = self.check_filesize_limits(media_item) diff --git a/cyberdrop_dl/clients/flaresolverr.py b/cyberdrop_dl/clients/flaresolverr.py index 487134afe..b683646b8 100644 --- a/cyberdrop_dl/clients/flaresolverr.py +++ b/cyberdrop_dl/clients/flaresolverr.py @@ -10,7 +10,7 @@ import aiohttp from multidict import CIMultiDict, CIMultiDictProxy -from cyberdrop_dl import ddos_guard +from cyberdrop_dl import config, ddos_guard from cyberdrop_dl.compat import StrEnum from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL from cyberdrop_dl.exceptions import DDOSGuardError @@ -76,8 +76,8 @@ def __init__(self, manager: Manager) -> None: self._session_id: str = "" self._session_lock, self._request_lock = asyncio.Lock(), asyncio.Lock() self._next_request_id: Callable[[], int] = itertools.count(1).__next__ - if manager.global_config.general.flaresolverr: - self.url = manager.global_config.general.flaresolverr / "v1" + if flare := config.get().general.flaresolverr: + self.url = flare / "v1" else: self.url = None @@ -116,7 +116,7 @@ async def request(self, url: AbsoluteHttpURL, data: Any = None) -> FlareSolverrS return resp.solution async def _check_user_agent(self, solution: FlareSolverrSolution) -> None: - cdl_user_agent = self.manager.global_config.general.user_agent + cdl_user_agent = config.get().general.user_agent mismatch_ua_msg = ( "Config user_agent and flaresolverr user_agent do not match:" f"\n Cyberdrop-DL: '{cdl_user_agent}'" @@ -137,12 +137,12 @@ async def _request(self, command: _Command, /, data: Any = None, **kwargs: Any) if not self.url: raise DDOSGuardError("Found DDoS challenge, but FlareSolverr is not configured") - timeout = self.manager.global_config.rate_limiting_options._aiohttp_timeout + timeout = config.get().rate_limiting_options._aiohttp_timeout if command is _Command.CREATE_SESSION: timeout = aiohttp.ClientTimeout(total=5 * 60, connect=60) # 5 minutes to create session # timeout in milliseconds (60s) - playload = {"cmd": command, "maxTimeout": 60_000} | kwargs + playload: dict[str, Any] = {"cmd": command, "maxTimeout": 60_000} | kwargs if data: assert command is _Command.POST_REQUEST @@ -164,7 +164,7 @@ async def _request(self, command: _Command, /, data: Any = None, **kwargs: Any) async def _create_session(self) -> None: session_id = "cyberdrop-dl" kwargs = {} - if proxy := self.manager.global_config.general.proxy: + if proxy := config.get().general.proxy: kwargs["proxy"] = {"url": str(proxy)} resp = await self._request(_Command.CREATE_SESSION, session=session_id, **kwargs) diff --git a/cyberdrop_dl/clients/hash_client.py b/cyberdrop_dl/clients/hash_client.py index b0f09a735..9b3d77ff8 100644 --- a/cyberdrop_dl/clients/hash_client.py +++ b/cyberdrop_dl/clients/hash_client.py @@ -7,7 +7,7 @@ from send2trash import send2trash -from cyberdrop_dl import constants +from cyberdrop_dl import config, constants from cyberdrop_dl.constants import Hashing from cyberdrop_dl.ui.prompts.basic_prompts import enter_to_continue from cyberdrop_dl.utils.logger import log @@ -55,7 +55,7 @@ def _deleted_file_suffix(self) -> Literal["Sent to trash", "Permanently deleted" @property def dupe_cleanup_options(self) -> Dedupe: - return self.manager.config.dupe_cleanup_options + return config.get().dupe_cleanup_options async def hash_directory(self, path: Path) -> None: path = Path(path) diff --git a/cyberdrop_dl/clients/scraper_client.py b/cyberdrop_dl/clients/scraper_client.py index 785a5531b..90d07692b 100644 --- a/cyberdrop_dl/clients/scraper_client.py +++ b/cyberdrop_dl/clients/scraper_client.py @@ -71,7 +71,7 @@ async def _request( request_params["json"] = json if not impersonate: - headers.setdefault("user-agent", self.client_manager.manager.global_config.general.user_agent) + headers.setdefault("user-agent", self.client_manager.config.get().general.user_agent) async with self.__request_context(url, method, request_params, impersonate, cache_disabled) as resp: exc = None diff --git a/cyberdrop_dl/config/__init__.py b/cyberdrop_dl/config/__init__.py index 964fa1462..5f524ab11 100755 --- a/cyberdrop_dl/config/__init__.py +++ b/cyberdrop_dl/config/__init__.py @@ -8,7 +8,7 @@ from cyberdrop_dl.config.auth import AuthSettings from cyberdrop_dl.config.settings import ConfigSettings -from cyberdrop_dl.models import Settings, get_model_fields +from cyberdrop_dl.models import get_model_fields, merge_models _config: ContextVar[Config] = ContextVar("_config") _appdata: ContextVar[AppData] = ContextVar("_appdata") @@ -36,13 +36,17 @@ def mkdirs(self) -> None: dir.mkdir(parents=True, exist_ok=True) -class Config(Settings): +class Config(ConfigSettings): auth: AuthSettings = AuthSettings() - settings: ConfigSettings = ConfigSettings() + _source: Path | None = None _token: Token[Config] | None = None _resolved: bool = False + @property + def source(self) -> Path | None: + return self._source + def __enter__(self) -> Self: self._token = _config.set(self) return self @@ -70,6 +74,9 @@ def _resolve_paths(cls, model: BaseModel) -> None: elif isinstance(value, BaseModel): cls._resolve_paths(value) + def update(self, other: Self) -> Self: + return merge_models(self, other) + def load(file: Path) -> Config: from cyberdrop_dl.utils import yaml @@ -88,8 +95,22 @@ def load(file: Path) -> Config: if overwrite: config.save(file) + config._source = file # pyright: ignore[reportPrivateUsage] return config def get() -> Config: return _config.get() + + +def add_or_remove_lists(cli_values: list[str], config_values: list[str]) -> None: + exclude = {"+", "-"} + if cli_values: + if cli_values[0] == "+": + new_values_set = set(config_values + cli_values) + cli_values.clear() + cli_values.extend(sorted(new_values_set - exclude)) + elif cli_values[0] == "-": + new_values_set = set(config_values) - set(cli_values) + cli_values.clear() + cli_values.extend(sorted(new_values_set - exclude)) diff --git a/cyberdrop_dl/crawlers/__init__.py b/cyberdrop_dl/crawlers/__init__.py index 3839a0139..ab0925993 100644 --- a/cyberdrop_dl/crawlers/__init__.py +++ b/cyberdrop_dl/crawlers/__init__.py @@ -1,7 +1,7 @@ # ruff: noqa: F401 from __future__ import annotations -from cyberdrop_dl import env +from cyberdrop_dl import config, env from ._chevereto import CheveretoCrawler from .anontransfer import AnonTransferCrawler diff --git a/cyberdrop_dl/crawlers/_forum.py b/cyberdrop_dl/crawlers/_forum.py index 9f28cc2a7..63ccbf3e2 100644 --- a/cyberdrop_dl/crawlers/_forum.py +++ b/cyberdrop_dl/crawlers/_forum.py @@ -19,6 +19,7 @@ from bs4 import BeautifulSoup, Tag +from cyberdrop_dl import config from cyberdrop_dl.constants import HTTP_REGEX_LINKS from cyberdrop_dl.crawlers.crawler import Crawler from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL @@ -218,7 +219,7 @@ def max_thread_depth(self) -> int: @final @property def max_thread_folder_depth(self): - return self.manager.config.download_options.maximum_thread_folder_depth + return config.get().download_options.maximum_thread_folder_depth async def fetch(self, scrape_item: ScrapeItem) -> None: if not self.logged_in and self.login_required is True: diff --git a/cyberdrop_dl/crawlers/crawler.py b/cyberdrop_dl/crawlers/crawler.py index 9a62316c0..9e6bd0b96 100644 --- a/cyberdrop_dl/crawlers/crawler.py +++ b/cyberdrop_dl/crawlers/crawler.py @@ -16,7 +16,7 @@ from aiolimiter import AsyncLimiter from yarl import URL -from cyberdrop_dl import constants, signature +from cyberdrop_dl import config, constants, signature from cyberdrop_dl.clients.scraper_client import ScraperClient from cyberdrop_dl.data_structures.mediaprops import ISO639Subtitle, Resolution from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL, MediaItem, ScrapeItem @@ -453,7 +453,7 @@ async def _download(self, media_item: MediaItem, m3u8: m3u8.RenditionGroup | Non await self.__write_to_jsonl(media_item) async def __write_to_jsonl(self, media_item: MediaItem) -> None: - if not self.manager.config.files.dump_json: + if not config.get().files.dump_json: return data = [media_item.as_jsonable_dict()] @@ -493,17 +493,15 @@ async def handle_media_item(self, media_item: MediaItem, m3u8: m3u8.RenditionGro async def check_skip_by_config(self, media_item: MediaItem) -> bool: media_host = media_item.url.host - if (hosts := self.manager.config.ignore_options.skip_hosts) and any(host in media_host for host in hosts): + if (hosts := config.get().ignore_options.skip_hosts) and any(host in media_host for host in hosts): log(f"Download skip {media_item.url} due to skip_hosts config", 10) return True - if (hosts := self.manager.config.ignore_options.only_hosts) and not any(host in media_host for host in hosts): + if (hosts := config.get().ignore_options.only_hosts) and not any(host in media_host for host in hosts): log(f"Download skip {media_item.url} due to only_hosts config", 10) return True - if (regex := self.manager.config.ignore_options.filename_regex_filter) and re.search( - regex, media_item.filename - ): + if (regex := config.get().ignore_options.filename_regex_filter) and re.search(regex, media_item.filename): log(f"Download skip {media_item.url} due to filename regex filter config", 10) return True @@ -581,13 +579,13 @@ def create_title(self, title: str, album_id: str | None = None, thread_id: int | title = "Untitled" title = title.strip() - if album_id and self.manager.config.download_options.include_album_id_in_folder_name: + if album_id and config.get().download_options.include_album_id_in_folder_name: title = f"{title} {album_id}" - if thread_id and self.manager.config.download_options.include_thread_id_in_folder_name: + if thread_id and config.get().download_options.include_thread_id_in_folder_name: title = f"{title} {thread_id}" - if not self.manager.config.download_options.remove_domains_from_folder_names: + if not config.get().download_options.remove_domains_from_folder_names: title = f"{title} ({self.FOLDER_DOMAIN})" # Remove double spaces @@ -599,7 +597,7 @@ def create_title(self, title: str, album_id: str | None = None, thread_id: int | @property def separate_posts(self) -> bool: - return self.manager.config.download_options.separate_posts + return config.get().download_options.separate_posts def create_separate_post_title( self, @@ -610,7 +608,7 @@ def create_separate_post_title( ) -> str: if not self.separate_posts: return "" - title_format = self.manager.config.download_options.separate_posts_format + title_format = config.get().download_options.separate_posts_format if title_format.strip().casefold() == "{default}": title_format = self.DEFAULT_POST_TITLE_FORMAT if isinstance(date, int): diff --git a/cyberdrop_dl/crawlers/filester.py b/cyberdrop_dl/crawlers/filester.py index d9e3d37d6..110771e13 100644 --- a/cyberdrop_dl/crawlers/filester.py +++ b/cyberdrop_dl/crawlers/filester.py @@ -1,4 +1,4 @@ -from __future__ import annotations # +from __future__ import annotations from typing import TYPE_CHECKING, ClassVar diff --git a/cyberdrop_dl/crawlers/kemono.py b/cyberdrop_dl/crawlers/kemono.py index 10a11a5b3..f1e5db02a 100644 --- a/cyberdrop_dl/crawlers/kemono.py +++ b/cyberdrop_dl/crawlers/kemono.py @@ -12,6 +12,7 @@ from pydantic import BeforeValidator, Field +from cyberdrop_dl import config from cyberdrop_dl.crawlers.crawler import Crawler, SupportedPaths, auto_task_id from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL from cyberdrop_dl.exceptions import NoExtensionError, ScrapeError @@ -215,11 +216,11 @@ def session_cookie(self) -> str: @property def ignore_content(self) -> bool: - return self.manager.config.ignore_options.ignore_coomer_post_content + return config.get().ignore_options.ignore_coomer_post_content @property def ignore_ads(self) -> bool: - return self.manager.config.ignore_options.ignore_coomer_ads + return config.get().ignore_options.ignore_coomer_ads async def async_startup(self) -> None: if getattr(self, "API_ENTRYPOINT", None): diff --git a/cyberdrop_dl/managers/client_manager.py b/cyberdrop_dl/managers/client_manager.py index 7bf752250..0cac0fe93 100644 --- a/cyberdrop_dl/managers/client_manager.py +++ b/cyberdrop_dl/managers/client_manager.py @@ -14,13 +14,14 @@ from aiohttp import ClientResponse, ClientSession from aiolimiter import AsyncLimiter -from cyberdrop_dl import constants, ddos_guard, env +from cyberdrop_dl import config, constants, ddos_guard, env from cyberdrop_dl.clients.download_client import DownloadClient from cyberdrop_dl.clients.flaresolverr import FlareSolverr from cyberdrop_dl.clients.response import AbstractResponse from cyberdrop_dl.clients.scraper_client import ScraperClient from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL, MediaItem from cyberdrop_dl.exceptions import DDOSGuardError, DownloadError, ScrapeError, TooManyCrawlerErrors +from cyberdrop_dl.managers.manager import Manager from cyberdrop_dl.ui.prompts.user_prompts import get_cookies_from_browsers from cyberdrop_dl.utils.aio import WeakAsyncLocks from cyberdrop_dl.utils.cookie_management import read_netscape_files @@ -32,6 +33,7 @@ ) if TYPE_CHECKING: + from asyncio.locks import Semaphore from collections.abc import Callable, Generator, Iterable, Mapping from http.cookies import BaseCookie @@ -111,31 +113,40 @@ class CloudflareTurnstile: ALL_SELECTORS = ", ".join(SELECTORS) +def _create_ssl(): + ssl_context = config.get().general.ssl_context + + if not ssl_context: + return False + + if ssl_context == "certifi": + return ssl.create_default_context(cafile=certifi.where()) + if ssl_context == "truststore": + return truststore.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + + ctx = truststore.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + ctx.load_verify_locations(cafile=certifi.where()) + return ctx + + class ClientManager: """Creates a 'client' that can be referenced by scraping or download sessions.""" def __init__(self, manager: Manager) -> None: - self.manager = manager - ssl_context = self.manager.global_config.general.ssl_context - if not ssl_context: - self.ssl_context = False - elif ssl_context == "certifi": - self.ssl_context = ssl.create_default_context(cafile=certifi.where()) - elif ssl_context == "truststore": - self.ssl_context = truststore.SSLContext(ssl.PROTOCOL_TLS_CLIENT) - elif ssl_context == "truststore+certifi": - self.ssl_context = ctx = truststore.SSLContext(ssl.PROTOCOL_TLS_CLIENT) - ctx.load_verify_locations(cafile=certifi.where()) - - self.cookies = aiohttp.CookieJar(quote_cookie=False) + self.manager: Manager = manager + self.ssl_context: ssl.SSLContext | Literal[False] = _create_ssl() + self.cookies: aiohttp.CookieJar = aiohttp.CookieJar(quote_cookie=False) self.rate_limits: dict[str, AsyncLimiter] = {} self.download_slots: dict[str, int] = {} - self.global_rate_limiter = AsyncLimiter(self.rate_limiting_options.rate_limit, 1) - self.global_download_slots = asyncio.Semaphore(self.rate_limiting_options.max_simultaneous_downloads) - self.scraper_client = ScraperClient(self) - self.speed_limiter = DownloadSpeedLimiter(self.rate_limiting_options.download_speed_limit) - self.download_client = DownloadClient(manager, self) - self.flaresolverr = FlareSolverr(manager) + + rate_limits = config.get().rate_limiting_options + + self.global_rate_limiter: AsyncLimiter = AsyncLimiter(rate_limits.rate_limit, 1) + self.global_download_slots: Semaphore = asyncio.Semaphore(rate_limits.max_simultaneous_downloads) + self.scraper_client: ScraperClient = ScraperClient(self) + self.speed_limiter: DownloadSpeedLimiter = DownloadSpeedLimiter(rate_limits.download_speed_limit) + self.download_client: DownloadClient = DownloadClient(manager, self) + self.flaresolverr: FlareSolverr = FlareSolverr(manager) self.file_locks: WeakAsyncLocks[str] = WeakAsyncLocks() self._session: aiohttp.ClientSession @@ -167,7 +178,7 @@ async def __aexit__(self, *args) -> None: @property def rate_limiting_options(self): - return self.manager.global_config.rate_limiting_options + return config.get().rate_limiting_options def get_download_slots(self, domain: str) -> int: """Returns the download limit for a domain.""" @@ -247,7 +258,7 @@ def new_curl_cffi_session(self) -> AsyncSession: warnings.filterwarnings("ignore", category=CurlCffiWarning) acurl = AsyncCurl(loop=loop) - proxy_or_none = str(proxy) if (proxy := self.manager.global_config.general.proxy) else None + proxy_or_none = str(proxy) if (proxy := config.get().general.proxy) else None return AsyncSession( loop=loop, @@ -262,23 +273,21 @@ def new_curl_cffi_session(self) -> AsyncSession: def new_scrape_session(self) -> ClientSession: trace_configs = _create_request_log_hooks("scrape") - return self._new_session(cached=True, trace_configs=trace_configs) + return self._new_session(trace_configs=trace_configs) def new_download_session(self) -> ClientSession: trace_configs = _create_request_log_hooks("download") - return self._new_session(cached=False, trace_configs=trace_configs) + return self._new_session(trace_configs=trace_configs) - def _new_session( - self, cached: bool = False, trace_configs: list[aiohttp.TraceConfig] | None = None - ) -> ClientSession: + def _new_session(self, trace_configs: list[aiohttp.TraceConfig] | None = None) -> ClientSession: timeout = self.rate_limiting_options._aiohttp_timeout return ClientSession( - headers={"user-agent": self.manager.global_config.general.user_agent}, + headers={"user-agent": config.get().general.user_agent}, raise_for_status=False, cookie_jar=self.cookies, timeout=timeout, trace_configs=trace_configs, - proxy=self.manager.global_config.general.proxy, + proxy=config.get().general.proxy, connector=self._new_tcp_connector(), requote_redirect_url=False, ) @@ -397,7 +406,7 @@ async def check_file_duration(self, media_item: MediaItem) -> bool: if not (is_video or is_audio): return True - duration_limits = self.manager.config.media_duration_limits + duration_limits = config.get().media_duration_limits min_video_duration: float = duration_limits.minimum_video_duration.total_seconds() max_video_duration: float = duration_limits.maximum_video_duration.total_seconds() min_audio_duration: float = duration_limits.minimum_audio_duration.total_seconds() diff --git a/cyberdrop_dl/managers/manager.py b/cyberdrop_dl/managers/manager.py index fb83f44be..3452a9ee8 100644 --- a/cyberdrop_dl/managers/manager.py +++ b/cyberdrop_dl/managers/manager.py @@ -2,16 +2,13 @@ import asyncio import json -from dataclasses import Field, field +import logging +from dataclasses import field from time import perf_counter -from typing import TYPE_CHECKING, Any, NamedTuple, TypeVar +from typing import TYPE_CHECKING, NamedTuple -from pydantic import BaseModel - -from cyberdrop_dl import __version__, constants -from cyberdrop_dl.cli import ParsedArgs, parse_args +from cyberdrop_dl import __version__, appdata, config, constants from cyberdrop_dl.database import Database -from cyberdrop_dl.database.transfer import transfer_v5_db_to_v6 from cyberdrop_dl.managers.cache_manager import CacheManager from cyberdrop_dl.managers.client_manager import ClientManager from cyberdrop_dl.managers.config_manager import ConfigManager @@ -22,12 +19,11 @@ from cyberdrop_dl.managers.progress_manager import ProgressManager from cyberdrop_dl.managers.storage_manager import StorageManager from cyberdrop_dl.utils import ffmpeg -from cyberdrop_dl.utils.logger import LogHandler, QueuedLogger, log +from cyberdrop_dl.utils.logger import LogHandler, QueuedLogger from cyberdrop_dl.utils.utilities import close_if_defined, get_system_information if TYPE_CHECKING: from asyncio import TaskGroup - from collections.abc import Sequence from cyberdrop_dl.scraper.scrape_mapper import ScrapeMapper @@ -37,18 +33,13 @@ class AsyncioEvents(NamedTuple): RUNNING: asyncio.Event -class Manager: - def __init__(self, args: Sequence[str] | None = None) -> None: - if isinstance(args, str): - args = [args] +logger = logging.getLogger(__name__) + - self.parsed_args: ParsedArgs = field(init=False) +class Manager: + def __init__(self) -> None: self.cache_manager: CacheManager = CacheManager(self) - self.path_manager: PathManager = field(init=False) - self.config_manager: ConfigManager = field(init=False) self.hash_manager: HashManager = field(init=False) - - self.log_manager: LogManager = field(init=False) self.db_manager: Database = field(init=False) self.client_manager: ClientManager = field(init=False) self.storage_manager: StorageManager = field(init=False) @@ -56,139 +47,49 @@ def __init__(self, args: Sequence[str] | None = None) -> None: self.progress_manager: ProgressManager = field(init=False) self.live_manager: LiveManager = field(init=False) - self._loaded_args_config: bool = False - self._made_portable: bool = False - self.task_group: TaskGroup = field(init=False) self.scrape_mapper: ScrapeMapper = field(init=False) self.start_time: float = perf_counter() - self.downloaded_data: int = 0 self.loggers: dict[str, QueuedLogger] = {} - self.args = args self.states: AsyncioEvents constants.console_handler = LogHandler(level=constants.CONSOLE_LEVEL) - @property - def config(self): - return self.config_manager.settings_data - - @property - def auth_config(self): - return self.config_manager.authentication_data - - @property - def global_config(self): - return self.config_manager.global_settings_data - - def startup(self) -> None: - """Startup process for the manager.""" - - if isinstance(self.parsed_args, Field): - self.parsed_args = parse_args(self.args) - - self.path_manager = PathManager(self) + self.path_manager: PathManager = PathManager(self) self.path_manager.pre_startup() self.cache_manager.startup(self.path_manager.cache_folder / "cache.yaml") - self.config_manager = ConfigManager(self) + self.config_manager: ConfigManager = ConfigManager(self) self.config_manager.startup() - self.args_consolidation() - self.path_manager.startup() - self.log_manager = LogManager(self) + self.log_manager: LogManager = LogManager(self) + log_app_state() """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" async def async_startup(self) -> None: """Async startup process for the manager.""" self.states = AsyncioEvents(asyncio.Event(), asyncio.Event()) - self.args_logging() - - if not isinstance(self.client_manager, ClientManager): - self.client_manager = ClientManager(self) - await self.client_manager.startup() - if not isinstance(self.storage_manager, StorageManager): - self.storage_manager = StorageManager(self) - - elif self.states.RUNNING.is_set(): - await self.storage_manager.reset() + self.client_manager = ClientManager(self) + await self.client_manager.startup() + self.storage_manager = StorageManager(self) await self.async_db_hash_startup() - constants.MAX_NAME_LENGTHS["FILE"] = self.config_manager.global_settings_data.general.max_file_name_length - constants.MAX_NAME_LENGTHS["FOLDER"] = self.config_manager.global_settings_data.general.max_folder_name_length + constants.MAX_NAME_LENGTHS["FILE"] = config.get().general.max_file_name_length + constants.MAX_NAME_LENGTHS["FOLDER"] = config.get().general.max_folder_name_length async def async_db_hash_startup(self) -> None: - if not isinstance(self.db_manager, Database): - self.db_manager = Database( - self.path_manager.history_db, - self.config.runtime_options.ignore_history, - ) - await self.db_manager.startup() - transfer_v5_db_to_v6(self.path_manager.history_db) - if not isinstance(self.hash_manager, HashManager): - self.hash_manager = HashManager(self) - if not isinstance(self.live_manager, LiveManager): - self.live_manager = LiveManager(self) - if not isinstance(self.progress_manager, ProgressManager): - self.progress_manager = ProgressManager(self) - self.progress_manager.startup() - - def process_additive_args(self) -> None: - cli_general_options = self.parsed_args.global_settings.general - cli_ignore_options = self.parsed_args.config.ignore_options - config_ignore_options = self.config_manager.settings_data.ignore_options - config_general_options = self.config_manager.global_settings_data.general - - add_or_remove_lists(cli_ignore_options.skip_hosts, config_ignore_options.skip_hosts) - add_or_remove_lists(cli_ignore_options.only_hosts, config_ignore_options.only_hosts) - add_or_remove_lists(cli_general_options.disable_crawlers, config_general_options.disable_crawlers) - - def args_consolidation(self) -> None: - """Consolidates runtime arguments with config values.""" - self.process_additive_args() - - conf = merge_models(self.config_manager.settings_data, self.parsed_args.config) - global_conf = merge_models(self.config_manager.global_settings_data, self.parsed_args.global_settings) - deep_scrape = self.parsed_args.config.runtime_options.deep_scrape or self.config_manager.deep_scrape - - self.config_manager.settings_data = conf - self.config_manager.global_settings_data = global_conf - self.config_manager.deep_scrape = deep_scrape - - def args_logging(self) -> None: - """Logs the runtime arguments.""" - auth_provided = {} - - for site, auth_entries in self.config_manager.authentication_data.model_dump().items(): - auth_provided[site] = all(auth_entries.values()) - - config_settings = self.config_manager.settings_data.model_copy() - config_settings.runtime_options.deep_scrape = self.config_manager.deep_scrape - config_settings = config_settings.model_dump_json(indent=4) - global_settings = self.config_manager.global_settings_data.model_dump_json(indent=4) - cli_only_args = self.parsed_args.cli_only_args.model_dump_json(indent=4) - system_info = get_system_information() - - args_info = ( - "Starting Cyberdrop-DL Process", - f"Running Version: {__version__}", - f"System Info: {system_info}", - f"Using Config: {self.config_manager.loaded_config}", - f"Using Config File: {self.config_manager.settings}", - f"Using Input File: {self.path_manager.input_file}", - f"Using Download Folder: {self.path_manager.download_folder}", - f"Using Database File: {self.path_manager.history_db}", - f"Using CLI only options: {cli_only_args}", - f"Using Authentication: \n{json.dumps(auth_provided, indent=4, sort_keys=True)}", - f"Using Settings: \n{config_settings}", - f"Using Global Settings: \n{global_settings}", - f"Using ffmpeg version: {ffmpeg.get_ffmpeg_version()}", - f"Using ffprobe version: {ffmpeg.get_ffprobe_version()}", + self.db_manager = Database( + self.path_manager.history_db, + config.get().runtime_options.ignore_history, ) - log("\n".join(args_info)) + await self.db_manager.startup() + self.hash_manager = HashManager(self) + self.live_manager = LiveManager(self) + self.progress_manager = ProgressManager(self) + self.progress_manager.startup() async def async_db_close(self) -> None: "Partial shutdown for managers used for hash directory scanner" @@ -211,41 +112,23 @@ async def close(self) -> None: queued_logger.stop() -def add_or_remove_lists(cli_values: list[str], config_values: list[str]) -> None: - exclude = {"+", "-"} - if cli_values: - if cli_values[0] == "+": - new_values_set = set(config_values + cli_values) - cli_values.clear() - cli_values.extend(sorted(new_values_set - exclude)) - elif cli_values[0] == "-": - new_values_set = set(config_values) - set(cli_values) - cli_values.clear() - cli_values.extend(sorted(new_values_set - exclude)) - - -def merge_dicts(dict1: dict[str, Any], dict2: dict[str, Any]) -> dict[str, Any]: - for key, val in dict1.items(): - if isinstance(val, dict): - if key in dict2 and isinstance(dict2[key], dict): - merge_dicts(dict1[key], dict2[key]) - else: - if key in dict2: - dict1[key] = dict2[key] - - for key, val in dict2.items(): - if key not in dict1: - dict1[key] = val - - return dict1 - - -M = TypeVar("M", bound=BaseModel) - - -def merge_models(default: M, new: M) -> M: - default_dict = default.model_dump() - new_dict = new.model_dump(exclude_unset=True) - - updated_dict = merge_dicts(default_dict, new_dict) - return default.model_validate(updated_dict) +def log_app_state() -> None: + auth = {} + + config_ = config.get() + app_data = appdata.get() + for site, auth_entries in config_.auth.model_dump().items(): # pyright: ignore[reportAny] + auth[site] = all(auth_entries.values()) # pyright: ignore[reportAny] + + # f"Using Input File: {self.path_manager.input_file}", + stats = dict( # noqa: C408 + version=__version__, + system=get_system_information(), + ffmpeg=ffmpeg.get_ffmpeg_version(), + ffprobe=ffmpeg.get_ffprobe_version(), + database=app_data.db_file, + config_file=config_.source, + auth=auth, + config=config_.model_dump_json(indent=2, exclude={"auth"}), + ) + logger.debug(json.dumps(stats, indent=2, ensure_ascii=False)) diff --git a/cyberdrop_dl/managers/path_manager.py b/cyberdrop_dl/managers/path_manager.py index 60e22c106..237d71d92 100644 --- a/cyberdrop_dl/managers/path_manager.py +++ b/cyberdrop_dl/managers/path_manager.py @@ -55,18 +55,13 @@ def __init__(self, manager: Manager) -> None: @property def cwd(self) -> Path: if env.RUNNING_IN_IDE and Path.cwd().name == "cyberdrop_dl": - # This is for testing purposes only""" return Path("..").resolve() return Path().resolve() @property def appdata(self) -> Path: if isinstance(self._appdata, Field): - if self.manager.parsed_args.cli_only_args.appdata_folder: - path = self.manager.parsed_args.cli_only_args.appdata_folder / "AppData" - self._appdata = self.cwd / path - else: - self._appdata = self.cwd / "AppData" + self._appdata = self.cwd / "AppData" return self._appdata @@ -76,7 +71,6 @@ def pre_startup(self) -> None: self.cookies_dir = self.appdata / "Cookies" self.cache_db = self.cache_folder / "request_cache.db" - self.cache_folder.mkdir(parents=True, exist_ok=True) self.config_folder.mkdir(parents=True, exist_ok=True) self.cookies_dir.mkdir(parents=True, exist_ok=True) self.cache_db.touch(exist_ok=True) diff --git a/cyberdrop_dl/managers/storage_manager.py b/cyberdrop_dl/managers/storage_manager.py index 670b17e13..e52482327 100644 --- a/cyberdrop_dl/managers/storage_manager.py +++ b/cyberdrop_dl/managers/storage_manager.py @@ -11,6 +11,7 @@ import psutil from pydantic import ByteSize +from cyberdrop_dl import config from cyberdrop_dl.exceptions import InsufficientFreeSpaceError from cyberdrop_dl.utils.logger import log, log_debug @@ -164,7 +165,7 @@ async def _has_sufficient_space(self, folder: Path) -> bool: free_space = self._free_space[mount] if free_space == -1: return True - return free_space > self.manager.global_config.general.required_free_space + return free_space > config.get().general.required_free_space async def _get_free_space(self, mount: Path) -> int: exc_info = None diff --git a/cyberdrop_dl/models/__init__.py b/cyberdrop_dl/models/__init__.py index 5394af909..a5d7bbe53 100755 --- a/cyberdrop_dl/models/__init__.py +++ b/cyberdrop_dl/models/__init__.py @@ -1,7 +1,13 @@ +from __future__ import annotations + +from typing import Any, TypeVar + from pydantic import BaseModel from .base import AliasModel, AppriseURLModel, FlatNamespace, FrozenModel, HttpAppriseURL, Settings, SettingsGroup +M = TypeVar("M", bound=BaseModel) + def get_model_fields(model: BaseModel, *, exclude_unset: bool = True) -> set[str]: fields = set() @@ -12,6 +18,30 @@ def get_model_fields(model: BaseModel, *, exclude_unset: bool = True) -> set[str return fields +def merge_dicts(dict1: dict[str, Any], dict2: dict[str, Any]) -> dict[str, Any]: + for key, val in dict1.items(): + if isinstance(val, dict): + if key in dict2 and isinstance(dict2[key], dict): + merge_dicts(dict1[key], dict2[key]) + else: + if key in dict2: + dict1[key] = dict2[key] + + for key, val in dict2.items(): + if key not in dict1: + dict1[key] = val + + return dict1 + + +def merge_models(old: M, new: M) -> M: + old.model_copy() + old_values = old.model_dump() + new_values = new.model_dump(exclude_unset=True) + updated_dict = merge_dicts(old_values, new_values) + return old.model_validate(updated_dict) + + __all__ = [ "AliasModel", "AppriseURLModel", From 0e04d441b9cdfeae56458e534b8d48f780b29a96 Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Sun, 22 Feb 2026 19:24:31 -0500 Subject: [PATCH 06/23] refactor: rework cache --- cyberdrop_dl/cache.py | 65 +++++++++ cyberdrop_dl/clients/download_client.py | 10 +- cyberdrop_dl/clients/flaresolverr.py | 2 +- cyberdrop_dl/clients/hash_client.py | 14 +- cyberdrop_dl/crawlers/__init__.py | 2 +- cyberdrop_dl/crawlers/_forum.py | 4 +- cyberdrop_dl/crawlers/crawler.py | 4 +- cyberdrop_dl/crawlers/onedrive.py | 9 +- cyberdrop_dl/data_structures/url_objects.py | 2 +- cyberdrop_dl/director.py | 2 +- cyberdrop_dl/downloader/downloader.py | 12 +- cyberdrop_dl/downloader/mega_nz.py | 2 +- cyberdrop_dl/managers/__init__.py | 130 +++++++++++++++++ cyberdrop_dl/managers/cache_manager.py | 59 -------- cyberdrop_dl/managers/client_manager.py | 18 ++- cyberdrop_dl/managers/config_manager.py | 19 +-- cyberdrop_dl/managers/hash_manager.py | 2 +- cyberdrop_dl/managers/live_manager.py | 6 +- cyberdrop_dl/managers/log_manager.py | 2 +- cyberdrop_dl/managers/manager.py | 134 ------------------ cyberdrop_dl/managers/mock_manager.py | 52 ------- cyberdrop_dl/managers/path_manager.py | 14 +- cyberdrop_dl/managers/progress_manager.py | 2 +- cyberdrop_dl/managers/storage_manager.py | 2 +- cyberdrop_dl/plugins.py | 2 +- cyberdrop_dl/scraper/jdownloader.py | 2 +- cyberdrop_dl/scraper/scrape_mapper.py | 13 +- cyberdrop_dl/ui/program_ui.py | 9 +- .../ui/progress/downloads_progress.py | 2 +- cyberdrop_dl/ui/progress/file_progress.py | 2 +- cyberdrop_dl/ui/progress/hash_progress.py | 2 +- cyberdrop_dl/ui/progress/scraping_progress.py | 2 +- cyberdrop_dl/ui/progress/sort_progress.py | 2 +- cyberdrop_dl/ui/prompts/user_prompts.py | 2 +- cyberdrop_dl/utils/apprise.py | 2 +- cyberdrop_dl/utils/cookie_management.py | 2 +- cyberdrop_dl/utils/logger.py | 2 +- cyberdrop_dl/utils/sorting.py | 2 +- cyberdrop_dl/utils/utilities.py | 2 +- cyberdrop_dl/utils/webhook.py | 2 +- tests/conftest.py | 2 +- tests/crawlers/test_crawlers.py | 2 +- tests/crawlers/test_xenforo.py | 2 +- tests/fake_classes/managers.py | 4 +- tests/test_apprise.py | 2 +- tests/test_flaresolverr.py | 2 +- tests/test_hashing.py | 2 +- tests/test_manager.py | 2 +- tests/test_scrape_mapper.py | 2 +- tests/test_storage.py | 2 +- 50 files changed, 296 insertions(+), 344 deletions(-) create mode 100644 cyberdrop_dl/cache.py delete mode 100644 cyberdrop_dl/managers/cache_manager.py delete mode 100644 cyberdrop_dl/managers/manager.py delete mode 100644 cyberdrop_dl/managers/mock_manager.py diff --git a/cyberdrop_dl/cache.py b/cyberdrop_dl/cache.py new file mode 100644 index 000000000..d3b1b50b6 --- /dev/null +++ b/cyberdrop_dl/cache.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +import dataclasses +from collections.abc import Iterator, MutableMapping +from contextvars import ContextVar, Token +from typing import TYPE_CHECKING, Any, Self + +from cyberdrop_dl import __version__ +from cyberdrop_dl.utils import yaml + +if TYPE_CHECKING: + from pathlib import Path + +_cache: ContextVar[Cache] = ContextVar("_cache") + + +@dataclasses.dataclass(slots=True) +class Cache(MutableMapping[str, Any]): + file: Path + _cache: dict[str, Any] = dataclasses.field(init=False) + _token: Token[Cache] | None = None + + def __post_init__(self) -> None: + self._cache = yaml.load(self.file) + + def __getitem__(self, key: str) -> Any: + return self.get(key) + + def __iter__(self) -> Iterator[str]: + return iter(self._cache) + + def __len__(self) -> int: + return len(self._cache) + + def __delitem__(self, key: str) -> None: + try: + _ = self._cache.pop(key) + except KeyError: + pass + else: + self._save() + + def __setitem__(self, key: str, value: Any, /) -> None: + self._cache[key] = value + self._save() + + def __enter__(self) -> Self: + self._token = _cache.set(self) + return self + + def __exit__(self, *_) -> None: + assert self._token is not None + self._token = _cache.reset(self._token) + self.close() + + def _save(self) -> None: + if self._token is None: + yaml.save(self.file, self._cache) + + def close(self) -> None: + self["version"] = __version__ + + +def get(): + return _cache.get() diff --git a/cyberdrop_dl/clients/download_client.py b/cyberdrop_dl/clients/download_client.py index a6b69b20c..70ce36404 100644 --- a/cyberdrop_dl/clients/download_client.py +++ b/cyberdrop_dl/clients/download_client.py @@ -28,8 +28,8 @@ import aiohttp from cyberdrop_dl.data_structures.url_objects import MediaItem + from cyberdrop_dl.managers import Manager from cyberdrop_dl.managers.client_manager import ClientManager - from cyberdrop_dl.managers.manager import Manager _CONTENT_TYPES_OVERRIDES: dict[str, str] = {"text/vnd.trolltech.linguist": "video/MP2T"} @@ -48,7 +48,7 @@ class DownloadClient: def __init__(self, manager: Manager, client_manager: ClientManager) -> None: self.manager = manager self.client_manager = client_manager - self.download_speed_threshold = self.manager.config_manager.settings_data.runtime_options.slow_download_speed + self.download_speed_threshold = config.get().runtime_options.slow_download_speed self._server_locks = WeakAsyncLocks[str]() self.server_locked_domains: set[str] = set() self._supports_ranges: bool = True @@ -111,7 +111,7 @@ async def _download(self, domain: str, media_item: MediaItem) -> bool: resume_point = size download_headers["Range"] = f"bytes={size}-" - await asyncio.sleep(self.manager.config_manager.global_settings_data.rate_limiting_options.total_delay) + await asyncio.sleep(config.get().rate_limiting_options.total_delay) def process_response(resp: aiohttp.ClientResponse | AbstractResponse): return self._process_response(media_item, domain, resume_point, resp) @@ -358,7 +358,7 @@ def get_download_dir(self, media_item: MediaItem) -> Path: """Returns the download directory for the media item.""" download_folder = media_item.download_folder - if self.manager.config_manager.settings_data.download_options.block_download_sub_folders: + if config.get().download_options.block_download_sub_folders: while download_folder.parent != self.manager.path_manager.download_folder: download_folder = download_folder.parent media_item.download_folder = download_folder @@ -475,7 +475,7 @@ async def iterate_filename(self, complete_file: Path, media_item: MediaItem) -> def check_filesize_limits(self, media: MediaItem) -> bool: """Checks if the file size is within the limits.""" - file_size_limits = self.manager.config_manager.settings_data.file_size_limits + file_size_limits = config.get().file_size_limits max_video_filesize = file_size_limits.maximum_video_size or float("inf") min_video_filesize = file_size_limits.minimum_video_size max_image_filesize = file_size_limits.maximum_image_size or float("inf") diff --git a/cyberdrop_dl/clients/flaresolverr.py b/cyberdrop_dl/clients/flaresolverr.py index b683646b8..486245b15 100644 --- a/cyberdrop_dl/clients/flaresolverr.py +++ b/cyberdrop_dl/clients/flaresolverr.py @@ -19,7 +19,7 @@ if TYPE_CHECKING: from collections.abc import Callable - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager class _Command(StrEnum): diff --git a/cyberdrop_dl/clients/hash_client.py b/cyberdrop_dl/clients/hash_client.py index 9b3d77ff8..32c4abb24 100644 --- a/cyberdrop_dl/clients/hash_client.py +++ b/cyberdrop_dl/clients/hash_client.py @@ -18,7 +18,7 @@ from cyberdrop_dl.config.settings import Dedupe from cyberdrop_dl.data_structures.url_objects import MediaItem - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager def hash_directory_scanner(manager: Manager, path: Path) -> None: @@ -79,7 +79,7 @@ async def hash_item(self, media_item: MediaItem) -> None: async def hash_item_during_download(self, media_item: MediaItem) -> None: if media_item.is_segment: return - if self.manager.config_manager.settings_data.dupe_cleanup_options.hashing != Hashing.IN_PLACE: + if config.get().dupe_cleanup_options.hashing != Hashing.IN_PLACE: return await self.manager.states.RUNNING.wait() try: @@ -103,9 +103,9 @@ async def update_db_and_retrive_hash( return hash = await self._update_db_and_retrive_hash_helper(file, original_filename, referer, hash_type=self.xxhash) - if self.manager.config_manager.settings_data.dupe_cleanup_options.add_md5_hash: + if config.get().dupe_cleanup_options.add_md5_hash: await self._update_db_and_retrive_hash_helper(file, original_filename, referer, hash_type=self.md5) - if self.manager.config_manager.settings_data.dupe_cleanup_options.add_sha256_hash: + if config.get().dupe_cleanup_options.add_sha256_hash: await self._update_db_and_retrive_hash_helper(file, original_filename, referer, hash_type=self.sha256) return hash @@ -156,11 +156,11 @@ async def save_hash_data(self, media_item: MediaItem, hash: str | None) -> None: self.hashes_dict[hash][size].add(absolute_path) async def cleanup_dupes_after_download(self) -> None: - if self.manager.config_manager.settings_data.dupe_cleanup_options.hashing == Hashing.OFF: + if config.get().dupe_cleanup_options.hashing == Hashing.OFF: return - if not self.manager.config_manager.settings_data.dupe_cleanup_options.auto_dedupe: + if not config.get().dupe_cleanup_options.auto_dedupe: return - if self.manager.config_manager.settings_data.runtime_options.ignore_history: + if config.get().runtime_options.ignore_history: return with self.manager.live_manager.get_hash_live(stop=True): file_hashes_dict = await self.get_file_hashes_dict() diff --git a/cyberdrop_dl/crawlers/__init__.py b/cyberdrop_dl/crawlers/__init__.py index ab0925993..8d24d35b7 100644 --- a/cyberdrop_dl/crawlers/__init__.py +++ b/cyberdrop_dl/crawlers/__init__.py @@ -1,7 +1,7 @@ # ruff: noqa: F401 from __future__ import annotations -from cyberdrop_dl import config, env +from cyberdrop_dl import cache, config, env from ._chevereto import CheveretoCrawler from .anontransfer import AnonTransferCrawler diff --git a/cyberdrop_dl/crawlers/_forum.py b/cyberdrop_dl/crawlers/_forum.py index 63ccbf3e2..4391e42a3 100644 --- a/cyberdrop_dl/crawlers/_forum.py +++ b/cyberdrop_dl/crawlers/_forum.py @@ -209,12 +209,12 @@ async def login(self) -> None: @final @property def scrape_single_forum_post(self) -> bool: - return self.manager.config_manager.settings_data.download_options.scrape_single_forum_post + return config.get().download_options.scrape_single_forum_post @final @property def max_thread_depth(self) -> int: - return self.manager.config_manager.settings_data.download_options.maximum_thread_depth + return config.get().download_options.maximum_thread_depth @final @property diff --git a/cyberdrop_dl/crawlers/crawler.py b/cyberdrop_dl/crawlers/crawler.py index 9e6bd0b96..a00dad930 100644 --- a/cyberdrop_dl/crawlers/crawler.py +++ b/cyberdrop_dl/crawlers/crawler.py @@ -53,7 +53,7 @@ from rich.progress import TaskID from cyberdrop_dl.clients.response import AbstractResponse - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager OneOrTuple: TypeAlias = T | tuple[T, ...] @@ -264,7 +264,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: ... @final @property def allow_no_extension(self) -> bool: - return not self.manager.config_manager.settings_data.ignore_options.exclude_files_with_no_extension + return not config.get().ignore_options.exclude_files_with_no_extension @property def deep_scrape(self) -> bool: diff --git a/cyberdrop_dl/crawlers/onedrive.py b/cyberdrop_dl/crawlers/onedrive.py index da9be4401..6d842eb78 100644 --- a/cyberdrop_dl/crawlers/onedrive.py +++ b/cyberdrop_dl/crawlers/onedrive.py @@ -9,6 +9,7 @@ from functools import partial from typing import TYPE_CHECKING, Any, ClassVar, Self +from cyberdrop_dl import cache from cyberdrop_dl.crawlers.crawler import Crawler, SupportedDomains, SupportedPaths from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL from cyberdrop_dl.exceptions import ScrapeError @@ -102,8 +103,8 @@ class OneDriveCrawler(Crawler): FOLDER_DOMAIN: ClassVar[str] = "OneDrive" def __post_init__(self) -> None: - badger_token: str = self.manager.cache_manager.get("onedrive_badger_token") or "" - badger_token_expires: str = self.manager.cache_manager.get("onedrive_badger_token_expires") or "" + badger_token: str = cache.get().get("onedrive_badger_token") or "" + badger_token_expires: str = cache.get().get("onedrive_badger_token_expires") or "" self.auth_headers = {} expired = True if badger_token_expires: @@ -226,8 +227,8 @@ async def get_badger_token(self, badger_url: AbsoluteHttpURL = BADGER_URL) -> No badger_token: str = json_resp["token"] badger_token_expires: str = json_resp["expiryTimeUtc"] self.auth_headers = {"Prefer": "autoredeem", "Authorization": f"Badger {badger_token}"} - self.manager.cache_manager.save("onedrive_badger_token", badger_token) - self.manager.cache_manager.save("onedrive_badger_token_expires", badger_token_expires) + cache.get().save("onedrive_badger_token", badger_token) + cache.get().save("onedrive_badger_token_expires", badger_token_expires) def is_share_link(url: AbsoluteHttpURL) -> bool: diff --git a/cyberdrop_dl/data_structures/url_objects.py b/cyberdrop_dl/data_structures/url_objects.py index d4894ebcc..59da65299 100644 --- a/cyberdrop_dl/data_structures/url_objects.py +++ b/cyberdrop_dl/data_structures/url_objects.py @@ -18,7 +18,7 @@ from rich.progress import TaskID from cyberdrop_dl import signature - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager class AbsoluteHttpURL(yarl.URL): @signature.copy(yarl.URL.__new__) diff --git a/cyberdrop_dl/director.py b/cyberdrop_dl/director.py index 845f6f98f..2540a2438 100644 --- a/cyberdrop_dl/director.py +++ b/cyberdrop_dl/director.py @@ -14,7 +14,7 @@ from cyberdrop_dl import constants, env from cyberdrop_dl.dependencies import browser_cookie3 -from cyberdrop_dl.managers.manager import Manager +from cyberdrop_dl.managers import Manager from cyberdrop_dl.scraper.scrape_mapper import ScrapeMapper from cyberdrop_dl.ui.program_ui import ProgramUI from cyberdrop_dl.utils.apprise import send_apprise_notifications diff --git a/cyberdrop_dl/downloader/downloader.py b/cyberdrop_dl/downloader/downloader.py index 9911b335a..df4c0a820 100644 --- a/cyberdrop_dl/downloader/downloader.py +++ b/cyberdrop_dl/downloader/downloader.py @@ -14,7 +14,7 @@ from aiohttp import ClientConnectorError, ClientError, ClientResponseError -from cyberdrop_dl import constants +from cyberdrop_dl import config, constants from cyberdrop_dl.data_structures.url_objects import HlsSegment, MediaItem from cyberdrop_dl.exceptions import ( DownloadError, @@ -64,7 +64,7 @@ from collections.abc import Callable, Coroutine, Generator from cyberdrop_dl.clients.download_client import DownloadClient - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager from cyberdrop_dl.utils.m3u8 import M3U8, RenditionGroup P = ParamSpec("P") @@ -135,9 +135,9 @@ def __init__(self, manager: Manager, domain: str) -> None: @property def max_attempts(self): - if self.manager.config_manager.settings_data.download_options.disable_download_attempt_limit: + if config.get().download_options.disable_download_attempt_limit: return 1 - return self.manager.config_manager.global_settings_data.rate_limiting_options.download_attempts + return config.get().rate_limiting_options.download_attempts def startup(self) -> None: """Starts the downloader.""" @@ -145,7 +145,7 @@ def startup(self) -> None: self._semaphore = asyncio.Semaphore(self.manager.client_manager.get_download_slots(self.domain)) self.manager.path_manager.download_folder.mkdir(parents=True, exist_ok=True) - if self.manager.config_manager.settings_data.sorting.sort_downloads: + if config.get().sorting.sort_downloads: self.manager.path_manager.sorted_folder.mkdir(parents=True, exist_ok=True) def update_queued_files(self, increase_total: bool = True): @@ -337,7 +337,7 @@ async def set_file_datetime(self, media_item: MediaItem, complete_file: Path) -> if media_item.is_segment: return - if self.manager.config_manager.settings_data.download_options.disable_file_timestamps: + if config.get().download_options.disable_file_timestamps: return if not media_item.datetime: log(f"Unable to parse upload date for {media_item.url}, using current datetime as file datetime", 30) diff --git a/cyberdrop_dl/downloader/mega_nz.py b/cyberdrop_dl/downloader/mega_nz.py index 73f47b330..fe8a697cd 100644 --- a/cyberdrop_dl/downloader/mega_nz.py +++ b/cyberdrop_dl/downloader/mega_nz.py @@ -17,7 +17,7 @@ from yarl import URL from cyberdrop_dl.data_structures.url_objects import MediaItem - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager class MegaDownloadClient(DownloadClient): diff --git a/cyberdrop_dl/managers/__init__.py b/cyberdrop_dl/managers/__init__.py index e69de29bb..5bfc00dd5 100644 --- a/cyberdrop_dl/managers/__init__.py +++ b/cyberdrop_dl/managers/__init__.py @@ -0,0 +1,130 @@ +from __future__ import annotations + +import asyncio +import json +import logging +from dataclasses import field +from time import perf_counter +from typing import TYPE_CHECKING, NamedTuple + +from cyberdrop_dl import __version__, appdata, config, constants +from cyberdrop_dl.database import Database +from cyberdrop_dl.managers.client_manager import ClientManager +from cyberdrop_dl.managers.config_manager import ConfigManager +from cyberdrop_dl.managers.hash_manager import HashManager +from cyberdrop_dl.managers.live_manager import LiveManager +from cyberdrop_dl.managers.log_manager import LogManager +from cyberdrop_dl.managers.path_manager import PathManager +from cyberdrop_dl.managers.progress_manager import ProgressManager +from cyberdrop_dl.managers.storage_manager import StorageManager +from cyberdrop_dl.utils import ffmpeg +from cyberdrop_dl.utils.logger import LogHandler, QueuedLogger +from cyberdrop_dl.utils.utilities import close_if_defined, get_system_information + +if TYPE_CHECKING: + from asyncio import TaskGroup + + from cyberdrop_dl.scraper.scrape_mapper import ScrapeMapper + + +class AsyncioEvents(NamedTuple): + SHUTTING_DOWN: asyncio.Event + RUNNING: asyncio.Event + + +logger = logging.getLogger(__name__) + + +class Manager: + def __init__(self) -> None: + self.hash_manager: HashManager = field(init=False) + self.db_manager: Database = field(init=False) + self.client_manager: ClientManager = field(init=False) + self.storage_manager: StorageManager = field(init=False) + + self.progress_manager: ProgressManager = field(init=False) + self.live_manager: LiveManager = field(init=False) + + self.task_group: TaskGroup = field(init=False) + self.scrape_mapper: ScrapeMapper = field(init=False) + + self.start_time: float = perf_counter() + self.loggers: dict[str, QueuedLogger] = {} + self.states: AsyncioEvents + + constants.console_handler = LogHandler(level=constants.CONSOLE_LEVEL) + + self.path_manager: PathManager = PathManager(self) + self.path_manager.pre_startup() + self.config_manager: ConfigManager = ConfigManager(self) + self.config_manager.startup() + + self.path_manager.startup() + self.log_manager: LogManager = LogManager(self) + log_app_state() + + """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" + + async def async_startup(self) -> None: + """Async startup process for the manager.""" + self.states = AsyncioEvents(asyncio.Event(), asyncio.Event()) + self.client_manager = ClientManager(self) + await self.client_manager.startup() + self.storage_manager = StorageManager(self) + + await self.async_db_hash_startup() + + constants.MAX_NAME_LENGTHS["FILE"] = config.get().general.max_file_name_length + constants.MAX_NAME_LENGTHS["FOLDER"] = config.get().general.max_folder_name_length + + async def async_db_hash_startup(self) -> None: + self.db_manager = Database( + self.path_manager.history_db, + config.get().runtime_options.ignore_history, + ) + await self.db_manager.startup() + self.hash_manager = HashManager(self) + self.live_manager = LiveManager(self) + self.progress_manager = ProgressManager(self) + self.progress_manager.startup() + + async def async_db_close(self) -> None: + "Partial shutdown for managers used for hash directory scanner" + self.db_manager = await close_if_defined(self.db_manager) + self.hash_manager = constants.NOT_DEFINED + self.progress_manager.hash_progress.reset() + + async def close(self) -> None: + """Closes the manager.""" + self.states.RUNNING.clear() + + await self.async_db_close() + + self.client_manager = await close_if_defined(self.client_manager) + self.storage_manager = await close_if_defined(self.storage_manager) + + while self.loggers: + _, queued_logger = self.loggers.popitem() + queued_logger.stop() + + +def log_app_state() -> None: + auth = {} + + config_ = config.get() + app_data = appdata.get() + for site, auth_entries in config_.auth.model_dump().items(): # pyright: ignore[reportAny] + auth[site] = all(auth_entries.values()) # pyright: ignore[reportAny] + + # f"Using Input File: {self.path_manager.input_file}", + stats = dict( # noqa: C408 + version=__version__, + system=get_system_information(), + ffmpeg=ffmpeg.get_ffmpeg_version(), + ffprobe=ffmpeg.get_ffprobe_version(), + database=app_data.db_file, + config_file=config_.source, + auth=auth, + config=config_.model_dump_json(indent=2, exclude={"auth"}), + ) + logger.debug(json.dumps(stats, indent=2, ensure_ascii=False)) diff --git a/cyberdrop_dl/managers/cache_manager.py b/cyberdrop_dl/managers/cache_manager.py deleted file mode 100644 index 104fcab37..000000000 --- a/cyberdrop_dl/managers/cache_manager.py +++ /dev/null @@ -1,59 +0,0 @@ -from __future__ import annotations - -from dataclasses import field -from typing import TYPE_CHECKING, Any - -from cyberdrop_dl import __version__ as current_version -from cyberdrop_dl.utils import yaml - -if TYPE_CHECKING: - from pathlib import Path - - from cyberdrop_dl.managers.manager import Manager - - -class CacheManager: - def __init__(self, manager: Manager) -> None: - self.manager = manager - self.cache_file: Path = field(init=False) - self._cache: dict[str, Any] = {} - - def startup(self, cache_file: Path) -> None: - """Ensures that the cache file exists.""" - self.cache_file = cache_file - if not self.cache_file.is_file(): - self.save("default_config", "Default") - - self.load() - if self.manager.parsed_args.cli_only_args.appdata_folder: - self.save("first_startup_completed", True) - - def load(self) -> None: - """Loads the cache file into memory.""" - self._cache = yaml.load(self.cache_file) - - def load_request_cache(self) -> None: - return - - def get(self, key: str) -> Any: - """Returns the value of a key in the cache.""" - return self._cache.get(key, None) - - def save(self, key: str, value: Any) -> None: - """Saves a key and value to the cache.""" - self._cache[key] = value - yaml.save(self.cache_file, self._cache) - - def dump(self, data: dict[str, Any]) -> None: - """dumps the dictionary into the cache""" - self._cache = data - yaml.save(self.cache_file, self._cache) - - def remove(self, key: str) -> None: - """Removes a key from the cache.""" - if key in self._cache: - del self._cache[key] - yaml.save(self.cache_file, self._cache) - - async def close(self) -> None: - self.save("version", current_version) diff --git a/cyberdrop_dl/managers/client_manager.py b/cyberdrop_dl/managers/client_manager.py index 0cac0fe93..b15ac4063 100644 --- a/cyberdrop_dl/managers/client_manager.py +++ b/cyberdrop_dl/managers/client_manager.py @@ -21,7 +21,7 @@ from cyberdrop_dl.clients.scraper_client import ScraperClient from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL, MediaItem from cyberdrop_dl.exceptions import DDOSGuardError, DownloadError, ScrapeError, TooManyCrawlerErrors -from cyberdrop_dl.managers.manager import Manager +from cyberdrop_dl.managers import Manager from cyberdrop_dl.ui.prompts.user_prompts import get_cookies_from_browsers from cyberdrop_dl.utils.aio import WeakAsyncLocks from cyberdrop_dl.utils.cookie_management import read_netscape_files @@ -41,7 +41,7 @@ from curl_cffi.requests import AsyncSession from curl_cffi.requests.models import Response as CurlResponse - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager _curl_import_error = None try: @@ -62,7 +62,7 @@ if TYPE_CHECKING: - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager _null_context = contextlib.nullcontext() @@ -207,7 +207,7 @@ def basic_auth(username: str, password: str) -> str: def check_allowed_filetype(self, media_item: MediaItem) -> bool: """Checks if the file type is allowed to download.""" - ignore_options = self.manager.config_manager.settings_data.ignore_options + ignore_options = config.get().ignore_options if media_item.ext.lower() in constants.FILE_FORMATS["Images"] and ignore_options.exclude_images: return False @@ -224,7 +224,7 @@ def check_allowed_date_range(self, media_item: MediaItem) -> bool: return True item_date = datetime.date() - ignore_options = self.manager.config_manager.settings_data.ignore_options + ignore_options = config.get().ignore_options if ignore_options.exclude_before and item_date < ignore_options.exclude_before: return False @@ -324,11 +324,9 @@ def request_context(self, domain: str) -> Generator[None]: pass async def load_cookie_files(self) -> None: - if self.manager.config_manager.settings_data.browser_cookies.auto_import: - assert self.manager.config_manager.settings_data.browser_cookies.browser - get_cookies_from_browsers( - self.manager, browser=self.manager.config_manager.settings_data.browser_cookies.browser - ) + if config.get().browser_cookies.auto_import: + assert config.get().browser_cookies.browser + get_cookies_from_browsers(self.manager, browser=config.get().browser_cookies.browser) cookie_files = sorted(self.manager.path_manager.cookies_dir.glob("*.txt")) if not cookie_files: return diff --git a/cyberdrop_dl/managers/config_manager.py b/cyberdrop_dl/managers/config_manager.py index 23981a74a..7ace2bb6a 100644 --- a/cyberdrop_dl/managers/config_manager.py +++ b/cyberdrop_dl/managers/config_manager.py @@ -6,6 +6,7 @@ from time import sleep from typing import TYPE_CHECKING +from cyberdrop_dl import cache from cyberdrop_dl.config import AuthSettings, ConfigSettings, GlobalSettings from cyberdrop_dl.exceptions import InvalidYamlError from cyberdrop_dl.managers.log_manager import LogManager @@ -17,7 +18,7 @@ from pydantic import BaseModel - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager from cyberdrop_dl.utils.apprise import AppriseURL @@ -49,14 +50,14 @@ def startup(self) -> None: self.authentication_settings = auth_override self.settings.parent.mkdir(parents=True, exist_ok=True) - self.pydantic_config = self.manager.cache_manager.get("pydantic_config") + self.pydantic_config = cache.get().get("pydantic_config") self.load_configs() def get_loaded_config(self): return self.loaded_config or self.get_default_config() def get_default_config(self) -> str: - return self.manager.cache_manager.get("default_config") or "Default" + return cache.get().get("default_config") or "Default" def load_configs(self) -> None: """Loads all the configs.""" @@ -163,15 +164,15 @@ def get_configs(self) -> list: def change_default_config(self, config_name: str) -> None: """Changes the default config.""" - self.manager.cache_manager.save("default_config", config_name) + cache.get().save("default_config", config_name) def delete_config(self, config_name: str) -> None: """Deletes a config.""" configs = self.get_configs() configs.remove(config_name) - if self.manager.cache_manager.get("default_config") == config_name: - self.manager.cache_manager.save("default_config", configs[0]) + if cache.get().get("default_config") == config_name: + cache.get().save("default_config", configs[0]) config = self.manager.path_manager.config_folder / config_name shutil.rmtree(config) @@ -187,7 +188,7 @@ def change_config(self, config_name: str) -> None: sleep(1) def _set_apprise_fixed(self): - apprise_fixed = self.manager.cache_manager.get("apprise_fixed") + apprise_fixed = cache.get().get("apprise_fixed") if apprise_fixed: return if os.name == "nt": @@ -198,12 +199,12 @@ def _set_apprise_fixed(self): else: with self.apprise_file.open("a", encoding="utf8") as f: f.write("windows://\n") - self.manager.cache_manager.save("apprise_fixed", True) + cache.get().save("apprise_fixed", True) def _set_pydantic_config(self): if self.pydantic_config: return - self.manager.cache_manager.save("pydantic_config", True) + cache.get().save("pydantic_config", True) self.pydantic_config = True diff --git a/cyberdrop_dl/managers/hash_manager.py b/cyberdrop_dl/managers/hash_manager.py index 8a24d46c4..93dd275c8 100644 --- a/cyberdrop_dl/managers/hash_manager.py +++ b/cyberdrop_dl/managers/hash_manager.py @@ -10,7 +10,7 @@ from cyberdrop_dl.clients.hash_client import HashClient if TYPE_CHECKING: - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager _HASHERS: Final = { "md5": hashlib.md5, diff --git a/cyberdrop_dl/managers/live_manager.py b/cyberdrop_dl/managers/live_manager.py index bab9bc58e..d5ed00254 100644 --- a/cyberdrop_dl/managers/live_manager.py +++ b/cyberdrop_dl/managers/live_manager.py @@ -7,7 +7,7 @@ from rich.live import Live -from cyberdrop_dl import constants +from cyberdrop_dl import config, constants from cyberdrop_dl.cli import is_terminal_in_portrait if TYPE_CHECKING: @@ -15,7 +15,7 @@ from rich.console import RenderableType - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager class LiveManager: @@ -23,7 +23,7 @@ def __init__(self, manager: Manager) -> None: self.manager = manager self.ui_setting = self.manager.parsed_args.cli_only_args.ui self.fullscreen = f = self.manager.parsed_args.cli_only_args.fullscreen_ui - self.refresh_rate = rate = self.manager.config_manager.global_settings_data.ui_options.refresh_rate + self.refresh_rate = rate = config.get().ui_options.refresh_rate self.live = Live(refresh_per_second=rate, transient=True, screen=f, auto_refresh=True) self.current_layout: str = "" diff --git a/cyberdrop_dl/managers/log_manager.py b/cyberdrop_dl/managers/log_manager.py index 80e06f5c7..41b5a67f0 100644 --- a/cyberdrop_dl/managers/log_manager.py +++ b/cyberdrop_dl/managers/log_manager.py @@ -17,7 +17,7 @@ from yarl import URL from cyberdrop_dl.data_structures.url_objects import MediaItem - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager class LogManager: diff --git a/cyberdrop_dl/managers/manager.py b/cyberdrop_dl/managers/manager.py deleted file mode 100644 index 3452a9ee8..000000000 --- a/cyberdrop_dl/managers/manager.py +++ /dev/null @@ -1,134 +0,0 @@ -from __future__ import annotations - -import asyncio -import json -import logging -from dataclasses import field -from time import perf_counter -from typing import TYPE_CHECKING, NamedTuple - -from cyberdrop_dl import __version__, appdata, config, constants -from cyberdrop_dl.database import Database -from cyberdrop_dl.managers.cache_manager import CacheManager -from cyberdrop_dl.managers.client_manager import ClientManager -from cyberdrop_dl.managers.config_manager import ConfigManager -from cyberdrop_dl.managers.hash_manager import HashManager -from cyberdrop_dl.managers.live_manager import LiveManager -from cyberdrop_dl.managers.log_manager import LogManager -from cyberdrop_dl.managers.path_manager import PathManager -from cyberdrop_dl.managers.progress_manager import ProgressManager -from cyberdrop_dl.managers.storage_manager import StorageManager -from cyberdrop_dl.utils import ffmpeg -from cyberdrop_dl.utils.logger import LogHandler, QueuedLogger -from cyberdrop_dl.utils.utilities import close_if_defined, get_system_information - -if TYPE_CHECKING: - from asyncio import TaskGroup - - from cyberdrop_dl.scraper.scrape_mapper import ScrapeMapper - - -class AsyncioEvents(NamedTuple): - SHUTTING_DOWN: asyncio.Event - RUNNING: asyncio.Event - - -logger = logging.getLogger(__name__) - - -class Manager: - def __init__(self) -> None: - self.cache_manager: CacheManager = CacheManager(self) - self.hash_manager: HashManager = field(init=False) - self.db_manager: Database = field(init=False) - self.client_manager: ClientManager = field(init=False) - self.storage_manager: StorageManager = field(init=False) - - self.progress_manager: ProgressManager = field(init=False) - self.live_manager: LiveManager = field(init=False) - - self.task_group: TaskGroup = field(init=False) - self.scrape_mapper: ScrapeMapper = field(init=False) - - self.start_time: float = perf_counter() - self.loggers: dict[str, QueuedLogger] = {} - self.states: AsyncioEvents - - constants.console_handler = LogHandler(level=constants.CONSOLE_LEVEL) - - self.path_manager: PathManager = PathManager(self) - self.path_manager.pre_startup() - self.cache_manager.startup(self.path_manager.cache_folder / "cache.yaml") - self.config_manager: ConfigManager = ConfigManager(self) - self.config_manager.startup() - - self.path_manager.startup() - self.log_manager: LogManager = LogManager(self) - log_app_state() - - """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - - async def async_startup(self) -> None: - """Async startup process for the manager.""" - self.states = AsyncioEvents(asyncio.Event(), asyncio.Event()) - self.client_manager = ClientManager(self) - await self.client_manager.startup() - self.storage_manager = StorageManager(self) - - await self.async_db_hash_startup() - - constants.MAX_NAME_LENGTHS["FILE"] = config.get().general.max_file_name_length - constants.MAX_NAME_LENGTHS["FOLDER"] = config.get().general.max_folder_name_length - - async def async_db_hash_startup(self) -> None: - self.db_manager = Database( - self.path_manager.history_db, - config.get().runtime_options.ignore_history, - ) - await self.db_manager.startup() - self.hash_manager = HashManager(self) - self.live_manager = LiveManager(self) - self.progress_manager = ProgressManager(self) - self.progress_manager.startup() - - async def async_db_close(self) -> None: - "Partial shutdown for managers used for hash directory scanner" - self.db_manager = await close_if_defined(self.db_manager) - self.hash_manager = constants.NOT_DEFINED - self.progress_manager.hash_progress.reset() - - async def close(self) -> None: - """Closes the manager.""" - self.states.RUNNING.clear() - - await self.async_db_close() - - self.client_manager = await close_if_defined(self.client_manager) - self.storage_manager = await close_if_defined(self.storage_manager) - self.cache_manager = await close_if_defined(self.cache_manager) - - while self.loggers: - _, queued_logger = self.loggers.popitem() - queued_logger.stop() - - -def log_app_state() -> None: - auth = {} - - config_ = config.get() - app_data = appdata.get() - for site, auth_entries in config_.auth.model_dump().items(): # pyright: ignore[reportAny] - auth[site] = all(auth_entries.values()) # pyright: ignore[reportAny] - - # f"Using Input File: {self.path_manager.input_file}", - stats = dict( # noqa: C408 - version=__version__, - system=get_system_information(), - ffmpeg=ffmpeg.get_ffmpeg_version(), - ffprobe=ffmpeg.get_ffprobe_version(), - database=app_data.db_file, - config_file=config_.source, - auth=auth, - config=config_.model_dump_json(indent=2, exclude={"auth"}), - ) - logger.debug(json.dumps(stats, indent=2, ensure_ascii=False)) diff --git a/cyberdrop_dl/managers/mock_manager.py b/cyberdrop_dl/managers/mock_manager.py deleted file mode 100644 index 3cf903507..000000000 --- a/cyberdrop_dl/managers/mock_manager.py +++ /dev/null @@ -1,52 +0,0 @@ -from __future__ import annotations - -from typing import Any - -MOCK_MANAGER = None - - -class MockCallable: - def __init__(self, return_obj: Any = None) -> None: - self.return_obj = return_obj - - def __getitem__(self, parameters: Any) -> object: ... - def __or__(self, other: Any) -> MockCallable: ... - def __ror__(self, other: Any) -> MockCallable: ... - def __call__(self, *args, **kwargs): - return self.return_obj - - -class Mock(Any): - def __init__(self, name: str, /) -> None: - self._nested_attrs: dict[str, Mock] = {} - self._mock_name = name - - def __call__(self, *args, **kwargs) -> None: ... - - def __getattribute__(self, name: str, /) -> Any: - if name == "manager" and MOCK_MANAGER is not None: - return MOCK_MANAGER - try: - return super().__getattribute__(name) - except AttributeError: - if name == "_nested_attrs": - raise # Avoid infinite recursion - return self._nested_attrs.get(name, Mock(name)) - - -class MockCacheManager(Mock): - def __init__(self) -> None: - self.get = self.save = MockCallable() - super().__init__("cache_manager") - - -class MockManager(Mock): - def __init__(self): - global MOCK_MANAGER - assert MOCK_MANAGER is None, "A global MockManager already exists. Only 1 should be created" - super().__init__("manager") - self.cache_manager = MockCacheManager() - MOCK_MANAGER = self - - -MOCK_MANAGER = MockManager() diff --git a/cyberdrop_dl/managers/path_manager.py b/cyberdrop_dl/managers/path_manager.py index 237d71d92..ff91cc3d4 100644 --- a/cyberdrop_dl/managers/path_manager.py +++ b/cyberdrop_dl/managers/path_manager.py @@ -6,12 +6,12 @@ from pathlib import Path from typing import TYPE_CHECKING -from cyberdrop_dl import env +from cyberdrop_dl import config, env from cyberdrop_dl.utils.utilities import purge_dir_tree if TYPE_CHECKING: from cyberdrop_dl.data_structures.url_objects import MediaItem - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager class PathManager: @@ -77,7 +77,7 @@ def pre_startup(self) -> None: def startup(self) -> None: """Startup process for the Directory Manager.""" - settings_data = self.manager.config_manager.settings_data + settings_data = config.get() current_config = self.manager.config_manager.loaded_config def replace(path: Path) -> Path: @@ -110,7 +110,7 @@ def replace(path: Path) -> Path: def _set_output_filenames(self, now: datetime) -> None: current_time_file_iso: str = now.strftime("%Y%m%d_%H%M%S") current_time_folder_iso: str = now.strftime("%Y_%m_%d") - log_settings_config = self.manager.config_manager.settings_data.logs + log_settings_config = config.get().logs log_files: dict[str, Path] = log_settings_config.model_dump() for model_name, log_file in log_files.items(): @@ -130,11 +130,11 @@ def _set_output_filenames(self, now: datetime) -> None: self.pages_folder = self.main_log.parent / "cdl_responses" def _delete_logs_and_folders(self, now: datetime): - if self.manager.config_manager.settings_data.logs.logs_expire_after: + if config.get().logs.logs_expire_after: for file in set(self.log_folder.rglob("*.log")) | set(self.log_folder.rglob("*.csv")): file_date = Path(file).stat().st_ctime t_delta = now - datetime.fromtimestamp(file_date) - if t_delta > self.manager.config_manager.settings_data.logs.logs_expire_after: + if t_delta > config.get().logs.logs_expire_after: file.unlink(missing_ok=True) purge_dir_tree(self.log_folder) @@ -144,7 +144,7 @@ def _create_output_folders(self): path: Path = getattr(self, internal_name) path.parent.mkdir(parents=True, exist_ok=True) - if self.manager.config_manager.settings_data.files.save_pages_html: + if config.get().files.save_pages_html: self.pages_folder.mkdir(parents=True, exist_ok=True) def add_completed(self, media_item: MediaItem) -> None: diff --git a/cyberdrop_dl/managers/progress_manager.py b/cyberdrop_dl/managers/progress_manager.py index 8f6b46454..c2cf2c6f9 100644 --- a/cyberdrop_dl/managers/progress_manager.py +++ b/cyberdrop_dl/managers/progress_manager.py @@ -30,7 +30,7 @@ from rich.console import RenderableType - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager from cyberdrop_dl.ui.progress.statistic_progress import UiFailureTotal log_cyan = partial(log_with_color, style="cyan", level=20) diff --git a/cyberdrop_dl/managers/storage_manager.py b/cyberdrop_dl/managers/storage_manager.py index e52482327..5726fcf9d 100644 --- a/cyberdrop_dl/managers/storage_manager.py +++ b/cyberdrop_dl/managers/storage_manager.py @@ -21,7 +21,7 @@ from psutil._ntuples import sdiskpart from cyberdrop_dl.data_structures.url_objects import MediaItem - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager @dataclass(frozen=True, slots=True, order=True) diff --git a/cyberdrop_dl/plugins.py b/cyberdrop_dl/plugins.py index 0284045e8..1aa13f933 100644 --- a/cyberdrop_dl/plugins.py +++ b/cyberdrop_dl/plugins.py @@ -25,7 +25,7 @@ from collections.abc import Iterable from importlib.metadata import EntryPoint - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager _GROUP_NAME: Final = "cyberdrop_dl_plugins" diff --git a/cyberdrop_dl/scraper/jdownloader.py b/cyberdrop_dl/scraper/jdownloader.py index d3c9fd94d..fff9fe0a9 100644 --- a/cyberdrop_dl/scraper/jdownloader.py +++ b/cyberdrop_dl/scraper/jdownloader.py @@ -12,7 +12,7 @@ from pathlib import Path from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager @dataclasses.dataclass(slots=True) diff --git a/cyberdrop_dl/scraper/scrape_mapper.py b/cyberdrop_dl/scraper/scrape_mapper.py index 594817cb7..51fa75d20 100644 --- a/cyberdrop_dl/scraper/scrape_mapper.py +++ b/cyberdrop_dl/scraper/scrape_mapper.py @@ -10,6 +10,7 @@ import aiofiles from yarl import URL +from cyberdrop_dl import config from cyberdrop_dl.constants import REGEX_LINKS, BlockedDomains from cyberdrop_dl.crawlers._chevereto import CheveretoCrawler from cyberdrop_dl.crawlers.crawler import Crawler, create_crawlers @@ -31,7 +32,7 @@ from cyberdrop_dl.config.global_model import GenericCrawlerInstances, GlobalSettings from cyberdrop_dl.crawlers import Crawler - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager existing_crawlers: dict[str, Crawler] = {} _seen_urls: set[AbsoluteHttpURL] = set() @@ -46,7 +47,7 @@ def __init__(self, manager: Manager) -> None: self.existing_crawlers: dict[str, Crawler] = {} self.direct_crawler = DirectHttpFile(self.manager) self.jdownloader = JDownloader(self.manager) - self.jdownloader_whitelist = self.manager.config_manager.settings_data.runtime_options.jdownloader_whitelist + self.jdownloader_whitelist = config.get().runtime_options.jdownloader_whitelist self.using_input_file = False self.groups = set() self.count = 0 @@ -60,7 +61,7 @@ def group_count(self) -> int: @property def global_settings(self) -> GlobalSettings: - return self.manager.config_manager.global_settings_data + return config.get() @property def enable_generic_crawler(self) -> bool: @@ -121,7 +122,7 @@ async def get_input_items(self) -> AsyncGenerator[ScrapeItem]: async for item in items_generator: await self.manager.states.RUNNING.wait() - item.children_limits = self.manager.config_manager.settings_data.download_options.maximum_number_of_children + item.children_limits = config.get().download_options.maximum_number_of_children if self.filter_items(item): if item_limit and self.count >= item_limit: break @@ -283,12 +284,12 @@ def filter_items(self, scrape_item: ScrapeItem) -> bool: log(f"Skipping {scrape_item.url} as it is outside of the desired date range", 10) return False - skip_hosts = self.manager.config_manager.settings_data.ignore_options.skip_hosts + skip_hosts = config.get().ignore_options.skip_hosts if skip_hosts and is_in_domain_list(scrape_item, skip_hosts): log(f"Skipping URL by skip_hosts config: {scrape_item.url}", 10) return False - only_hosts = self.manager.config_manager.settings_data.ignore_options.only_hosts + only_hosts = config.get().ignore_options.only_hosts if only_hosts and not is_in_domain_list(scrape_item, only_hosts): log(f"Skipping URL by only_hosts config: {scrape_item.url}", 10) return False diff --git a/cyberdrop_dl/ui/program_ui.py b/cyberdrop_dl/ui/program_ui.py index d7d113b73..4bfaec497 100644 --- a/cyberdrop_dl/ui/program_ui.py +++ b/cyberdrop_dl/ui/program_ui.py @@ -11,6 +11,7 @@ from rich.markdown import Markdown from rich.text import Text +from cyberdrop_dl import cache from cyberdrop_dl.clients.hash_client import hash_directory_scanner from cyberdrop_dl.dependencies import browser_cookie3 from cyberdrop_dl.ui.prompts import user_prompts @@ -28,7 +29,7 @@ from InquirerPy.base.control import Choice - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager P = ParamSpec("P") R = TypeVar("R") @@ -156,7 +157,7 @@ def _clear_cache(self) -> None: return urls = user_prompts.filter_cache_urls(self.manager, domains) for url in urls: - asyncio.run(self.manager.cache_manager.request_cache.delete_url(url)) + asyncio.run(cache.get().request_cache.delete_url(url)) console.print("\nExecuting database vacuum. This may take several minutes, please wait...") try: @@ -209,7 +210,7 @@ def _delete_config(self) -> None: self.print_error("You cannot delete the currently active config") return - if self.manager.cache_manager.get("default_config") == selected_config: + if cache.get().get("default_config") == selected_config: self.print_error("You cannot delete the default config") return @@ -242,7 +243,7 @@ def _open_in_text_editor(self, file_path: Path, *, reload_config: bool = True): def _process_answer(self, answer: Any, options_map: dict) -> Choice | None: """Checks prompt answer and executes corresponding function.""" if answer == EXIT_CHOICE.value: - asyncio.run(self.manager.cache_manager.close()) + asyncio.run(cache.get().close()) sys.exit(0) if answer == DONE_CHOICE.value: return DONE_CHOICE diff --git a/cyberdrop_dl/ui/progress/downloads_progress.py b/cyberdrop_dl/ui/progress/downloads_progress.py index 25e83989a..6d791c5b6 100644 --- a/cyberdrop_dl/ui/progress/downloads_progress.py +++ b/cyberdrop_dl/ui/progress/downloads_progress.py @@ -7,7 +7,7 @@ from rich.progress import BarColumn, Progress if TYPE_CHECKING: - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager class DownloadsProgress: diff --git a/cyberdrop_dl/ui/progress/file_progress.py b/cyberdrop_dl/ui/progress/file_progress.py index d2b17ed3e..75ca945ff 100644 --- a/cyberdrop_dl/ui/progress/file_progress.py +++ b/cyberdrop_dl/ui/progress/file_progress.py @@ -16,7 +16,7 @@ from cyberdrop_dl.ui.progress.deque_progress import DequeProgress, adjust_title if TYPE_CHECKING: - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager class FileProgress(DequeProgress): diff --git a/cyberdrop_dl/ui/progress/hash_progress.py b/cyberdrop_dl/ui/progress/hash_progress.py index 252dfe74f..400190da1 100644 --- a/cyberdrop_dl/ui/progress/hash_progress.py +++ b/cyberdrop_dl/ui/progress/hash_progress.py @@ -11,7 +11,7 @@ from rich.progress import BarColumn, Progress, TaskID if TYPE_CHECKING: - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager def _generic_progress() -> Progress: diff --git a/cyberdrop_dl/ui/progress/scraping_progress.py b/cyberdrop_dl/ui/progress/scraping_progress.py index 9139c65e6..3efd44251 100644 --- a/cyberdrop_dl/ui/progress/scraping_progress.py +++ b/cyberdrop_dl/ui/progress/scraping_progress.py @@ -9,7 +9,7 @@ if TYPE_CHECKING: from yarl import URL - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager class ScrapingProgress(DequeProgress): diff --git a/cyberdrop_dl/ui/progress/sort_progress.py b/cyberdrop_dl/ui/progress/sort_progress.py index e57684285..ca05f92bc 100644 --- a/cyberdrop_dl/ui/progress/sort_progress.py +++ b/cyberdrop_dl/ui/progress/sort_progress.py @@ -9,7 +9,7 @@ from cyberdrop_dl.ui.progress.deque_progress import DequeProgress, adjust_title if TYPE_CHECKING: - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager class SortProgress(DequeProgress): diff --git a/cyberdrop_dl/ui/prompts/user_prompts.py b/cyberdrop_dl/ui/prompts/user_prompts.py index 1d76d1593..594026c25 100644 --- a/cyberdrop_dl/ui/prompts/user_prompts.py +++ b/cyberdrop_dl/ui/prompts/user_prompts.py @@ -26,7 +26,7 @@ from yarl import URL - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager console = Console() diff --git a/cyberdrop_dl/utils/apprise.py b/cyberdrop_dl/utils/apprise.py index d5c6e2b5c..dd66d8517 100644 --- a/cyberdrop_dl/utils/apprise.py +++ b/cyberdrop_dl/utils/apprise.py @@ -22,7 +22,7 @@ from cyberdrop_dl.utils.yaml import format_validation_error if TYPE_CHECKING: - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager @dataclass diff --git a/cyberdrop_dl/utils/cookie_management.py b/cyberdrop_dl/utils/cookie_management.py index e1c936fd2..42a635b6d 100644 --- a/cyberdrop_dl/utils/cookie_management.py +++ b/cyberdrop_dl/utils/cookie_management.py @@ -17,7 +17,7 @@ from pathlib import Path from cyberdrop_dl.constants import BROWSERS - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager P = ParamSpec("P") diff --git a/cyberdrop_dl/utils/logger.py b/cyberdrop_dl/utils/logger.py index 662f2f213..51908eed1 100644 --- a/cyberdrop_dl/utils/logger.py +++ b/cyberdrop_dl/utils/logger.py @@ -37,7 +37,7 @@ from rich.console import ConsoleRenderable - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager _P = ParamSpec("_P") _ExitCode = str | int | None diff --git a/cyberdrop_dl/utils/sorting.py b/cyberdrop_dl/utils/sorting.py index f817f44f8..bb3a8e32f 100644 --- a/cyberdrop_dl/utils/sorting.py +++ b/cyberdrop_dl/utils/sorting.py @@ -17,7 +17,7 @@ from cyberdrop_dl.utils.utilities import purge_dir_tree if TYPE_CHECKING: - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager async def get_modified_date(file: Path) -> datetime: diff --git a/cyberdrop_dl/utils/utilities.py b/cyberdrop_dl/utils/utilities.py index 4e985db0f..e1e6cae60 100644 --- a/cyberdrop_dl/utils/utilities.py +++ b/cyberdrop_dl/utils/utilities.py @@ -56,7 +56,7 @@ from cyberdrop_dl.crawlers import Crawler from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL, MediaItem, ScrapeItem from cyberdrop_dl.downloader.downloader import Downloader - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager CrawerOrDownloader = TypeVar("CrawerOrDownloader", bound=Crawler | Downloader) Origin = TypeVar("Origin", bound=ScrapeItem | MediaItem | URL) diff --git a/cyberdrop_dl/utils/webhook.py b/cyberdrop_dl/utils/webhook.py index 5c75d6f65..a156988dd 100644 --- a/cyberdrop_dl/utils/webhook.py +++ b/cyberdrop_dl/utils/webhook.py @@ -15,7 +15,7 @@ from pathlib import Path from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager from cyberdrop_dl.models.base import HttpAppriseURL diff --git a/tests/conftest.py b/tests/conftest.py index a4a8a40f4..46f3fd62b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,7 +5,7 @@ import pytest -from cyberdrop_dl.managers.manager import Manager +from cyberdrop_dl.managers import Manager from cyberdrop_dl.scraper import scrape_mapper if TYPE_CHECKING: diff --git a/tests/crawlers/test_crawlers.py b/tests/crawlers/test_crawlers.py index 7fa246d26..587ddd3a3 100644 --- a/tests/crawlers/test_crawlers.py +++ b/tests/crawlers/test_crawlers.py @@ -19,7 +19,7 @@ if TYPE_CHECKING: from cyberdrop_dl.crawlers.crawler import Crawler - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager def _crawler_mock(func: str = "handle_media_item") -> mock._patch[mock.AsyncMock]: diff --git a/tests/crawlers/test_xenforo.py b/tests/crawlers/test_xenforo.py index 1fd3f48b1..e735a3ee1 100644 --- a/tests/crawlers/test_xenforo.py +++ b/tests/crawlers/test_xenforo.py @@ -11,7 +11,7 @@ from cyberdrop_dl.crawlers.xenforo import xenforo from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL, ScrapeItem from cyberdrop_dl.exceptions import ScrapeError -from cyberdrop_dl.managers.manager import Manager +from cyberdrop_dl.managers import Manager if TYPE_CHECKING: from collections.abc import AsyncGenerator diff --git a/tests/fake_classes/managers.py b/tests/fake_classes/managers.py index 90dbb1fdb..dba1fef1f 100644 --- a/tests/fake_classes/managers.py +++ b/tests/fake_classes/managers.py @@ -1,9 +1,9 @@ from typing import Literal -from cyberdrop_dl.managers.cache_manager import CacheManager +from cyberdrop_dl.managers.cache_manager import Cache -class FakeCacheManager(CacheManager): +class FakeCacheManager(Cache): def get(self, _: str) -> Literal[True]: return True diff --git a/tests/test_apprise.py b/tests/test_apprise.py index fe815b27f..6eb780bcd 100644 --- a/tests/test_apprise.py +++ b/tests/test_apprise.py @@ -8,8 +8,8 @@ from cyberdrop_dl import constants from cyberdrop_dl.constants import NotificationResult +from cyberdrop_dl.managers import Manager from cyberdrop_dl.managers.config_manager import ConfigManager -from cyberdrop_dl.managers.manager import Manager from cyberdrop_dl.managers.path_manager import PathManager from cyberdrop_dl.utils import apprise from tests.fake_classes.managers import FakeCacheManager diff --git a/tests/test_flaresolverr.py b/tests/test_flaresolverr.py index 612df2856..59d67959b 100644 --- a/tests/test_flaresolverr.py +++ b/tests/test_flaresolverr.py @@ -4,7 +4,7 @@ from cyberdrop_dl.clients.flaresolverr import FlareSolverr, _Command from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL -from cyberdrop_dl.managers.manager import Manager +from cyberdrop_dl.managers import Manager from cyberdrop_dl.scraper.scrape_mapper import ScrapeMapper ENV_NAME = "CDL_FLARESOLVERR" diff --git a/tests/test_hashing.py b/tests/test_hashing.py index 0ae9498de..078cb5d0e 100644 --- a/tests/test_hashing.py +++ b/tests/test_hashing.py @@ -11,7 +11,7 @@ if TYPE_CHECKING: from pathlib import Path - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager def get_hashes(path: Path) -> set[tuple[str, str]]: diff --git a/tests/test_manager.py b/tests/test_manager.py index 42fdd84aa..d21106043 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -5,7 +5,7 @@ import pytest -from cyberdrop_dl.managers.manager import Manager, merge_dicts +from cyberdrop_dl.managers import Manager, merge_dicts if TYPE_CHECKING: from pydantic import BaseModel diff --git a/tests/test_scrape_mapper.py b/tests/test_scrape_mapper.py index 71523b318..7680fea7a 100644 --- a/tests/test_scrape_mapper.py +++ b/tests/test_scrape_mapper.py @@ -9,7 +9,7 @@ from cyberdrop_dl.scraper import scrape_mapper if TYPE_CHECKING: - from cyberdrop_dl.managers.manager import Manager + from cyberdrop_dl.managers import Manager TEST_BASE_CRAWLER = next(iter(crawlers.GENERIC_CRAWLERS)) diff --git a/tests/test_storage.py b/tests/test_storage.py index 0e6badec1..885ba8799 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -6,7 +6,7 @@ import pytest -from cyberdrop_dl.managers.manager import Manager +from cyberdrop_dl.managers import Manager from cyberdrop_dl.managers.storage_manager import StorageManager From ae4865c43dae0fbc41beea710d743527a16a55be Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Sun, 22 Feb 2026 21:59:35 -0500 Subject: [PATCH 07/23] refactor: update progress UI --- cyberdrop_dl/clients/download_client.py | 4 +- cyberdrop_dl/clients/scraper_client.py | 2 +- cyberdrop_dl/crawlers/coomer.py | 3 +- cyberdrop_dl/crawlers/crawler.py | 4 +- cyberdrop_dl/crawlers/kemono.py | 2 +- cyberdrop_dl/crawlers/rumble.py | 2 +- cyberdrop_dl/director.py | 33 +- cyberdrop_dl/downloader/downloader.py | 4 +- cyberdrop_dl/managers/__init__.py | 6 +- cyberdrop_dl/managers/progress_manager.py | 244 -------------- cyberdrop_dl/progress/__init__.py | 191 +++++++++++ .../{ui => }/progress/downloads_progress.py | 0 .../{ui => }/progress/hash_progress.py | 0 cyberdrop_dl/progress/panels.py | 129 ++++++++ cyberdrop_dl/progress/sorting.py | 44 +++ .../{ui => }/progress/statistic_progress.py | 0 cyberdrop_dl/ui/program_ui.py | 297 ------------------ cyberdrop_dl/ui/progress/__init__.py | 0 cyberdrop_dl/ui/progress/deque_progress.py | 118 ------- cyberdrop_dl/ui/progress/file_progress.py | 78 ----- cyberdrop_dl/ui/progress/scraping_progress.py | 46 --- cyberdrop_dl/ui/progress/sort_progress.py | 74 ----- cyberdrop_dl/ui/prompts/user_prompts.py | 10 +- cyberdrop_dl/utils/cookie_management.py | 3 +- cyberdrop_dl/utils/sorting.py | 25 +- cyberdrop_dl/utils/utilities.py | 10 +- cyberdrop_dl/utils/webhook.py | 4 +- tests/test_manager.py | 11 +- tests/test_startup.py | 2 +- 29 files changed, 416 insertions(+), 930 deletions(-) delete mode 100644 cyberdrop_dl/managers/progress_manager.py create mode 100644 cyberdrop_dl/progress/__init__.py rename cyberdrop_dl/{ui => }/progress/downloads_progress.py (100%) rename cyberdrop_dl/{ui => }/progress/hash_progress.py (100%) create mode 100644 cyberdrop_dl/progress/panels.py create mode 100644 cyberdrop_dl/progress/sorting.py rename cyberdrop_dl/{ui => }/progress/statistic_progress.py (100%) delete mode 100644 cyberdrop_dl/ui/program_ui.py delete mode 100644 cyberdrop_dl/ui/progress/__init__.py delete mode 100644 cyberdrop_dl/ui/progress/deque_progress.py delete mode 100644 cyberdrop_dl/ui/progress/file_progress.py delete mode 100644 cyberdrop_dl/ui/progress/scraping_progress.py delete mode 100644 cyberdrop_dl/ui/progress/sort_progress.py diff --git a/cyberdrop_dl/clients/download_client.py b/cyberdrop_dl/clients/download_client.py index 70ce36404..44f55efa9 100644 --- a/cyberdrop_dl/clients/download_client.py +++ b/cyberdrop_dl/clients/download_client.py @@ -70,7 +70,7 @@ def _get_download_headers(self, domain: str, referer: AbsoluteHttpURL) -> dict[s "User-Agent": config.get().general.user_agent, "Referer": str(referer), } - auth_data = self.manager.config_manager.authentication_data + auth_data = config.get().auth if domain == "pixeldrain" and auth_data.pixeldrain.api_key: download_headers["Authorization"] = self.manager.client_manager.basic_auth( "Cyberdrop-DL", auth_data.pixeldrain.api_key @@ -161,7 +161,7 @@ async def _process_response( task_id = media_item.task_id if task_id is None: size = (media_item.filesize + resume_point) if media_item.filesize is not None else None - task_id = self.manager.progress_manager.file_progress.add_task( + task_id = self.manager.progress_manager.file_progress.new_task( domain=domain, filename=media_item.filename, expected_size=size ) media_item.set_task_id(task_id) diff --git a/cyberdrop_dl/clients/scraper_client.py b/cyberdrop_dl/clients/scraper_client.py index 90d07692b..eb9624700 100644 --- a/cyberdrop_dl/clients/scraper_client.py +++ b/cyberdrop_dl/clients/scraper_client.py @@ -29,7 +29,7 @@ class ScraperClient: def __init__(self, client_manager: ClientManager) -> None: self.client_manager = client_manager - self._save_pages_html = client_manager.manager.config_manager.settings_data.files.save_pages_html + self._save_pages_html = client_manager.config.get().files.save_pages_html self._pages_folder = self.client_manager.manager.path_manager.pages_folder min_html_file_path_len = len(str(self._pages_folder)) + len(constants.STARTUP_TIME_STR) + 10 self._max_html_stem_len = 245 - min_html_file_path_len diff --git a/cyberdrop_dl/crawlers/coomer.py b/cyberdrop_dl/crawlers/coomer.py index eae9fe8c9..cb8285f4d 100644 --- a/cyberdrop_dl/crawlers/coomer.py +++ b/cyberdrop_dl/crawlers/coomer.py @@ -2,6 +2,7 @@ from typing import ClassVar +from cyberdrop_dl import config from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL from .kemono import KemonoBaseCrawler @@ -16,4 +17,4 @@ class CoomerCrawler(KemonoBaseCrawler): @property def session_cookie(self) -> str: - return self.manager.config_manager.authentication_data.coomer.session + return config.get().auth.coomer.session diff --git a/cyberdrop_dl/crawlers/crawler.py b/cyberdrop_dl/crawlers/crawler.py index a00dad930..b9d6b1e31 100644 --- a/cyberdrop_dl/crawlers/crawler.py +++ b/cyberdrop_dl/crawlers/crawler.py @@ -268,7 +268,7 @@ def allow_no_extension(self) -> bool: @property def deep_scrape(self) -> bool: - return self.manager.config_manager.deep_scrape + return config.get().runtime_options.deep_scrape def _init_downloader(self) -> Downloader: self.downloader = dl = Downloader(self.manager, self.DOMAIN) @@ -367,7 +367,7 @@ def raise_exc(self, scrape_item: ScrapeItem, exc: type[Exception] | Exception | def new_task_id(self, url: AbsoluteHttpURL) -> Generator[TaskID]: """Creates a new task_id (shows the URL in the UI and logs)""" log(f"Scraping [{self.FOLDER_DOMAIN}]: {url}", 20) - task_id = self.manager.progress_manager.scraping_progress.add_task(url) + task_id = self.manager.progress_manager.scraping_progress.new_task(url) try: yield task_id finally: diff --git a/cyberdrop_dl/crawlers/kemono.py b/cyberdrop_dl/crawlers/kemono.py index f1e5db02a..d8d0dde9d 100644 --- a/cyberdrop_dl/crawlers/kemono.py +++ b/cyberdrop_dl/crawlers/kemono.py @@ -619,7 +619,7 @@ class KemonoCrawler(KemonoBaseCrawler): @property def session_cookie(self) -> str: - return self.manager.config_manager.authentication_data.kemono.session + return config.get().auth.kemono.session def _thumbnail_to_src(og_url: AbsoluteHttpURL) -> AbsoluteHttpURL: diff --git a/cyberdrop_dl/crawlers/rumble.py b/cyberdrop_dl/crawlers/rumble.py index 2b8fee7ea..ad782334a 100644 --- a/cyberdrop_dl/crawlers/rumble.py +++ b/cyberdrop_dl/crawlers/rumble.py @@ -42,7 +42,7 @@ class Format(NamedTuple): is_single_file: bool # for formats with the same resolution, give priority to non hls bitrate: int size: int - type: FormatType # On formats where everything else is the same, choose mp4 over webm + type: FormatType # On formats where everything else is the same, choose mp4 over webm url: AbsoluteHttpURL m3u8: m3u8.RenditionGroup | None = None diff --git a/cyberdrop_dl/director.py b/cyberdrop_dl/director.py index 2540a2438..6a76c99b8 100644 --- a/cyberdrop_dl/director.py +++ b/cyberdrop_dl/director.py @@ -10,13 +10,10 @@ from pathlib import Path from typing import TYPE_CHECKING, ParamSpec, TypeVar -from pydantic import ValidationError - -from cyberdrop_dl import constants, env +from cyberdrop_dl import config, constants, env from cyberdrop_dl.dependencies import browser_cookie3 from cyberdrop_dl.managers import Manager from cyberdrop_dl.scraper.scrape_mapper import ScrapeMapper -from cyberdrop_dl.ui.program_ui import ProgramUI from cyberdrop_dl.utils.apprise import send_apprise_notifications from cyberdrop_dl.utils.logger import ( LogHandler, @@ -29,7 +26,6 @@ from cyberdrop_dl.utils.updates import check_latest_pypi from cyberdrop_dl.utils.utilities import check_partials_and_empty_folders from cyberdrop_dl.utils.webhook import send_webhook_message -from cyberdrop_dl.utils.yaml import format_validation_error if TYPE_CHECKING: from collections.abc import Callable, Coroutine, Sequence @@ -122,18 +118,18 @@ async def _runtime(manager: Manager) -> None: async def _post_runtime(manager: Manager) -> None: """Actions to complete after main runtime, and before ui shutdown.""" log_spacer(20, log_to_console=False) - msg = f"Running Post-Download Processes For Config: {manager.config_manager.loaded_config}" + msg = "Running Post-Download Processes" log_with_color(msg, "green", 20) await manager.hash_manager.hash_client.cleanup_dupes_after_download() - if manager.config_manager.settings_data.sorting.sort_downloads and not manager.parsed_args.cli_only_args.retry_any: + if config.get().sorting.sort_downloads and not manager.parsed_args.cli_only_args.retry_any: sorter = Sorter(manager) await sorter.run() check_partials_and_empty_folders(manager) - if manager.config_manager.settings_data.runtime_options.update_last_forum_post: + if config.get().runtime_options.update_last_forum_post: await manager.log_manager.update_last_forum_post() @@ -143,7 +139,7 @@ def _setup_debug_logger(manager: Manager) -> Path | None: debug_logger = logging.getLogger("cyberdrop_dl_debug") log_level = 10 - settings_data = manager.config_manager.settings_data + settings_data = config.get() settings_data.runtime_options.log_level = log_level debug_logger.setLevel(log_level) debug_log_file_path = Path(__file__).parents[1] / "cyberdrop_dl_debug.log" @@ -172,7 +168,7 @@ def _setup_debug_logger(manager: Manager) -> Path | None: def _setup_main_logger(manager: Manager) -> None: logger = logging.getLogger("cyberdrop_dl") file_io = manager.path_manager.main_log.open("w", encoding="utf8") - settings_data = manager.config_manager.settings_data + settings_data = config.get() log_level = settings_data.runtime_options.log_level logger.setLevel(log_level) @@ -193,22 +189,7 @@ def _setup_manager(args: Sequence[str] | None = None) -> Manager: After this function returns, the manager will be ready to use and scraping / downloading can begin. """ - manager = Manager(args) - try: - manager.startup() - - if not manager.parsed_args.cli_only_args.download: - ProgramUI(manager) - - except ValidationError as e: - file = { - "GlobalSettings": manager.config_manager.global_settings, - "ConfigSettings": manager.config_manager.settings, - "AuthSettings": manager.config_manager.authentication_settings, - }.get(e.title) - - format_validation_error(e, file=file) - sys.exit(_C.ERROR) + manager = Manager() return manager diff --git a/cyberdrop_dl/downloader/downloader.py b/cyberdrop_dl/downloader/downloader.py index df4c0a820..b6bbb7f65 100644 --- a/cyberdrop_dl/downloader/downloader.py +++ b/cyberdrop_dl/downloader/downloader.py @@ -130,7 +130,7 @@ def __init__(self, manager: Manager, domain: str) -> None: self._additional_headers = {} self._current_attempt_filesize: dict[str, int] = {} self._file_lock_vault = manager.client_manager.file_locks - self._ignore_history = manager.config_manager.settings_data.runtime_options.ignore_history + self._ignore_history = config.get().runtime_options.ignore_history self._semaphore: asyncio.Semaphore = field(init=False) @property @@ -208,7 +208,7 @@ async def _start_hls_download(self, media_item: MediaItem, m3u8_group: Rendition # TODO: compute approx size for UI from the m3u8 info media_item.download_filename = media_item.complete_file.name await self.manager.db_manager.history_table.add_download_filename(self.domain, media_item) - task_id = self.manager.progress_manager.file_progress.add_task(domain=self.domain, filename=media_item.filename) + task_id = self.manager.progress_manager.file_progress.new_task(domain=self.domain, filename=media_item.filename) media_item.set_task_id(task_id) video, audio, _subs = await self._download_rendition_group(media_item, m3u8_group) if not audio: diff --git a/cyberdrop_dl/managers/__init__.py b/cyberdrop_dl/managers/__init__.py index 5bfc00dd5..df8f987c4 100644 --- a/cyberdrop_dl/managers/__init__.py +++ b/cyberdrop_dl/managers/__init__.py @@ -15,8 +15,8 @@ from cyberdrop_dl.managers.live_manager import LiveManager from cyberdrop_dl.managers.log_manager import LogManager from cyberdrop_dl.managers.path_manager import PathManager -from cyberdrop_dl.managers.progress_manager import ProgressManager from cyberdrop_dl.managers.storage_manager import StorageManager +from cyberdrop_dl.progress import ProgressManager from cyberdrop_dl.utils import ffmpeg from cyberdrop_dl.utils.logger import LogHandler, QueuedLogger from cyberdrop_dl.utils.utilities import close_if_defined, get_system_information @@ -42,7 +42,7 @@ def __init__(self) -> None: self.client_manager: ClientManager = field(init=False) self.storage_manager: StorageManager = field(init=False) - self.progress_manager: ProgressManager = field(init=False) + self.progress_manager = ProgressManager(self) self.live_manager: LiveManager = field(init=False) self.task_group: TaskGroup = field(init=False) @@ -85,8 +85,6 @@ async def async_db_hash_startup(self) -> None: await self.db_manager.startup() self.hash_manager = HashManager(self) self.live_manager = LiveManager(self) - self.progress_manager = ProgressManager(self) - self.progress_manager.startup() async def async_db_close(self) -> None: "Partial shutdown for managers used for hash directory scanner" diff --git a/cyberdrop_dl/managers/progress_manager.py b/cyberdrop_dl/managers/progress_manager.py deleted file mode 100644 index c2cf2c6f9..000000000 --- a/cyberdrop_dl/managers/progress_manager.py +++ /dev/null @@ -1,244 +0,0 @@ -from __future__ import annotations - -import time -from contextlib import asynccontextmanager -from dataclasses import field -from datetime import timedelta -from functools import partial -from typing import TYPE_CHECKING - -from pydantic import ByteSize -from rich.columns import Columns -from rich.console import Group -from rich.layout import Layout -from rich.progress import Progress, SpinnerColumn, TaskID -from rich.text import Text -from yarl import URL - -from cyberdrop_dl import __version__ -from cyberdrop_dl.ui.progress.downloads_progress import DownloadsProgress -from cyberdrop_dl.ui.progress.file_progress import FileProgress -from cyberdrop_dl.ui.progress.hash_progress import HashProgress -from cyberdrop_dl.ui.progress.scraping_progress import ScrapingProgress -from cyberdrop_dl.ui.progress.sort_progress import SortProgress -from cyberdrop_dl.ui.progress.statistic_progress import DownloadStatsProgress, ScrapeStatsProgress -from cyberdrop_dl.utils.logger import log, log_spacer, log_with_color - -if TYPE_CHECKING: - from collections.abc import AsyncGenerator - from pathlib import Path - - from rich.console import RenderableType - - from cyberdrop_dl.managers import Manager - from cyberdrop_dl.ui.progress.statistic_progress import UiFailureTotal - -log_cyan = partial(log_with_color, style="cyan", level=20) -log_yellow = partial(log_with_color, style="yellow", level=20) -log_green = partial(log_with_color, style="green", level=20) -log_red = partial(log_with_color, style="red", level=20) - - -class ProgressManager: - def __init__(self, manager: Manager) -> None: - # File Download Bars - self.manager = manager - ui_options = manager.config_manager.global_settings_data.ui_options - self.portrait = manager.parsed_args.cli_only_args.portrait - self.file_progress = FileProgress(manager) - self.scraping_progress = ScrapingProgress(manager) - - # Overall Progress Bars & Stats - self.download_progress = DownloadsProgress(manager) - self.download_stats_progress = DownloadStatsProgress() - self.scrape_stats_progress = ScrapeStatsProgress() - self.hash_progress = HashProgress(manager) - self.sort_progress = SortProgress(1, manager) - - self.ui_refresh_rate = ui_options.refresh_rate - - self.hash_remove_layout: RenderableType = field(init=False) - self.hash_layout: RenderableType = field(init=False) - self.sort_layout: RenderableType = field(init=False) - self.status_message: Progress = field(init=False) - self.status_message_task_id: TaskID = field(init=False) - - @asynccontextmanager - async def show_status_msg(self, msg: str | None) -> AsyncGenerator[None]: - try: - self.status_message.update(self.status_message_task_id, description=msg, visible=bool(msg)) - yield - finally: - self.status_message.update(self.status_message_task_id, visible=False) - - def pause_or_resume(self): - if self.manager.states.RUNNING.is_set(): - self.pause() - else: - self.resume() - - def pause(self, msg: str = ""): - self.manager.states.RUNNING.clear() - suffix = f" [{msg}]" if msg else "" - self.activity.update(self.activity_task_id, description=f"Paused{suffix}") - - def resume(self): - self.manager.states.RUNNING.set() - self.activity.update(self.activity_task_id, description="Running Cyberdrop-DL") - - def startup(self) -> None: - """Startup process for the progress manager.""" - spinner = SpinnerColumn(style="green", spinner_name="dots") - activity = Progress(spinner, "[progress.description]{task.description}") - self.status_message = Progress(spinner, "[progress.description]{task.description}") - - self.status_message_task_id = self.status_message.add_task("", total=100, completed=0, visible=False) - self.activity_task_id = activity.add_task(f"Running Cyberdrop-DL: v{__version__}", total=100, completed=0) - self.activity = activity - - simple_layout = Group(activity, self.download_progress.simple_progress) - - status_message_columns = Columns([activity, self.status_message], expand=False) - - horizontal_layout = Layout() - vertical_layout = Layout() - - upper_layouts = ( - Layout(renderable=self.download_progress.get_progress(), name="Files", ratio=1, minimum_size=9), - Layout(renderable=self.scrape_stats_progress.get_progress(), name="Scrape Failures", ratio=1), - Layout(renderable=self.download_stats_progress.get_progress(), name="Download Failures", ratio=1), - ) - - lower_layouts = ( - Layout(renderable=self.scraping_progress.get_renderable(), name="Scraping", ratio=20), - Layout(renderable=self.file_progress.get_renderable(), name="Downloads", ratio=20), - Layout(renderable=status_message_columns, name="status_message", ratio=2), - ) - - horizontal_layout.split_column(Layout(name="upper", ratio=20), *lower_layouts) - vertical_layout.split_column(Layout(name="upper", ratio=60), *lower_layouts) - - horizontal_layout["upper"].split_row(*upper_layouts) - vertical_layout["upper"].split_column(*upper_layouts) - - self.horizontal_layout = horizontal_layout - self.vertical_layout = vertical_layout - self.activity_layout = activity - self.simple_layout = simple_layout - self.hash_remove_layout = self.hash_progress.get_removed_progress() - self.hash_layout = self.hash_progress.get_renderable() - self.sort_layout = self.sort_progress.get_renderable() - - @property - def fullscreen_layout(self) -> Layout: - if self.portrait: - return self.vertical_layout - return self.horizontal_layout - - def print_stats(self, start_time: float) -> None: - """Prints the stats of the program.""" - if not self.manager.parsed_args.cli_only_args.print_stats: - return - end_time = time.perf_counter() - runtime = timedelta(seconds=int(end_time - start_time)) - total_data_written = ByteSize(self.manager.storage_manager.total_data_written).human_readable(decimal=True) - - log_spacer(20) - log("Printing Stats...\n", 20) - config_path = self.manager.path_manager.config_folder / self.manager.config_manager.loaded_config - config_path_text = get_console_hyperlink(config_path, text=self.manager.config_manager.loaded_config) - input_file_text = get_input(self.manager) - log_folder_text = get_console_hyperlink(self.manager.path_manager.log_folder) - - log_concat("Run Stats (config: ", config_path_text, ")", style="cyan") - log_concat(" Input File: ", input_file_text, style="yellow") - log_yellow(f" Input URLs: {self.manager.scrape_mapper.count:,}") - log_yellow(f" Input URL Groups: {self.manager.scrape_mapper.group_count:,}") - log_concat(" Log Folder: ", log_folder_text, style="yellow") - log_yellow(f" Total Runtime: {runtime}") - log_yellow(f" Total Downloaded Data: {total_data_written}") - - log_spacer(20, "") - log_cyan("Download Stats:") - log_green(f" Downloaded: {self.download_progress.completed_files:,} files") - log_yellow(f" Skipped (By Config): {self.download_progress.skipped_files:,} files") - log_yellow(f" Skipped (Previously Downloaded): {self.download_progress.previously_completed_files:,} files") - log_red(f" Failed: {self.download_stats_progress.failed_files:,} files") - - log_spacer(20, "") - log_cyan("Unsupported URLs Stats:") - log_yellow(f" Sent to Jdownloader: {self.scrape_stats_progress.sent_to_jdownloader:,}") - log_yellow(f" Skipped: {self.scrape_stats_progress.unsupported_urls_skipped:,}") - - self.print_dedupe_stats() - - log_spacer(20, "") - log_cyan("Sort Stats:") - log_green(f" Audios: {self.sort_progress.audio_count:,}") - log_green(f" Images: {self.sort_progress.image_count:,}") - log_green(f" Videos: {self.sort_progress.video_count:,}") - log_green(f" Other Files: {self.sort_progress.other_count:,}") - - last_padding = log_failures(self.scrape_stats_progress.return_totals(), "Scrape Failures:") - log_failures(self.download_stats_progress.return_totals(), "Download Failures:", last_padding) - - def print_dedupe_stats(self) -> None: - log_spacer(20, "") - log_cyan("Dupe Stats:") - log_yellow(f" Newly Hashed: {self.hash_progress.hashed_files:,} files") - log_yellow(f" Previously Hashed: {self.hash_progress.prev_hashed_files:,} files") - log_yellow(f" Removed (Downloads): {self.hash_progress.removed_files:,} files") - - -def log_failures(failures: list[UiFailureTotal], title: str = "Failures:", last_padding: int = 0) -> int: - log_spacer(20, "") - log_cyan(title) - if not failures: - log_green(" None") - return 0 - error_padding = last_padding - error_codes = [f.error_code for f in failures if f.error_code is not None] - if error_codes: - error_padding = max(len(str(max(error_codes))), error_padding) - for f in failures: - error = f.error_code if f.error_code is not None else "" - log_red(f" {error:>{error_padding}}{' ' if error_padding else ''}{f.msg}: {f.total:,}") - return error_padding - - -def get_input(manager: Manager) -> Text | str: - if manager.parsed_args.cli_only_args.retry_all: - return "--retry-all" - if manager.parsed_args.cli_only_args.retry_failed: - return "--retry-failed" - if manager.parsed_args.cli_only_args.retry_maintenance: - return "--retry-maintenance" - if manager.scrape_mapper.using_input_file: - return get_console_hyperlink(manager.path_manager.input_file) - return "--links (CLI args)" - - -def get_console_hyperlink(file_path: Path, text: str = "") -> Text: - full_path = file_path - show_text = text or full_path - file_url = URL(full_path.as_posix()).with_scheme("file") - return Text(str(show_text), style=f"link {file_url}") - - -def concat_as_text(*text_or_str, style: str = "") -> Text: - result = Text() - for elem in text_or_str: - if isinstance(elem, Text): - text = elem - if style and text.style != style: - text.stylize(f"{style} {text.style}") - else: - text = Text(elem, style=style) - - result.append(text) - return result - - -def log_concat(*text_or_str, style: str = "", **kwargs) -> None: - text = concat_as_text(*text_or_str, style=style) - log_with_color(text, style, **kwargs) diff --git a/cyberdrop_dl/progress/__init__.py b/cyberdrop_dl/progress/__init__.py new file mode 100644 index 000000000..289e2d4a2 --- /dev/null +++ b/cyberdrop_dl/progress/__init__.py @@ -0,0 +1,191 @@ +from __future__ import annotations + +import dataclasses +import logging +import time +from contextlib import asynccontextmanager +from datetime import timedelta +from typing import TYPE_CHECKING, Self + +from pydantic import ByteSize +from rich.columns import Columns +from rich.console import Group +from rich.layout import Layout +from rich.progress import Progress, SpinnerColumn +from rich.text import Text +from yarl import URL + +from cyberdrop_dl import __version__, config +from cyberdrop_dl.progress.downloads_progress import DownloadsProgress +from cyberdrop_dl.progress.hash_progress import HashProgress +from cyberdrop_dl.progress.panels import DownloadsPanel, ScrapingPanel +from cyberdrop_dl.progress.sorting import SortingPanel +from cyberdrop_dl.progress.statistic_progress import DownloadStatsProgress, ScrapeStatsProgress +from cyberdrop_dl.utils.logger import log_spacer + +if TYPE_CHECKING: + from collections.abc import AsyncGenerator + from pathlib import Path + + from cyberdrop_dl.managers import Manager + from cyberdrop_dl.progress.statistic_progress import UiFailureTotal + + +spinner = SpinnerColumn(style="green", spinner_name="dots") +logger = logging.getLogger(__name__) + + +class StatusMessage: + def __init__(self) -> None: + self.progress: Progress = Progress(spinner, "[progress.description]{task.description}") + self._task_id = self.progress.add_task("", total=100, completed=0, visible=False) + + def update(self, description: str | None = None) -> None: + self.progress.update(self._task_id, description=description, visible=bool(description)) + + @property + def msg(self) -> str: + return self.progress._tasks[self._task_id].description + + def __repr__(self) -> str: + return f"{type(self).__name__}(msg={self.msg!r})" + + +@dataclasses.dataclass(slots=True) +class UILayouts: + horizontal: Layout + vertical: Layout + simple: Group + + @classmethod + def build(cls, progress: ProgressManager) -> Self: + horizontal = Layout() + vertical = Layout() + + activity = Progress(spinner, "[progress.description]{task.description}") + _ = activity.add_task(f"Running Cyberdrop-DL: v{__version__}", total=100, completed=0) + + status_message_columns = Columns([activity, progress.status.progress]) + + upper_layouts = ( + Layout(renderable=progress.download_progress.get_progress(), name="Files", ratio=1, minimum_size=9), + Layout(renderable=progress.scrape_stats_progress.get_progress(), name="Scrape Failures", ratio=1), + Layout(renderable=progress.download_stats_progress.get_progress(), name="Download Failures", ratio=1), + ) + + lower_layouts = ( + Layout(renderable=progress.scraping_progress.get_renderable(), name="Scraping", ratio=20), + Layout(renderable=progress.file_progress.get_renderable(), name="Downloads", ratio=20), + Layout(renderable=status_message_columns, name="status_message", ratio=2), + ) + + horizontal.split_column(Layout(name="upper", ratio=20), *lower_layouts) + vertical.split_column(Layout(name="upper", ratio=60), *lower_layouts) + + horizontal["upper"].split_row(*upper_layouts) + vertical["upper"].split_column(*upper_layouts) + + simple = Group(activity, progress.download_progress.simple_progress) + return cls(horizontal, vertical, simple) + + +class ProgressManager: + def __init__(self, manager: Manager) -> None: + self.manager = manager + + self.portrait = True + self.file_progress = DownloadsPanel() + self.scraping_progress = ScrapingPanel() + self.status = StatusMessage() + + self.download_progress: DownloadsProgress = DownloadsProgress(manager) + self.download_stats_progress: DownloadStatsProgress = DownloadStatsProgress() + self.scrape_stats_progress: ScrapeStatsProgress = ScrapeStatsProgress() + self.hash_progress: HashProgress = HashProgress(manager) + self.sorting: SortingPanel = SortingPanel(1) + + self.layouts = UILayouts.build(self) + self.hash_remove_layout = self.hash_progress.get_removed_progress() + self.hash_layout = self.hash_progress.get_renderable() + self.sort_layout = self.sorting.get_renderable() + + @asynccontextmanager + async def show_status_msg(self, msg: str | None) -> AsyncGenerator[None]: + try: + self.status.update(msg) + yield + finally: + self.status.update() + + @property + def layout(self) -> Layout: + if self.portrait: + return self.layouts.vertical + return self.layouts.horizontal + + def print_stats(self, start_time: float) -> None: + """Prints the stats of the program.""" + # if not self.manager.parsed_args.cli_only_args.print_stats: + # return + end_time = time.perf_counter() + runtime = timedelta(seconds=int(end_time - start_time)) + total_data_written = ByteSize(self.manager.storage_manager.total_data_written).human_readable(decimal=True) + + log_spacer(20) + logger.info("Printing Stats...\n") + logger.info("Run Stats") + logger.info(f" Input File: {config.get().source}") + logger.info(f" Input URLs: {self.manager.scrape_mapper.count:,}") + logger.info(f" Input URL Groups: {self.manager.scrape_mapper.group_count:,}") + # logger.info(f" Log Folder: {log_folder_text}") + logger.info(f" Total Runtime: {runtime}") + logger.info(f" Total Downloaded Data: {total_data_written}") + + logger.info("Download Stats:") + logger.info(f" Downloaded: {self.download_progress.completed_files:,} files") + logger.info(f" Skipped (By Config): {self.download_progress.skipped_files:,} files") + logger.info(f" Skipped (Previously Downloaded): {self.download_progress.previously_completed_files:,} files") + logger.info(f" Failed: {self.download_stats_progress.failed_files:,} files") + + logger.info("Unsupported URLs Stats:") + logger.info(f" Sent to Jdownloader: {self.scrape_stats_progress.sent_to_jdownloader:,}") + logger.info(f" Skipped: {self.scrape_stats_progress.unsupported_urls_skipped:,}") + + self.print_dedupe_stats() + + logger.info("Sort Stats:") + logger.info(f" Audios: {self.sorting.audio_count:,}") + logger.info(f" Images: {self.sorting.image_count:,}") + logger.info(f" Videos: {self.sorting.video_count:,}") + logger.info(f" Other Files: {self.sorting.other_count:,}") + + last_padding = log_failures(self.scrape_stats_progress.return_totals(), "Scrape Failures:") + log_failures(self.download_stats_progress.return_totals(), "Download Failures:", last_padding) + + def print_dedupe_stats(self) -> None: + logger.info("Dupe Stats:") + logger.info(f" Newly Hashed: {self.hash_progress.hashed_files:,} files") + logger.info(f" Previously Hashed: {self.hash_progress.prev_hashed_files:,} files") + logger.info(f" Removed (Downloads): {self.hash_progress.removed_files:,} files") + + +def log_failures(failures: list[UiFailureTotal], title: str = "Failures:", last_padding: int = 0) -> int: + logger.info(title) + if not failures: + logger.info(" None") + return 0 + error_padding = last_padding + error_codes = [f.error_code for f in failures if f.error_code is not None] + if error_codes: + error_padding = max(len(str(max(error_codes))), error_padding) + for f in failures: + error = f.error_code if f.error_code is not None else "" + logger.info(f" {error:>{error_padding}}{' ' if error_padding else ''}{f.msg}: {f.total:,}") + return error_padding + + +def _get_console_hyperlink(file_path: Path, text: str = "") -> Text: + full_path = file_path + show_text = text or full_path + file_url = URL(full_path.as_posix()).with_scheme("file") + return Text(str(show_text), style=f"link {file_url}") diff --git a/cyberdrop_dl/ui/progress/downloads_progress.py b/cyberdrop_dl/progress/downloads_progress.py similarity index 100% rename from cyberdrop_dl/ui/progress/downloads_progress.py rename to cyberdrop_dl/progress/downloads_progress.py diff --git a/cyberdrop_dl/ui/progress/hash_progress.py b/cyberdrop_dl/progress/hash_progress.py similarity index 100% rename from cyberdrop_dl/ui/progress/hash_progress.py rename to cyberdrop_dl/progress/hash_progress.py diff --git a/cyberdrop_dl/progress/panels.py b/cyberdrop_dl/progress/panels.py new file mode 100644 index 000000000..e9345a966 --- /dev/null +++ b/cyberdrop_dl/progress/panels.py @@ -0,0 +1,129 @@ +from __future__ import annotations + +from types import MappingProxyType +from typing import TYPE_CHECKING, ClassVar + +from rich.console import Group +from rich.markup import escape +from rich.panel import Panel +from rich.progress import ( + BarColumn, + DownloadColumn, + Progress, + SpinnerColumn, + TaskID, + TimeRemainingColumn, + TransferSpeedColumn, +) + +if TYPE_CHECKING: + from yarl import URL + +_COLOR: str = "plum3" + + +def truncate(s: str, length: int = 40, placeholder: str = "...") -> str: + return f"{s[: length - len(placeholder)]}{placeholder}" if len(s) >= length else s.ljust(length) + + +class OverFlow: + _desc: ClassVar[str] = "[{color}]... and {number:,} other {name}" + + def __init__(self, name: str) -> None: + self.name: str = name + self.progress: Progress = Progress("[progress.description]{task.description}") + self._task_id = self.progress.add_task(self._format(count=0), visible=False) + + def _format(self, count: int) -> str: + return self._desc.format(color=_COLOR, number=count, name=self.name) + + def update(self, count: int) -> None: + self.progress.update(self._task_id, description=self._format(count=count), visible=count > 0) + + +class UIPanel: + title: ClassVar[str] + type_str: ClassVar[str] = "files" + desc_fmt: ClassVar[str] = "[{color}]{description}" + + def __init__(self, progress: Progress, visible_tasks_limit: int) -> None: + self._progress = progress + self._overflow = OverFlow(self.type_str) + self._limit = visible_tasks_limit + self._tasks = MappingProxyType(self._progress._tasks) + + @classmethod + def _clean_task_desc(cls, desc: str) -> str: + return escape(truncate(desc.encode("ascii", "ignore").decode().strip(), length=40)) + + def get_renderable(self) -> Panel: + return Panel( + Group(self._progress, self._overflow.progress), + title=self.title, + border_style="green", + padding=(1, 1), + ) + + def add_task(self, description: str, total: float | None = None) -> TaskID: + task_id = self._progress.add_task( + self.desc_fmt.format(color=_COLOR, description=description), + total=total, + visible=len(self._tasks) < self._limit, + ) + self.redraw() + return task_id + + def remove_task(self, task_id: TaskID) -> None: + self._progress.remove_task(task_id) + self.redraw() + + def redraw(self) -> None: + self._overflow.update(count=len(self._tasks) - self._limit) + + +class ScrapingPanel(UIPanel): + title: ClassVar[str] = "Scraping" + type_str: ClassVar[str] = "URLs" + + def __init__(self) -> None: + progress = Progress(SpinnerColumn(), "[progress.description]{task.description}") + super().__init__(progress, visible_tasks_limit=5) + + def new_task(self, url: URL) -> TaskID: # type: ignore[reportIncompatibleMethodOverride] + return self.add_task(str(url)) + + +class DownloadsPanel(UIPanel): + title: ClassVar[str] = "Downloads" + _base_columns = (SpinnerColumn(), "[progress.description]{task.description}", BarColumn(bar_width=None)) + _horizontal = ( + *_base_columns, + "[progress.percentage]{task.percentage:>6.2f}%", + "━", + DownloadColumn(), + "━", + TransferSpeedColumn(), + "━", + TimeRemainingColumn(), + ) + _vertical = (*_base_columns, DownloadColumn(), "━", TransferSpeedColumn()) + + def __init__(self) -> None: + self.total_data_written: int = 0 + progress = Progress(*self._vertical) if True else Progress(*self._horizontal) + super().__init__(progress, visible_tasks_limit=10) + + def new_task(self, *, domain: str, filename: str, expected_size: int | None = None) -> TaskID: # type: ignore[reportIncompatibleMethodOverride] + description = self._clean_task_desc(filename.split("/")[-1]) + if not True: + description = f"({domain.upper()}) {description}" + + return super().add_task(description, expected_size) + + def advance_file(self, task_id: TaskID, amount: int) -> None: + self.total_data_written += amount + self._progress.advance(task_id, amount) + + def get_speed(self, task_id: TaskID) -> float: + task = self._tasks[task_id] + return task.finished_speed or task.speed or 0 diff --git a/cyberdrop_dl/progress/sorting.py b/cyberdrop_dl/progress/sorting.py new file mode 100644 index 000000000..32ebd6b03 --- /dev/null +++ b/cyberdrop_dl/progress/sorting.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from rich.progress import BarColumn, Progress, SpinnerColumn, TaskID + +from cyberdrop_dl.progress.panels import UIPanel + + +class SortingPanel(UIPanel): + """Class that keeps track of sorted files.""" + + title = "Sorting" + name = "Folders" + + def __init__(self, visible_tasks_limit: int) -> None: + progress = Progress( + SpinnerColumn(), + "[progress.description]{task.description}", + BarColumn(bar_width=None), + "[progress.percentage]{task.percentage:>6.2f}%", + "━", + "{task.completed}/{task.total} files", + ) + super().__init__(progress, visible_tasks_limit) + + self.audio_count = self.video_count = self.image_count = self.other_count = 0 + + def new_task(self, folder: str, expected_size: int | None) -> TaskID: + description = self._clean_task_desc(folder) + return super().add_task(description, expected_size) + + def advance_folder(self, task_id: TaskID, amount: int = 1) -> None: + self._progress.advance(task_id, amount) + + def increment_audio(self) -> None: + self.audio_count += 1 + + def increment_video(self) -> None: + self.video_count += 1 + + def increment_image(self) -> None: + self.image_count += 1 + + def increment_other(self) -> None: + self.other_count += 1 diff --git a/cyberdrop_dl/ui/progress/statistic_progress.py b/cyberdrop_dl/progress/statistic_progress.py similarity index 100% rename from cyberdrop_dl/ui/progress/statistic_progress.py rename to cyberdrop_dl/progress/statistic_progress.py diff --git a/cyberdrop_dl/ui/program_ui.py b/cyberdrop_dl/ui/program_ui.py deleted file mode 100644 index 4bfaec497..000000000 --- a/cyberdrop_dl/ui/program_ui.py +++ /dev/null @@ -1,297 +0,0 @@ -from __future__ import annotations - -import asyncio -import sqlite3 -import sys -from functools import wraps -from typing import TYPE_CHECKING, Any, ParamSpec, TypeVar - -from requests import request -from rich.console import Console -from rich.markdown import Markdown -from rich.text import Text - -from cyberdrop_dl import cache -from cyberdrop_dl.clients.hash_client import hash_directory_scanner -from cyberdrop_dl.dependencies import browser_cookie3 -from cyberdrop_dl.ui.prompts import user_prompts -from cyberdrop_dl.ui.prompts.basic_prompts import ask_dir_path, enter_to_continue -from cyberdrop_dl.ui.prompts.defaults import DONE_CHOICE, EXIT_CHOICE -from cyberdrop_dl.utils.cookie_management import clear_cookies -from cyberdrop_dl.utils.sorting import Sorter -from cyberdrop_dl.utils.text_editor import open_in_text_editor -from cyberdrop_dl.utils.updates import check_latest_pypi -from cyberdrop_dl.utils.utilities import clear_term - -if TYPE_CHECKING: - from collections.abc import Callable - from pathlib import Path - - from InquirerPy.base.control import Choice - - from cyberdrop_dl.managers import Manager - -P = ParamSpec("P") -R = TypeVar("R") - -console = Console() -ERROR_PREFIX = Text("ERROR: ", style="bold red") - - -def repeat_until_done(func: Callable[P, R]) -> Callable[P, R]: - @wraps(func) - def wrapper(*args, **kwargs) -> R: - done = False - while not done: - done = func(*args, **kwargs) - return done - - return wrapper - - -class ProgramUI: - def __init__(self, manager: Manager, run: bool = True) -> None: - self.manager = manager - if run: - self.run() - - @staticmethod - def print_error(msg: str, critical: bool = False) -> None: - text = ERROR_PREFIX + msg - console.print(text, style="bold red" if critical else None) - if critical: - sys.exit(1) - enter_to_continue() - - @repeat_until_done - def run(self) -> bool | None: - """Program UI.""" - clear_term() - options_map = { - 1: self._download, - 2: self._retry_failed_download, - 3: self._scan_and_create_hashes, - 4: self._sort_files, - 5: self._edit_urls, - 6: self._change_config, - 7: self._manage_configs, - 8: self._check_updates, - 9: self._view_changelog, - } - - answer = user_prompts.main_prompt(self.manager) - result = self._process_answer(answer, options_map) - return_to_main = result and result != DONE_CHOICE - if return_to_main: - clear_term() - return return_to_main - - def _download(self) -> bool: - """Starts download process.""" - return True - - def _retry_failed_download(self) -> bool: - """Sets retry failed and starts download process.""" - self.manager.parsed_args.cli_only_args.retry_failed = True - return True - - def _scan_and_create_hashes(self) -> None: - """Scans a folder and creates hashes for all of its files.""" - path = ask_dir_path("Select the directory to scan", default=str(self.manager.path_manager.download_folder)) - hash_directory_scanner(self.manager, path) - - def _sort_files(self) -> None: - """Sort files in download folder""" - sorter = Sorter(self.manager) - asyncio.run(sorter.run()) - - def _check_updates(self) -> None: - """Checks Cyberdrop-DL updates.""" - check_latest_pypi(logging="CONSOLE") - enter_to_continue() - - def _change_config(self) -> None: - configs = self.manager.config_manager.get_configs() - selected_config = user_prompts.select_config(configs) - self.manager.config_manager.change_config(selected_config) - if user_prompts.switch_default_config_to(self.manager, selected_config): - self.manager.config_manager.change_default_config(selected_config) - self.manager.config_manager.change_config(selected_config) - - def _view_changelog(self) -> None: - clear_term() - changelog_content = self._get_changelog() - if not changelog_content: - return - with console.pager(links=True): - console.print(Markdown(changelog_content, justify="left")) - - @repeat_until_done - def _manage_configs(self) -> Choice | None: - options_map = { - 1: self._change_default_config, - 2: self._create_new_config, - 3: self._delete_config, - 4: self._edit_config, - 5: self._edit_auth_config, - 6: self._edit_global_config, - 7: self._edit_auto_cookies_extration, - 8: self._import_cookies_now, - 9: self._clear_cookies, - 10: self._clear_cache, - } - answer = user_prompts.manage_configs(self.manager) - return self._process_answer(answer, options_map) - - def _clear_cookies(self) -> None: - domains, _ = user_prompts.domains_prompt(domain_message="Select site(s) to clear cookies for:") - clear_cookies(self.manager, domains) - console.print("Finished clearing cookies", style="green") - enter_to_continue() - - def _clear_cache(self) -> None: - domains, _ = user_prompts.domains_prompt(domain_message="Select site(s) to clear cache for:") - if not domains: - console.print("No domains selected", style="red") - enter_to_continue() - return - urls = user_prompts.filter_cache_urls(self.manager, domains) - for url in urls: - asyncio.run(cache.get().request_cache.delete_url(url)) - - console.print("\nExecuting database vacuum. This may take several minutes, please wait...") - try: - vacuum_database(self.manager.path_manager.cache_db) - except sqlite3.Error as e: - return self.print_error(f"Unable to clean request database. Database may be corrupted : {e!s}") - console.print("Finished clearing the cache", style="green") - enter_to_continue() - - def _edit_auth_config(self) -> None: - config_file = self.manager.config_manager.authentication_settings - self._open_in_text_editor(config_file) - - def _edit_global_config(self) -> None: - config_file = self.manager.config_manager.global_settings - self._open_in_text_editor(config_file) - - def _edit_config(self) -> None: - config_file = self.manager.config_manager.settings - self._open_in_text_editor(config_file) - - def _create_new_config(self) -> None: - config_name = user_prompts.create_new_config(self.manager) - if not config_name: - return - if user_prompts.switch_default_config_to(self.manager, config_name): - self.manager.config_manager.change_default_config(config_name) - self.manager.config_manager.change_config(config_name) - config_file = self.manager.config_manager.settings - self._open_in_text_editor(config_file) - - def _edit_urls(self) -> None: - self._open_in_text_editor(self.manager.path_manager.input_file, reload_config=False) - - def _change_default_config(self) -> None: - configs = self.manager.config_manager.get_configs() - selected_config = user_prompts.select_config(configs) - self.manager.config_manager.change_default_config(selected_config) - if user_prompts.activate_config(self.manager, selected_config) is not None: - self.manager.config_manager.change_config(selected_config) - - def _delete_config(self) -> None: - configs = self.manager.config_manager.get_configs() - if len(configs) == 1: - self.print_error("There is only one config") - return - - selected_config = user_prompts.select_config(configs) - if selected_config == self.manager.config_manager.loaded_config: - self.print_error("You cannot delete the currently active config") - return - - if cache.get().get("default_config") == selected_config: - self.print_error("You cannot delete the default config") - return - - self.manager.config_manager.delete_config(selected_config) - if user_prompts.switch_default_config(): - self._change_default_config() - - def _edit_auto_cookies_extration(self) -> None: - user_prompts.auto_cookie_extraction(self.manager) - - def _import_cookies_now(self) -> None: - try: - user_prompts.extract_cookies(self.manager) - except browser_cookie3.BrowserCookieError as e: - self.print_error(str(e)) - - def _place_holder(self) -> None: - self.print_error("Option temporarily disabled on this version") - - def _open_in_text_editor(self, file_path: Path, *, reload_config: bool = True): - try: - open_in_text_editor(file_path) - except ValueError as e: - self.print_error(str(e)) - return - if reload_config: - console.print("Revalidating config, please wait..") - self.manager.config_manager.change_config(self.manager.config_manager.loaded_config) - - def _process_answer(self, answer: Any, options_map: dict) -> Choice | None: - """Checks prompt answer and executes corresponding function.""" - if answer == EXIT_CHOICE.value: - asyncio.run(cache.get().close()) - sys.exit(0) - if answer == DONE_CHOICE.value: - return DONE_CHOICE - - function_to_call = options_map.get(answer) - if not function_to_call: - self.print_error("Something went wrong. Please report it to the developer", critical=True) - sys.exit(1) - - return function_to_call() - - def _get_changelog(self) -> str | None: - """Get latest changelog file from github. Returns its content.""" - path = self.manager.path_manager.config_folder.parent / "CHANGELOG.md" - url = "https://raw.githubusercontent.com/NTFSvolume/cdl/refs/heads/master/CHANGELOG.md" - _, latest_version = check_latest_pypi(logging="OFF") - if not latest_version: - self.print_error("UNABLE TO GET LATEST VERSION INFORMATION") - return None - - name = f"{path.stem}_{latest_version}{path.suffix}" - changelog = path.with_name(name) - if not changelog.is_file(): - changelog_pattern = f"{path.stem}*{path.suffix}" - for old_changelog in path.parent.glob(changelog_pattern): - old_changelog.unlink() - try: - with request("GET", url, timeout=15) as response: - response.raise_for_status() - with changelog.open("wb") as f: - f.write(response.content) - except Exception: - self.print_error("UNABLE TO GET CHANGELOG INFORMATION") - return None - - lines = changelog.read_text(encoding="utf8").splitlines() - # remove keep_a_changelog disclaimer - return "\n".join(lines[:21] + lines[25:]) - - -def vacuum_database(db_path: Path) -> None: - if not db_path.is_file(): - return - conn = None - try: - conn = sqlite3.connect(db_path) - conn.execute("VACUUM") - conn.commit() - finally: - if conn: - conn.close() diff --git a/cyberdrop_dl/ui/progress/__init__.py b/cyberdrop_dl/ui/progress/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/cyberdrop_dl/ui/progress/deque_progress.py b/cyberdrop_dl/ui/progress/deque_progress.py deleted file mode 100644 index ec1645501..000000000 --- a/cyberdrop_dl/ui/progress/deque_progress.py +++ /dev/null @@ -1,118 +0,0 @@ -from __future__ import annotations - -from abc import ABC, abstractmethod -from collections import deque -from itertools import islice -from typing import TYPE_CHECKING - -from rich.console import Group -from rich.panel import Panel -from rich.progress import Progress, TaskID - -if TYPE_CHECKING: - from collections.abc import Sequence - - -def adjust_title(s: str, length: int = 40, placeholder: str = "...") -> str: - """Collapse and truncate or pad the given string to fit in the given length.""" - return f"{s[: length - len(placeholder)]}{placeholder}" if len(s) >= length else s.ljust(length) - - -class DequeProgress(ABC): - _progress: Progress - type_str: str = "files" - color = "plum3" - progress_str = "[{color}]{description}" - overflow_str = "[{color}]... and {number:,} other {type_str}" - queue_str = "[{color}]... and {number:,} {type_str} in {title} queue" - - def __init__(self, title: str, visible_tasks_limit: int) -> None: - self.title = title - self.title_lower = title.lower() - self._overflow = Progress("[progress.description]{task.description}") - self._queue = Progress("[progress.description]{task.description}") - self._progress_group = Group(self._progress, self._overflow, self._queue) - - self._overflow_task_id = self._overflow.add_task( - self.overflow_str.format(color=self.color, number=0, type_str=self.type_str), - visible=False, - ) - self._queue_task_id = self._queue.add_task( - self.queue_str.format(color=self.color, number=0, type_str=self.type_str, title=self.title_lower), - visible=False, - ) - self._tasks: deque[TaskID] = deque() - self._tasks_visibility_limit = visible_tasks_limit - - @abstractmethod - def get_queue_length(self) -> int: ... - - @property - def visible_tasks(self) -> Sequence[TaskID]: - if len(self._tasks) > self._tasks_visibility_limit: - return [self._tasks[i] for i in range(self._tasks_visibility_limit)] - return self._tasks - - @property - def invisible_tasks(self) -> Sequence[TaskID]: - return list(islice(self._tasks, self._tasks_visibility_limit, None)) - - @property - def invisible_tasks_len(self) -> int: - """Faster to compute than `len(self.invisible_tasks)`""" - return max(0, len(self._tasks) - self._tasks_visibility_limit) - - def has_visible_capacity(self) -> bool: - return len(self._tasks) < self._tasks_visibility_limit - - def get_renderable(self) -> Panel: - """Returns the progress bar.""" - return Panel(self._progress_group, title=self.title, border_style="green", padding=(1, 1)) - - def add_task(self, description: str, total: float | None = None) -> TaskID: - """Adds a new task to the progress bar.""" - task_id = self._progress.add_task( - self.progress_str.format(color=self.color, description=description), - total=total, - visible=self.has_visible_capacity(), - ) - self._tasks.append(task_id) - self.redraw() - return task_id - - def remove_task(self, task_id: TaskID) -> None: - """Removes a task from the progress bar.""" - if task_id not in self._tasks: - msg = "Task ID not found" - raise ValueError(msg) - - self._tasks.remove(task_id) - self._progress.remove_task(task_id) - self.redraw() - - def redraw(self) -> None: - """Redraws the progress bar.""" - for task in self.visible_tasks: - self._progress.update(task, visible=True) - - invisible_tasks_len = self.invisible_tasks_len - - self._overflow.update( - self._overflow_task_id, - description=self.overflow_str.format( - color=self.color, - number=invisible_tasks_len, - type_str=self.type_str, - ), - visible=invisible_tasks_len > 0, - ) - - queue_length = self.get_queue_length() - - self._queue.update( - self._queue_task_id, - description=self.queue_str.format( - color=self.color, number=queue_length, type_str=self.type_str, title=self.title_lower - ), - visible=queue_length > 0, - ) diff --git a/cyberdrop_dl/ui/progress/file_progress.py b/cyberdrop_dl/ui/progress/file_progress.py deleted file mode 100644 index 75ca945ff..000000000 --- a/cyberdrop_dl/ui/progress/file_progress.py +++ /dev/null @@ -1,78 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from rich.markup import escape -from rich.progress import ( - BarColumn, - DownloadColumn, - Progress, - SpinnerColumn, - TaskID, - TimeRemainingColumn, - TransferSpeedColumn, -) - -from cyberdrop_dl.ui.progress.deque_progress import DequeProgress, adjust_title - -if TYPE_CHECKING: - from cyberdrop_dl.managers import Manager - - -class FileProgress(DequeProgress): - """Class that manages the download progress of individual files.""" - - def __init__(self, manager: Manager) -> None: - self.manager = manager - progress_colums = (SpinnerColumn(), "[progress.description]{task.description}", BarColumn(bar_width=None)) - visible_tasks_limit: int = 10 - horizontal_columns = ( - *progress_colums, - "[progress.percentage]{task.percentage:>6.2f}%", - "━", - DownloadColumn(), - "━", - TransferSpeedColumn(), - "━", - TimeRemainingColumn(), - ) - vertical_columns = (*progress_colums, DownloadColumn(), "━", TransferSpeedColumn()) - use_columns = horizontal_columns - if manager.parsed_args.cli_only_args.portrait: - use_columns = vertical_columns - self._progress = Progress(*use_columns) - super().__init__("Downloads", visible_tasks_limit) - - def get_queue_length(self) -> int: - """Returns the number of tasks in the downloader queue.""" - total = 0 - unique_crawler_ids = set() - for crawler in self.manager.scrape_mapper.existing_crawlers.values(): - crawler_id = id(crawler) # Only count each instance of the crawler once - if crawler_id in unique_crawler_ids: - continue - unique_crawler_ids.add(crawler_id) - total += getattr(crawler.downloader, "waiting_items", 0) - - return total - - def add_task(self, *, domain: str, filename: str, expected_size: int | None = None) -> TaskID: # type: ignore[reportIncompatibleMethodOverride] - """Adds a new task to the progress bar.""" - filename = filename.split("/")[-1].encode("ascii", "ignore").decode().strip() - description = escape(adjust_title(filename, length=40)) - if not self.manager.progress_manager.portrait: - description = f"({domain.upper()}) {description}" - return super().add_task(description, expected_size) - - def advance_file(self, task_id: TaskID, amount: int) -> None: - """Advances the progress of the given task by the given amount.""" - self.manager.storage_manager.total_data_written += amount - self._progress.advance(task_id, amount) - - def get_speed(self, task_id: TaskID) -> float: - if task_id not in self._tasks: - msg = "Task ID not found" - raise ValueError(msg) - - task = self._progress._tasks[task_id] - return task.finished_speed or task.speed or 0 diff --git a/cyberdrop_dl/ui/progress/scraping_progress.py b/cyberdrop_dl/ui/progress/scraping_progress.py deleted file mode 100644 index 3efd44251..000000000 --- a/cyberdrop_dl/ui/progress/scraping_progress.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from rich.progress import Progress, SpinnerColumn, TaskID - -from cyberdrop_dl.ui.progress.deque_progress import DequeProgress - -if TYPE_CHECKING: - from yarl import URL - - from cyberdrop_dl.managers import Manager - - -class ScrapingProgress(DequeProgress): - """Class that manages the download progress of individual files.""" - - type_str = "URLs" - - def __init__(self, manager: Manager) -> None: - self.manager = manager - self._progress = Progress(SpinnerColumn(), "[progress.description]{task.description}") - visible_tasks_limit: int = 5 - super().__init__("Scraping", visible_tasks_limit) - - def get_queue_length(self) -> int: - """Returns the number of tasks in the scraper queue.""" - total = 0 - unique_crawler_ids = set() - for crawler in self.manager.scrape_mapper.existing_crawlers.values(): - crawler_id = id(crawler) # Only count each instance of the crawler once - if crawler_id in unique_crawler_ids: - continue - unique_crawler_ids.add(crawler_id) - total += crawler.waiting_items - - return total - - def redraw(self, passed: bool = False) -> None: - super().redraw() - if not passed: - self.manager.progress_manager.file_progress.redraw() - - def add_task(self, url: URL) -> TaskID: # type: ignore[reportIncompatibleMethodOverride] - """Adds a new task to the progress bar.""" - return super().add_task(str(url)) diff --git a/cyberdrop_dl/ui/progress/sort_progress.py b/cyberdrop_dl/ui/progress/sort_progress.py deleted file mode 100644 index ca05f92bc..000000000 --- a/cyberdrop_dl/ui/progress/sort_progress.py +++ /dev/null @@ -1,74 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from rich.markup import escape -from rich.panel import Panel -from rich.progress import BarColumn, Progress, SpinnerColumn, TaskID - -from cyberdrop_dl.ui.progress.deque_progress import DequeProgress, adjust_title - -if TYPE_CHECKING: - from cyberdrop_dl.managers import Manager - - -class SortProgress(DequeProgress): - """Class that keeps track of sorted files.""" - - type_str = "Folders" - - def __init__(self, visible_tasks_limit: int, manager: Manager) -> None: - """Sorter to track the progress of folders being sorted. - - Should work similar to the file_progress but for folders, with a percentage and progress bar for the files within the folders""" - self.manager = manager - self._progress = Progress( - SpinnerColumn(), - "[progress.description]{task.description}", - BarColumn(bar_width=None), - "[progress.percentage]{task.percentage:>6.2f}%", - "━", - "{task.completed}/{task.total} files", - ) - super().__init__("Sort", visible_tasks_limit) - - # counts - self.queue_length = self.audio_count = self.video_count = self.image_count = self.other_count = 0 - - def get_queue_length(self) -> int: - return self.queue_length - - def get_renderable(self) -> Panel: - """Returns the progress bar.""" - return Panel( - self._progress_group, - title=f"Sorting Downloads ━ Config: {self.manager.config_manager.loaded_config}", - border_style="green", - padding=(1, 1), - ) - - def set_queue_length(self, length: int) -> None: - self.queue_length = length - - def add_task(self, folder: str, expected_size: int | None) -> TaskID: - """Adds a new task to the progress bar.""" - # description = f'Sorting {folder}' - description = folder.encode("ascii", "ignore").decode().strip() - description = escape(adjust_title(description)) - return super().add_task(description, expected_size) - - def advance_folder(self, task_id: TaskID, amount: int = 1) -> None: - """Advances the progress of the given task by the given amount.""" - self._progress.advance(task_id, amount) - - def increment_audio(self) -> None: - self.audio_count += 1 - - def increment_video(self) -> None: - self.video_count += 1 - - def increment_image(self) -> None: - self.image_count += 1 - - def increment_other(self) -> None: - self.other_count += 1 diff --git a/cyberdrop_dl/ui/prompts/user_prompts.py b/cyberdrop_dl/ui/prompts/user_prompts.py index 594026c25..32820c920 100644 --- a/cyberdrop_dl/ui/prompts/user_prompts.py +++ b/cyberdrop_dl/ui/prompts/user_prompts.py @@ -14,7 +14,7 @@ ) from rich.console import Console -from cyberdrop_dl import __version__ +from cyberdrop_dl import __version__, config from cyberdrop_dl.constants import BROWSERS, RESERVED_CONFIG_NAMES from cyberdrop_dl.ui.prompts import basic_prompts from cyberdrop_dl.ui.prompts.defaults import ALL_CHOICE, DONE_CHOICE, EXIT_CHOICE @@ -134,7 +134,7 @@ def _check_valid_new_config_name(answer: str, manager: Manager) -> str | None: def auto_cookie_extraction(manager: Manager): answer = basic_prompts.ask_toggle("Enable auto cookies import:") - manager.config_manager.settings_data.browser_cookies.auto_import = answer + config.get().browser_cookies.auto_import = answer if answer: extract_cookies(manager, dry_run=True) manager.config_manager.write_updated_settings_config() @@ -198,8 +198,8 @@ def extract_cookies(manager: Manager, *, dry_run: bool = False) -> None: browser = BROWSERS(browser_prompt()) if dry_run: - manager.config_manager.settings_data.browser_cookies.browser = browser - current_sites = set(manager.config_manager.settings_data.browser_cookies.sites) + config.get().browser_cookies.browser = browser + current_sites = set(config.get().browser_cookies.sites) new_sites = current_sites - set(all_domains) if domains == supported_forums: new_sites -= {"all"} @@ -216,7 +216,7 @@ def extract_cookies(manager: Manager, *, dry_run: bool = False) -> None: if "all_forums" in new_sites and "all_file_hosts" in new_sites: new_sites -= {"all_forums", "all_file_hosts"} new_sites.add("all") - manager.config_manager.settings_data.browser_cookies.sites = sorted(new_sites) + config.get().browser_cookies.sites = sorted(new_sites) return get_cookies_from_browsers(manager, browser=browser, domains=domains) diff --git a/cyberdrop_dl/utils/cookie_management.py b/cyberdrop_dl/utils/cookie_management.py index 42a635b6d..c7a5e6e08 100644 --- a/cyberdrop_dl/utils/cookie_management.py +++ b/cyberdrop_dl/utils/cookie_management.py @@ -9,6 +9,7 @@ from textwrap import dedent from typing import TYPE_CHECKING, NamedTuple, ParamSpec, TypeVar +from cyberdrop_dl import config from cyberdrop_dl.dependencies import browser_cookie3 from cyberdrop_dl.utils.logger import log @@ -77,7 +78,7 @@ def get_cookies_from_browsers(manager: Manager, *, browser: BROWSERS, domains: l raise ValueError(msg) extractor_name = browser.lower() - domains_to_extract: list[str] = domains or manager.config_manager.settings_data.browser_cookies.sites + domains_to_extract: list[str] = domains or config.get().browser_cookies.sites if "all" in domains_to_extract: domains_to_extract.remove("all") domains_to_extract.extend(SUPPORTED_SITES_DOMAINS) diff --git a/cyberdrop_dl/utils/sorting.py b/cyberdrop_dl/utils/sorting.py index bb3a8e32f..fa8635afd 100644 --- a/cyberdrop_dl/utils/sorting.py +++ b/cyberdrop_dl/utils/sorting.py @@ -9,7 +9,7 @@ import imagesize -from cyberdrop_dl import constants +from cyberdrop_dl import config, constants from cyberdrop_dl.constants import FILE_FORMATS from cyberdrop_dl.utils import strings from cyberdrop_dl.utils.ffmpeg import probe @@ -30,10 +30,10 @@ def __init__(self, manager: Manager) -> None: self.manager = manager self.download_folder = manager.path_manager.scan_folder or manager.path_manager.download_folder self.sorted_folder = manager.path_manager.sorted_folder - self.incrementer_format: str = manager.config_manager.settings_data.sorting.sort_incrementer_format + self.incrementer_format: str = config.get().sorting.sort_incrementer_format self.db_manager = manager.db_manager - settings = manager.config_manager.settings_data.sorting + settings = config.get().sorting self.audio_format: str | None = settings.sorted_audio self.image_format: str | None = settings.sorted_image self.video_format: str | None = settings.sorted_video @@ -88,11 +88,8 @@ async def run(self) -> None: _ = purge_dir_tree(self.download_folder) async def _sort_files(self, files_to_sort: dict[str, list[Path]]) -> None: - queue_length = len(files_to_sort) - self.manager.progress_manager.sort_progress.set_queue_length(queue_length) - for folder_name, files in files_to_sort.items(): - task_id = self.manager.progress_manager.sort_progress.add_task(folder_name, len(files)) + task_id = self.manager.progress_manager.sorting.new_task(folder_name, len(files)) for file in files: ext = file.suffix.lower() @@ -109,11 +106,9 @@ async def _sort_files(self, files_to_sort: dict[str, list[Path]]) -> None: else: await self.sort_other(file, folder_name) - self.manager.progress_manager.sort_progress.advance_folder(task_id) + self.manager.progress_manager.sorting.advance_folder(task_id) - self.manager.progress_manager.sort_progress.remove_task(task_id) - queue_length -= 1 - self.manager.progress_manager.sort_progress.set_queue_length(queue_length) + self.manager.progress_manager.sorting.remove_task(task_id) async def sort_audio(self, file: Path, base_name: str) -> None: """Sorts an audio file into the sorted audio folder.""" @@ -140,7 +135,7 @@ async def sort_audio(self, file: Path, base_name: str) -> None: length=duration, sample_rate=sample_rate, ): - self.manager.progress_manager.sort_progress.increment_audio() + self.manager.progress_manager.sorting.increment_audio() async def sort_image(self, file: Path, base_name: str) -> None: """Sorts an image file into the sorted image folder.""" @@ -166,7 +161,7 @@ async def sort_image(self, file: Path, base_name: str) -> None: resolution=resolution, width=width, ): - self.manager.progress_manager.sort_progress.increment_image() + self.manager.progress_manager.sorting.increment_image() async def sort_video(self, file: Path, base_name: str) -> None: """Sorts a video file into the sorted video folder.""" @@ -199,7 +194,7 @@ async def sort_video(self, file: Path, base_name: str) -> None: resolution=resolution, width=width, ): - self.manager.progress_manager.sort_progress.increment_video() + self.manager.progress_manager.sorting.increment_video() async def sort_other(self, file: Path, base_name: str) -> None: """Sorts an other file into the sorted other folder.""" @@ -207,7 +202,7 @@ async def sort_other(self, file: Path, base_name: str) -> None: return if await self._process_file_move(file, base_name, self.other_format): - self.manager.progress_manager.sort_progress.increment_other() + self.manager.progress_manager.sorting.increment_other() async def _process_file_move(self, file: Path, base_name: str, format_str: str, **kwargs: Any) -> bool: file_date = await get_modified_date(file) diff --git a/cyberdrop_dl/utils/utilities.py b/cyberdrop_dl/utils/utilities.py index e1e6cae60..68dae5a54 100644 --- a/cyberdrop_dl/utils/utilities.py +++ b/cyberdrop_dl/utils/utilities.py @@ -35,7 +35,7 @@ from pydantic import ValidationError from yarl import URL -from cyberdrop_dl import constants +from cyberdrop_dl import config, constants from cyberdrop_dl.data_structures import AbsoluteHttpURL from cyberdrop_dl.exceptions import ( CDLBaseError, @@ -201,7 +201,7 @@ def sanitize_folder(title: str) -> str: title = title.replace("\n", "").strip() title = title.replace("\t", "").strip() - title = re.sub(" +", " ", title) + title = re.sub(r" +", " ", title) title = sanitize_filename(title, "-") title = re.sub(r"\.{2,}", ".", title) title = title.rstrip(".").strip() @@ -264,7 +264,7 @@ def get_download_path(manager: Manager, scrape_item: ScrapeItem, domain: str) -> def remove_file_id(manager: Manager, filename: str, ext: str) -> tuple[str, str]: """Removes the additional string some websites adds to the end of every filename.""" original_filename = filename - if not manager.config_manager.settings_data.download_options.remove_generated_id_from_filenames: + if not config.get().download_options.remove_generated_id_from_filenames: return original_filename, filename filename = filename.rsplit(ext, 1)[0] @@ -332,7 +332,7 @@ def purge_dir_tree(dirname: Path | str) -> bool: def check_partials_and_empty_folders(manager: Manager) -> None: """Checks for partial downloads, deletes partial files and empty folders.""" - settings = manager.config_manager.settings_data.runtime_options + settings = config.get().runtime_options if settings.delete_partial_files: delete_partial_files(manager) if not settings.skip_check_for_partial_files: @@ -379,7 +379,7 @@ def delete_empty_folders(manager: Manager): purge_dir_tree(manager.path_manager.download_folder) sorted_folder = manager.path_manager.sorted_folder - if sorted_folder and manager.config_manager.settings_data.sorting.sort_downloads: + if sorted_folder and config.get().sorting.sort_downloads: purge_dir_tree(sorted_folder) diff --git a/cyberdrop_dl/utils/webhook.py b/cyberdrop_dl/utils/webhook.py index a156988dd..af8191c2e 100644 --- a/cyberdrop_dl/utils/webhook.py +++ b/cyberdrop_dl/utils/webhook.py @@ -7,7 +7,7 @@ import rich from aiohttp import FormData -from cyberdrop_dl import constants +from cyberdrop_dl import config, constants from cyberdrop_dl.utils import aio from cyberdrop_dl.utils.logger import log, log_debug, log_spacer @@ -64,7 +64,7 @@ async def _prepare_form(webhook: HttpAppriseURL, main_log: Path) -> FormData: async def send_webhook_message(manager: Manager) -> None: """Outputs the stats to a code block for webhook messages.""" - webhook = manager.config_manager.settings_data.logs.webhook + webhook = config.get().logs.webhook if not webhook: return diff --git a/tests/test_manager.py b/tests/test_manager.py index d21106043..0545e9781 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -5,7 +5,9 @@ import pytest -from cyberdrop_dl.managers import Manager, merge_dicts +from cyberdrop_dl import config +from cyberdrop_dl.managers import Manager, log_app_state +from cyberdrop_dl.models import merge_dicts if TYPE_CHECKING: from pydantic import BaseModel @@ -105,9 +107,10 @@ def test_value_should_not_overwrite_dict(self) -> None: def test_args_logging_should_censor_webhook( running_manager: Manager, logs: pytest.LogCaptureFixture, webhook: str, output: str ) -> None: - logs_model = running_manager.config_manager.settings_data.logs - running_manager.config_manager.settings_data.logs = update_model(logs_model, webhook=webhook) - running_manager.args_logging() + logs_model = config.get().logs + config.get().logs = update_model(logs_model, webhook=webhook) + log_app_state() + assert logs.messages assert "Starting Cyberdrop-DL Process" in logs.text assert webhook not in logs.text diff --git a/tests/test_startup.py b/tests/test_startup.py index bd1a434bc..30ae20b69 100644 --- a/tests/test_startup.py +++ b/tests/test_startup.py @@ -1,9 +1,9 @@ from pathlib import Path import pytest +from cyberdrop_dl.ui.program_ui import ProgramUI from cyberdrop_dl.main import run -from cyberdrop_dl.ui.program_ui import ProgramUI def test_startup(tmp_cwd: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None: From ff591e1e9baa6c58c74c66a08663eb837b51ce9c Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Mon, 23 Feb 2026 00:38:05 -0500 Subject: [PATCH 08/23] refactor: update errors panel --- cyberdrop_dl/progress/__init__.py | 28 ++- cyberdrop_dl/progress/downloads_progress.py | 150 ++++++++++------ cyberdrop_dl/progress/errors.py | 181 +++++++++++++++++++ cyberdrop_dl/progress/panels.py | 3 + cyberdrop_dl/progress/statistic_progress.py | 186 -------------------- 5 files changed, 291 insertions(+), 257 deletions(-) create mode 100644 cyberdrop_dl/progress/errors.py delete mode 100644 cyberdrop_dl/progress/statistic_progress.py diff --git a/cyberdrop_dl/progress/__init__.py b/cyberdrop_dl/progress/__init__.py index 289e2d4a2..cf9d824d7 100644 --- a/cyberdrop_dl/progress/__init__.py +++ b/cyberdrop_dl/progress/__init__.py @@ -17,10 +17,10 @@ from cyberdrop_dl import __version__, config from cyberdrop_dl.progress.downloads_progress import DownloadsProgress +from cyberdrop_dl.progress.errors import DownloadErrors, ScrapeErrors from cyberdrop_dl.progress.hash_progress import HashProgress from cyberdrop_dl.progress.panels import DownloadsPanel, ScrapingPanel from cyberdrop_dl.progress.sorting import SortingPanel -from cyberdrop_dl.progress.statistic_progress import DownloadStatsProgress, ScrapeStatsProgress from cyberdrop_dl.utils.logger import log_spacer if TYPE_CHECKING: @@ -28,7 +28,7 @@ from pathlib import Path from cyberdrop_dl.managers import Manager - from cyberdrop_dl.progress.statistic_progress import UiFailureTotal + from cyberdrop_dl.progress.errors import UIFailure spinner = SpinnerColumn(style="green", spinner_name="dots") @@ -65,18 +65,16 @@ def build(cls, progress: ProgressManager) -> Self: activity = Progress(spinner, "[progress.description]{task.description}") _ = activity.add_task(f"Running Cyberdrop-DL: v{__version__}", total=100, completed=0) - status_message_columns = Columns([activity, progress.status.progress]) - upper_layouts = ( - Layout(renderable=progress.download_progress.get_progress(), name="Files", ratio=1, minimum_size=9), - Layout(renderable=progress.scrape_stats_progress.get_progress(), name="Scrape Failures", ratio=1), - Layout(renderable=progress.download_stats_progress.get_progress(), name="Download Failures", ratio=1), + Layout(progress.download_progress, name="Files", ratio=1, minimum_size=9), + Layout(progress.scrape_stats_progress, name="Scrape Failures", ratio=1), + Layout(progress.download_stats_progress, name="Download Failures", ratio=1), ) lower_layouts = ( - Layout(renderable=progress.scraping_progress.get_renderable(), name="Scraping", ratio=20), - Layout(renderable=progress.file_progress.get_renderable(), name="Downloads", ratio=20), - Layout(renderable=status_message_columns, name="status_message", ratio=2), + Layout(progress.scraping_progress, name=progress.scraping_progress.title, ratio=20), + Layout(progress.file_progress, name=progress.file_progress.title, ratio=20), + Layout(Columns([activity, progress.status.progress]), name="status_message", ratio=2), ) horizontal.split_column(Layout(name="upper", ratio=20), *lower_layouts) @@ -98,9 +96,9 @@ def __init__(self, manager: Manager) -> None: self.scraping_progress = ScrapingPanel() self.status = StatusMessage() - self.download_progress: DownloadsProgress = DownloadsProgress(manager) - self.download_stats_progress: DownloadStatsProgress = DownloadStatsProgress() - self.scrape_stats_progress: ScrapeStatsProgress = ScrapeStatsProgress() + self.download_progress: DownloadsProgress = DownloadsProgress() + self.download_stats_progress: DownloadErrors = DownloadErrors() + self.scrape_stats_progress: ScrapeErrors = ScrapeErrors() self.hash_progress: HashProgress = HashProgress(manager) self.sorting: SortingPanel = SortingPanel(1) @@ -144,7 +142,7 @@ def print_stats(self, start_time: float) -> None: logger.info("Download Stats:") logger.info(f" Downloaded: {self.download_progress.completed_files:,} files") logger.info(f" Skipped (By Config): {self.download_progress.skipped_files:,} files") - logger.info(f" Skipped (Previously Downloaded): {self.download_progress.previously_completed_files:,} files") + logger.info(f" Skipped (Previously Downloaded): {self.download_progress.previously_completed:,} files") logger.info(f" Failed: {self.download_stats_progress.failed_files:,} files") logger.info("Unsupported URLs Stats:") @@ -169,7 +167,7 @@ def print_dedupe_stats(self) -> None: logger.info(f" Removed (Downloads): {self.hash_progress.removed_files:,} files") -def log_failures(failures: list[UiFailureTotal], title: str = "Failures:", last_padding: int = 0) -> int: +def log_failures(failures: list[UIFailure], title: str = "Failures:", last_padding: int = 0) -> int: logger.info(title) if not failures: logger.info(" None") diff --git a/cyberdrop_dl/progress/downloads_progress.py b/cyberdrop_dl/progress/downloads_progress.py index 6d791c5b6..f17b4ec63 100644 --- a/cyberdrop_dl/progress/downloads_progress.py +++ b/cyberdrop_dl/progress/downloads_progress.py @@ -1,20 +1,46 @@ from __future__ import annotations -from typing import TYPE_CHECKING +import dataclasses -from rich.console import Group from rich.panel import Panel -from rich.progress import BarColumn, Progress +from rich.progress import BarColumn, Progress, TaskID -if TYPE_CHECKING: - from cyberdrop_dl.managers import Manager +from cyberdrop_dl import signature + + +class SimpleProgress(Progress): + """A progress with a single task""" + + @signature.copy(Progress.__init__) + def __init__(self, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + self._task_id: TaskID | None = None + + @signature.copy(Progress.add_task) + def add_task(self, *args, **kwargs) -> TaskID: + assert self._task_id is None + self._task_id = super().add_task(*args, **kwargs) + return self._task_id + + +@dataclasses.dataclass(slots=True) +class Tracker: + id: TaskID + count: int = 0 + + +class Tasks(dict[str, Tracker]): + def __getattr__(self, name: str, /) -> Tracker: + return self[name] class DownloadsProgress: """Class that keeps track of completed, skipped and failed files.""" - def __init__(self, manager: Manager) -> None: - self.manager = manager + def __repr__(self) -> str: + return f"{type(self).__name__}({vars(self)!r})" + + def __init__(self) -> None: self.progress = Progress( "[progress.description]{task.description}", BarColumn(bar_width=None), @@ -22,25 +48,15 @@ def __init__(self, manager: Manager) -> None: "━", "{task.completed:,}", ) - self.progress_group = Group(self.progress) - - self.total_files = 0 - self.completed_files_task_id = self.progress.add_task("[green]Completed", total=0) - self.completed_files = 0 - self.previously_completed_files_task_id = self.progress.add_task("[yellow]Previously Downloaded", total=0) - self.previously_completed_files = 0 - self.skipped_files_task_id = self.progress.add_task("[yellow]Skipped By Configuration", total=0) - self.skipped_files = 0 - self.queued_files_task_id = self.progress.add_task("[cyan]Queued", total=0) - self.queued_files = 0 - self.failed_files_task_id = self.progress.add_task("[red]Failed", total=0) - self.failed_files = 0 - self.panel = Panel( - self.progress_group, - title=f"Config: {self.manager.config_manager.loaded_config}", + + self._total_files = 0 + + self._panel = Panel( + self.progress, + title="Files", border_style="green", padding=(1, 1), - subtitle=f"Total Files: [white]{self.total_files:,}", + subtitle=f"Total Files: [white]{self._total_files:,}", ) self.simple_progress = Progress( "[progress.description]{task.description}", @@ -49,53 +65,75 @@ def __init__(self, manager: Manager) -> None: "━", "{task.completed:,}", ) - self.simple_progress_task_id = self.simple_progress.add_task("Completed", total=0) - @property - def simple_completed(self): - return self.total_files - self.queued_files + self._tasks = Tasks() + + for name, color, desc in ( + ("completed", "green", "Completed"), + ("previously_completed", "yellow", "Previously Downloaded"), + ("skipped", "yellow", "Skipped By Configuration"), + ("queued", "cyan", "Queued"), + ("failed", "red", "Failed"), + ): + self._tasks[name] = Tracker(self.progress.add_task(f"[{color}]{desc}", total=0)) - def get_progress(self) -> Panel: - """Returns the progress bar.""" - return self.panel + self._tasks["simple"] = Tracker(self.simple_progress.add_task("Completed", total=0)) + + def __rich__(self) -> Panel: + return self._panel def update_total(self, increase_total: bool = True) -> None: - """Updates the total number of files to be downloaded.""" - if increase_total: - self.total_files = self.total_files + 1 - self.progress.update(self.completed_files_task_id, total=self.total_files) - self.progress.update(self.previously_completed_files_task_id, total=self.total_files) - self.progress.update(self.skipped_files_task_id, total=self.total_files) - self.progress.update(self.failed_files_task_id, total=self.total_files) - self.progress.update(self.queued_files_task_id, total=self.total_files) + self._panel.subtitle = f"Total Files: [white]{self._total_files:,}" + if not increase_total: + return + + self._total_files = self._total_files + 1 + self.progress.update(self._tasks.completed.id, total=self._total_files) + self.progress.update(self._tasks.previously_completed.id, total=self._total_files) + self.progress.update(self._tasks.skipped.id, total=self._total_files) + self.progress.update(self._tasks.failed.id, total=self._total_files) + self.progress.update(self._tasks.queued.id, total=self._total_files) self.simple_progress.update( - self.simple_progress_task_id, total=self.total_files, completed=self.simple_completed + self._tasks.simple.id, + total=self._total_files, + completed=self._total_files - self._tasks.queued.count, ) - self.panel.subtitle = f"Total Files: [white]{self.total_files:,}" def add_completed(self) -> None: - """Adds a completed file to the progress bar.""" - self.progress.advance(self.completed_files_task_id, 1) - self.completed_files += 1 + self.progress.advance(self._tasks.completed.id) + self._tasks.completed.count += 1 def add_previously_completed(self, increase_total: bool = True) -> None: - """Adds a previously completed file to the progress bar.""" if increase_total: self.update_total() - self.previously_completed_files += 1 - self.progress.advance(self.previously_completed_files_task_id, 1) + + self._tasks.previously_completed.count += 1 + self.progress.advance(self._tasks.previously_completed.id) def add_skipped(self) -> None: - """Adds a skipped file to the progress bar.""" - self.progress.advance(self.skipped_files_task_id, 1) - self.skipped_files += 1 + self.progress.advance(self._tasks.skipped.id) + self._tasks.skipped.count += 1 def add_failed(self) -> None: - """Adds a failed file to the progress bar.""" - self.progress.advance(self.failed_files_task_id, 1) - self.failed_files += 1 + self.progress.advance(self._tasks.failed.id) + self._tasks.failed.count += 1 def update_queued(self, number: int) -> None: - """Adds a queed file to the progress bar.""" - self.queued_files = number - self.progress.update(self.queued_files_task_id, completed=self.queued_files) + self._tasks.queued.count = number + self.progress.update(self._tasks.queued.id, completed=self._tasks.queued.count) + + @property + def skipped_files(self) -> int: + return self._tasks.skipped.count + + @property + def failed_files(self) -> int: + return self._tasks.failed.count + + @property + def completed_files(self) -> int: + return self._tasks.completed.count + + @property + def previously_completed(self) -> int: + return self._tasks.previously_completed.count diff --git a/cyberdrop_dl/progress/errors.py b/cyberdrop_dl/progress/errors.py new file mode 100644 index 000000000..94920ec34 --- /dev/null +++ b/cyberdrop_dl/progress/errors.py @@ -0,0 +1,181 @@ +from __future__ import annotations + +import dataclasses +import functools +from typing import ClassVar, NamedTuple + +from rich.panel import Panel +from rich.progress import BarColumn, Progress, TaskID + +_FAILURE_OVERRIDES = { + "ClientConnectorCertificateError": "Client Connector Certificate Error", + "ClientConnectorDNSError": "Client Connector DNS Error", + "ClientConnectorError": "Client Connector Error", + "ClientConnectorSSLError": "Client Connector SSL Error", + "ClientHttpProxyError": "Client HTTP Proxy Error", + "ClientPayloadError": "Client Payload Error", + "ClientProxyConnectionError": "Client Proxy Connection Error", + "ConnectionTimeoutError": "Connection Timeout", + "ContentTypeError": "Content Type Error", + "InvalidURL": "Invalid URL", + "InvalidUrlClientError": "Invalid URL Client Error", + "InvalidUrlRedirectClientError": "Invalid URL Redirect", + "NonHttpUrlRedirectClientError": "Non HTTP URL Redirect", + "RedirectClientError": "Redirect Error", + "ServerConnectionError": "Server Connection Error", + "ServerDisconnectedError": "Server Disconnected", + "ServerFingerprintMismatch": "Server Fingerprint Mismatch", + "ServerTimeoutError": "Server Timeout Error", + "SocketTimeoutError": "Socket Timeout Error", +} + + +class TaskInfo(NamedTuple): + id: TaskID + description: str + completed: float + total: float | None + + +@dataclasses.dataclass(slots=True, order=True) +class UIFailure: + full_msg: str + total: int + error_code: int | None = None + msg: str = dataclasses.field(init=False) + + def __post_init__(self) -> None: + parts = self.full_msg.split(" ", 1) + if len(parts) > 1 and parts[0].isdigit(): + error_code, self.msg = parts + self.error_code = int(error_code) + else: + self.msg = self.full_msg + + +def _get_tasks_info_sorted(progress: Progress) -> tuple[list[TaskInfo], bool]: + tasks = [ + TaskInfo( + id=task.id, + description=task.description, + completed=task.completed, + total=task.total, + ) + for task in progress.tasks + ] + + tasks_sorted = sorted(tasks, key=lambda x: x.completed, reverse=True) + were_sorted = tasks == tasks_sorted + return tasks_sorted, were_sorted + + +class _ErrorsPanel: + """Base class that keeps track of errors and reasons.""" + + title: ClassVar[str] + + def __repr__(self) -> str: + return f"{type(self).__name__}(failed_files={self.failed_files!r}, failures={self._failures.keys()!r})" + + def __init__(self) -> None: + self._progress = Progress( + "[progress.description]{task.description}", + BarColumn(bar_width=None), + "[progress.percentage]{task.percentage:>6.2f}%", + "━", + "{task.completed:,}", + ) + + self._failures: dict[str, TaskID] = {} + self.failed_files = 0 + self._panel = Panel( + self._progress, + title=self.title, + border_style="green", + padding=(1, 1), + subtitle=self._subtitle, + ) + + @property + def _subtitle(self) -> str: + return f"Total {self.title}: [white]{self.failed_files:,}" + + def __rich__(self) -> Panel: + return self._panel + + def add_failure(self, failure: str) -> None: + self.failed_files += 1 + key = _get_pretty_error(failure) + if (task_id := self._failures.get(key)) is not None: + self._progress.advance(task_id) + else: + self._failures[key] = self._progress.add_task(key, total=self.failed_files, completed=1) + + self._redraw() + + def _redraw(self) -> None: + self._panel.subtitle = self._subtitle + for task_id in self._failures.values(): + self._progress.update(task_id, total=self.failed_files) + + tasks_sorted, were_sorted = _get_tasks_info_sorted(self._progress) + if were_sorted: + return + + for task in tasks_sorted: + self._progress.remove_task(task.id) + + for task in tasks_sorted: + self._failures[task.description] = self._progress.add_task( + task.description, + total=task.total, + completed=int(task.completed), + ) + + def return_totals(self) -> list[UIFailure]: + """Returns the total number of failed sites and reasons.""" + + return sorted( + UIFailure(msg, int(self._progress._tasks[task_id].completed)) for msg, task_id in self._failures.items() + ) + + +class DownloadErrors(_ErrorsPanel): + """Class that keeps track of download failures and reasons.""" + + title: ClassVar[str] = "Download Failures" + + +class ScrapeErrors(_ErrorsPanel): + """Class that keeps track of scraping failures and reasons.""" + + title = "Scrape Failures" + + def __init__(self) -> None: + super().__init__() + self.unsupported_urls: int = 0 + self.sent_to_jdownloader: int = 0 + self.unsupported_urls_skipped: int = 0 + + def add_unsupported(self, *, sent_to_jdownloader: bool = False) -> None: + self.unsupported_urls += 1 + if sent_to_jdownloader: + self.sent_to_jdownloader += 1 + else: + self.unsupported_urls_skipped += 1 + + +@functools.cache +def _get_pretty_error(failure: str) -> str: + return _FAILURE_OVERRIDES.get(failure) or _capitalize_words(failure) + + +def _capitalize_words(text: str) -> str: + """Capitalize first letter of each word + + Unlike `str.capwords()`, this only caps the first letter of each word without modifying the rest of the word""" + + def cap(word: str) -> str: + return word[0].capitalize() + word[1:] + + return " ".join([cap(word) for word in text.split()]) diff --git a/cyberdrop_dl/progress/panels.py b/cyberdrop_dl/progress/panels.py index e9345a966..7c44a18bc 100644 --- a/cyberdrop_dl/progress/panels.py +++ b/cyberdrop_dl/progress/panels.py @@ -56,6 +56,9 @@ def __init__(self, progress: Progress, visible_tasks_limit: int) -> None: def _clean_task_desc(cls, desc: str) -> str: return escape(truncate(desc.encode("ascii", "ignore").decode().strip(), length=40)) + def __rich__(self) -> Panel: + return self.get_renderable() + def get_renderable(self) -> Panel: return Panel( Group(self._progress, self._overflow.progress), diff --git a/cyberdrop_dl/progress/statistic_progress.py b/cyberdrop_dl/progress/statistic_progress.py deleted file mode 100644 index f31965aef..000000000 --- a/cyberdrop_dl/progress/statistic_progress.py +++ /dev/null @@ -1,186 +0,0 @@ -from __future__ import annotations - -import contextlib -import functools -from typing import NamedTuple - -from rich.console import Group -from rich.panel import Panel -from rich.progress import BarColumn, Progress, TaskID - -FAILURE_OVERRIDES = { - "ClientConnectorCertificateError": "Client Connector Certificate Error", - "ClientConnectorDNSError": "Client Connector DNS Error", - "ClientConnectorError": "Client Connector Error", - "ClientConnectorSSLError": "Client Connector SSL Error", - "ClientHttpProxyError": "Client HTTP Proxy Error", - "ClientPayloadError": "Client Payload Error", - "ClientProxyConnectionError": "Client Proxy Connection Error", - "ConnectionTimeoutError": "Connection Timeout", - "ContentTypeError": "Content Type Error", - "InvalidURL": "Invalid URL", - "InvalidUrlClientError": "Invalid URL Client Error", - "InvalidUrlRedirectClientError": "Invalid URL Redirect", - "NonHttpUrlRedirectClientError": "Non HTTP URL Redirect", - "RedirectClientError": "Redirect Error", - "ServerConnectionError": "Server Connection Error", - "ServerDisconnectedError": "Server Disconnected", - "ServerFingerprintMismatch": "Server Fingerprint Mismatch", - "ServerTimeoutError": "Server Timeout Error", - "SocketTimeoutError": "Socket Timeout Error", -} - - -class TaskInfo(NamedTuple): - id: TaskID - description: str - completed: float - total: float | None - progress: float - - -class UiFailureTotal(NamedTuple): - full_msg: str - total: int - error_code: int | None - msg: str - - @classmethod - def from_pair(cls, full_msg: str, total: int) -> UiFailureTotal: - parts = full_msg.split(" ", 1) - if len(parts) > 1 and parts[0].isdigit(): - error_code, msg = parts - return cls(full_msg, total, int(error_code), msg) - return cls(full_msg, total, None, full_msg) - - -def get_tasks_info_sorted(progress: Progress) -> tuple[list[TaskInfo], bool]: - tasks = [ - TaskInfo( - id=task.id, - description=task.description, - completed=task.completed, - total=task.total, - progress=(task.completed / task.total if task.total else 0), - ) - for task in progress.tasks - ] - - tasks_sorted = sorted(tasks, key=lambda x: x.completed, reverse=True) - were_sorted = tasks == tasks_sorted - return tasks_sorted, were_sorted - - -class StatsProgress: - """Base class that keeps track of failures and reasons.""" - - title = "Download Failures" - - def __init__(self) -> None: - self.progress = Progress( - "[progress.description]{task.description}", - BarColumn(bar_width=None), - "[progress.percentage]{task.percentage:>6.2f}%", - "━", - "{task.completed:,}", - ) - self.progress_group = Group(self.progress) - self.failure_types: dict[str, TaskID] = {} - self.failed_files = 0 - self.panel = Panel( - self.progress_group, - title=self.title, - border_style="green", - padding=(1, 1), - subtitle=self.subtitle, - ) - - @property - def subtitle(self) -> str: - return f"Total {self.title}: [white]{self.failed_files:,}" - - def get_progress(self) -> Panel: - """Returns the progress bar.""" - return self.panel - - def update_total(self, total: int) -> None: - """Updates the total number download failures.""" - self.panel.subtitle = self.subtitle - for key in self.failure_types: - self.progress.update(self.failure_types[key], total=total) - - tasks_sorted, were_sorted = get_tasks_info_sorted(self.progress) - if not were_sorted: - self.sort_tasks(tasks_sorted) - - def sort_tasks(self, tasks_sorted: list[TaskInfo]) -> None: - for task_id in [task.id for task in tasks_sorted]: - self.progress.remove_task(task_id) - - for task in tasks_sorted: - self.failure_types[task.description] = self.progress.add_task( - task.description, - total=task.total, - completed=task.completed, # type: ignore - ) - - def add_failure(self, failure: str) -> None: - """Adds a failed file to the progress bar.""" - self.failed_files += 1 - key = get_pretty_failure(failure) - task_id = self.failure_types.get(key) - if task_id is not None: - self.progress.advance(task_id) - else: - self.failure_types[key] = self.progress.add_task(key, total=self.failed_files, completed=1) - self.update_total(self.failed_files) - - def return_totals(self) -> list[UiFailureTotal]: - """Returns the total number of failed sites and reasons.""" - failures = {} - for key, task_id in self.failure_types.items(): - task = next(task for task in self.progress.tasks if task.id == task_id) - failures[key] = task.completed - return sorted(UiFailureTotal.from_pair(*f) for f in failures.items()) - - -class DownloadStatsProgress(StatsProgress): - """Class that keeps track of download failures and reasons.""" - - -class ScrapeStatsProgress(StatsProgress): - """Class that keeps track of scraping failures and reasons.""" - - title = "Scrape Failures" - - def __init__(self) -> None: - super().__init__() - self.unsupported_urls = 0 - self.sent_to_jdownloader = 0 - self.unsupported_urls_skipped = 0 - - def add_unsupported(self, sent_to_jdownloader: bool = False) -> None: - """Adds an unsupported url to the progress bar.""" - self.unsupported_urls += 1 - if sent_to_jdownloader: - self.sent_to_jdownloader += 1 - else: - self.unsupported_urls_skipped += 1 - - -@functools.lru_cache -def get_pretty_failure(failure: str) -> str: - with contextlib.suppress(KeyError): - return FAILURE_OVERRIDES[failure] - return capitalize_words(failure) - - -def capitalize_words(text: str) -> str: - """Capitalize first letter of each word - - Unlike `str.capwords()`, this only caps the first letter of each word without modifying the rest of the word""" - return " ".join([capitalize_first_letter(word) for word in text.split()]) - - -def capitalize_first_letter(word: str) -> str: - return word[0].capitalize() + word[1:] From 3dc5bdbd0aa99f7e1eec227d44379ccb22bfd165 Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Mon, 23 Feb 2026 01:39:47 -0500 Subject: [PATCH 09/23] refactor: update progress --- cyberdrop_dl/clients/download_client.py | 16 +-- cyberdrop_dl/clients/hash_client.py | 11 +- cyberdrop_dl/crawlers/archivebate.py | 2 +- cyberdrop_dl/crawlers/crawler.py | 14 +- cyberdrop_dl/downloader/downloader.py | 20 +-- cyberdrop_dl/downloader/mega_nz.py | 2 +- cyberdrop_dl/managers/__init__.py | 3 +- cyberdrop_dl/progress/__init__.py | 80 +++++------ cyberdrop_dl/progress/_common.py | 18 +++ cyberdrop_dl/progress/errors.py | 45 ++++--- .../{downloads_progress.py => files.py} | 68 +++------- cyberdrop_dl/progress/hash_progress.py | 125 ------------------ cyberdrop_dl/progress/hashing.py | 112 ++++++++++++++++ cyberdrop_dl/progress/sorting.py | 6 +- cyberdrop_dl/progress/{panels.py => ui.py} | 0 cyberdrop_dl/scraper/scrape_mapper.py | 4 +- cyberdrop_dl/utils/utilities.py | 2 +- 17 files changed, 254 insertions(+), 274 deletions(-) create mode 100644 cyberdrop_dl/progress/_common.py rename cyberdrop_dl/progress/{downloads_progress.py => files.py} (58%) delete mode 100644 cyberdrop_dl/progress/hash_progress.py create mode 100644 cyberdrop_dl/progress/hashing.py rename cyberdrop_dl/progress/{panels.py => ui.py} (100%) diff --git a/cyberdrop_dl/clients/download_client.py b/cyberdrop_dl/clients/download_client.py index 44f55efa9..606fd8fbf 100644 --- a/cyberdrop_dl/clients/download_client.py +++ b/cyberdrop_dl/clients/download_client.py @@ -138,13 +138,13 @@ async def _process_response( proceed, skip = await self.get_final_file_info(media_item, domain) self.client_manager.check_content_length(resp.headers) if skip: - self.manager.progress_manager.download_progress.add_skipped() + self.manager.progress_manager.files.add_skipped() return False if not proceed: if media_item.is_segment: return True log(f"Skipping {media_item.url} as it has already been downloaded", 10) - self.manager.progress_manager.download_progress.add_previously_completed(False) + self.manager.progress_manager.files.add_previously_completed(False) await self.process_completed(media_item, domain) await self.handle_media_item_completion(media_item, downloaded=False) @@ -161,12 +161,12 @@ async def _process_response( task_id = media_item.task_id if task_id is None: size = (media_item.filesize + resume_point) if media_item.filesize is not None else None - task_id = self.manager.progress_manager.file_progress.new_task( + task_id = self.manager.progress_manager.downloads.new_task( domain=domain, filename=media_item.filename, expected_size=size ) media_item.set_task_id(task_id) - self.manager.progress_manager.file_progress.advance_file(task_id, resume_point) + self.manager.progress_manager.downloads.advance_file(task_id, resume_point) await self._append_content(media_item, self._get_resp_reader(resp)) return True @@ -247,7 +247,7 @@ async def _append_content(self, media_item: MediaItem, content: aiohttp.StreamRe chunk_size = len(chunk) await self.client_manager.speed_limiter.acquire(chunk_size) await f.write(chunk) - self.manager.progress_manager.file_progress.advance_file(media_item.task_id, chunk_size) + self.manager.progress_manager.downloads.advance_file(media_item.task_id, chunk_size) check_download_speed() await self._post_download_check(media_item) @@ -284,7 +284,7 @@ def check_download_speed() -> None: if not self.download_speed_threshold: return assert media_item.task_id is not None - speed = self.manager.progress_manager.file_progress.get_speed(media_item.task_id) + speed = self.manager.progress_manager.downloads.get_speed(media_item.task_id) if speed > self.download_speed_threshold: last_slow_speed_read = None elif not last_slow_speed_read: @@ -298,7 +298,7 @@ async def download_file(self, domain: str, media_item: MediaItem) -> bool: """Starts a file.""" if config.get().download_options.skip_download_mark_completed and not media_item.is_segment: log(f"Download Removed {media_item.url} due to mark completed option", 10) - self.manager.progress_manager.download_progress.add_skipped() + self.manager.progress_manager.files.add_skipped() # set completed path await self.process_completed(media_item, domain) return False @@ -315,7 +315,7 @@ async def download_file(self, domain: str, media_item: MediaItem) -> bool: log(f"Download Skip {media_item.url} due to runtime restrictions", 10) await asyncio.to_thread(media_item.complete_file.unlink) await self.mark_incomplete(media_item, domain) - self.manager.progress_manager.download_progress.add_skipped() + self.manager.progress_manager.files.add_skipped() return False await self.process_completed(media_item, domain) await self.handle_media_item_completion(media_item, downloaded=True) diff --git a/cyberdrop_dl/clients/hash_client.py b/cyberdrop_dl/clients/hash_client.py index 32c4abb24..b3dc1b911 100644 --- a/cyberdrop_dl/clients/hash_client.py +++ b/cyberdrop_dl/clients/hash_client.py @@ -61,7 +61,7 @@ async def hash_directory(self, path: Path) -> None: path = Path(path) with ( self.manager.live_manager.get_hash_live(stop=True), - self.manager.progress_manager.hash_progress.currently_hashing_dir(path), + self.manager.progress_manager.hashing.currently_hashing_dir(path), ): if not await asyncio.to_thread(path.is_dir): raise NotADirectoryError @@ -117,7 +117,7 @@ async def _update_db_and_retrive_hash_helper( hash_type: str, ) -> str | None: """Generates hash of a file.""" - self.manager.progress_manager.hash_progress.update_currently_hashing(file) + await self.manager.progress_manager.hashing.update_currently_hashing(file) hash = await self.manager.db_manager.hash_table.get_file_hash_exists(file, hash_type) try: if not hash: @@ -129,9 +129,9 @@ async def _update_db_and_retrive_hash_helper( original_filename, referer, ) - self.manager.progress_manager.hash_progress.add_new_completed_hash(hash_type) + self.manager.progress_manager.hashing.add_new_completed_hash(hash_type) else: - self.manager.progress_manager.hash_progress.add_prev_hash() + self.manager.progress_manager.hashing.add_prev_hash() await self.manager.db_manager.hash_table.insert_or_update_hash_db( hash, hash_type, @@ -199,8 +199,7 @@ async def _delete_and_log(self, file: Path, xxh128_value: str) -> None: f"File hash matches with a previous download ({hash_string})" ) log(msg, 10) - self.manager.progress_manager.hash_progress.add_removed_file() - + self.manager.progress_manager.hashing.add_removed_file() finally: self._sem.release() diff --git a/cyberdrop_dl/crawlers/archivebate.py b/cyberdrop_dl/crawlers/archivebate.py index 6f4b7a181..ab605c244 100644 --- a/cyberdrop_dl/crawlers/archivebate.py +++ b/cyberdrop_dl/crawlers/archivebate.py @@ -60,7 +60,7 @@ async def video(self, scrape_item: ScrapeItem) -> None: check_complete = await self.manager.db_manager.history_table.check_complete(self.DOMAIN, url, url, db_path) if check_complete: self.log(f"Skipping {scrape_item.url} as it has already been downloaded", 10) - self.manager.progress_manager.download_progress.add_previously_completed() + self.manager.progress_manager.files.add_previously_completed() return soup = await self.request_soup(scrape_item.url) diff --git a/cyberdrop_dl/crawlers/crawler.py b/cyberdrop_dl/crawlers/crawler.py index b9d6b1e31..75d2bf0a6 100644 --- a/cyberdrop_dl/crawlers/crawler.py +++ b/cyberdrop_dl/crawlers/crawler.py @@ -367,11 +367,11 @@ def raise_exc(self, scrape_item: ScrapeItem, exc: type[Exception] | Exception | def new_task_id(self, url: AbsoluteHttpURL) -> Generator[TaskID]: """Creates a new task_id (shows the URL in the UI and logs)""" log(f"Scraping [{self.FOLDER_DOMAIN}]: {url}", 20) - task_id = self.manager.progress_manager.scraping_progress.new_task(url) + task_id = self.manager.progress_manager.scrape.new_task(url) try: yield task_id finally: - self.manager.progress_manager.scraping_progress.remove_task(task_id) + self.manager.progress_manager.scrape.remove_task(task_id) @staticmethod def is_subdomain(url: AbsoluteHttpURL) -> bool: @@ -468,7 +468,7 @@ async def check_complete(self, url: AbsoluteHttpURL, referer: AbsoluteHttpURL) - check_complete = await self.manager.db_manager.history_table.check_complete(self.DOMAIN, url, referer, db_path) if check_complete: log(f"Skipping {url} as it has already been downloaded", 10) - self.manager.progress_manager.download_progress.add_previously_completed() + self.manager.progress_manager.files.add_previously_completed() return check_complete async def handle_media_item(self, media_item: MediaItem, m3u8: m3u8.RenditionGroup | None = None) -> None: @@ -484,7 +484,7 @@ async def handle_media_item(self, media_item: MediaItem, m3u8: m3u8.RenditionGro return if await self.check_skip_by_config(media_item): - self.manager.progress_manager.download_progress.add_skipped() + self.manager.progress_manager.files.add_skipped() return self.create_task(self._download(media_item, m3u8)) @@ -520,7 +520,7 @@ async def check_complete_from_referer( downloaded = await self.manager.db_manager.history_table.check_complete_by_referer(domain, url) if downloaded: log(f"Skipping {url} as it has already been downloaded", 10) - self.manager.progress_manager.download_progress.add_previously_completed() + self.manager.progress_manager.files.add_previously_completed() return True return False @@ -533,7 +533,7 @@ async def check_complete_by_hash( if downloaded: url = scrape_item if isinstance(scrape_item, URL) else scrape_item.url log(f"Skipping {url} as its hash ({hash_type}:{hash_value}) has already been downloaded", 10) - self.manager.progress_manager.download_progress.add_previously_completed() + self.manager.progress_manager.files.add_previously_completed() return downloaded async def get_album_results(self, album_id: str) -> dict[str, int]: @@ -569,7 +569,7 @@ def check_album_results(self, url: URL, album_results: dict[str, Any]) -> bool: url_path = self.create_db_path(url) if url_path in album_results and album_results[url_path] != 0: log(f"Skipping {url} as it has already been downloaded") - self.manager.progress_manager.download_progress.add_previously_completed() + self.manager.progress_manager.files.add_previously_completed() return True return False diff --git a/cyberdrop_dl/downloader/downloader.py b/cyberdrop_dl/downloader/downloader.py index b6bbb7f65..d38032137 100644 --- a/cyberdrop_dl/downloader/downloader.py +++ b/cyberdrop_dl/downloader/downloader.py @@ -149,9 +149,9 @@ def startup(self) -> None: self.manager.path_manager.sorted_folder.mkdir(parents=True, exist_ok=True) def update_queued_files(self, increase_total: bool = True): - queued_files = self.manager.progress_manager.file_progress.get_queue_length() - self.manager.progress_manager.download_progress.update_queued(queued_files) - self.manager.progress_manager.download_progress.update_total(increase_total) + queued_files = self.manager.progress_manager.downloads.get_queue_length() + self.manager.progress_manager.files.update_queued(queued_files) + self.manager.progress_manager.files.update_total(increase_total) @contextlib.asynccontextmanager async def _download_context(self, media_item: MediaItem): @@ -208,7 +208,7 @@ async def _start_hls_download(self, media_item: MediaItem, m3u8_group: Rendition # TODO: compute approx size for UI from the m3u8 info media_item.download_filename = media_item.complete_file.name await self.manager.db_manager.history_table.add_download_filename(self.domain, media_item) - task_id = self.manager.progress_manager.file_progress.new_task(domain=self.domain, filename=media_item.filename) + task_id = self.manager.progress_manager.downloads.new_task(domain=self.domain, filename=media_item.filename) media_item.set_task_id(task_id) video, audio, _subs = await self._download_rendition_group(media_item, m3u8_group) if not audio: @@ -317,7 +317,7 @@ async def finalize_download(self, media_item: MediaItem, downloaded: bool) -> No await asyncio.to_thread(Path.chmod, media_item.complete_file, 0o666) await self.set_file_datetime(media_item, media_item.complete_file) self.attempt_task_removal(media_item) - self.manager.progress_manager.download_progress.add_completed() + self.manager.progress_manager.files.add_completed() log(f"Download finished: {media_item.url}", 20) """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" @@ -408,7 +408,7 @@ def attempt_task_removal(self, media_item: MediaItem) -> None: return if media_item.task_id is not None: try: - self.manager.progress_manager.file_progress.remove_task(media_item.task_id) + self.manager.progress_manager.downloads.remove_task(media_item.task_id) except ValueError: pass @@ -451,7 +451,7 @@ async def download(self, media_item: MediaItem) -> bool | None: await asyncio.to_thread(Path.chmod, media_item.complete_file, 0o666) if not media_item.is_segment: await self.set_file_datetime(media_item, media_item.complete_file) - self.manager.progress_manager.download_progress.add_completed() + self.manager.progress_manager.files.add_completed() log(f"Download finished: {media_item.url}", 20) self.attempt_task_removal(media_item) return downloaded @@ -459,7 +459,7 @@ async def download(self, media_item: MediaItem) -> bool | None: except SkipDownloadError as e: if not media_item.is_segment: log(f"Download skip {media_item.url}: {e}", 10) - self.manager.progress_manager.download_progress.add_skipped() + self.manager.progress_manager.files.add_skipped() self.attempt_task_removal(media_item) except (DownloadError, ClientResponseError, InvalidContentTypeError): @@ -494,5 +494,5 @@ def write_download_error( full_message = f"{self.log_prefix} Failed: {media_item.url} ({error_log_msg.main_log_msg}) \n -> Referer: {media_item.referer}" log(full_message, 40, exc_info=exc_info) self.manager.log_manager.write_download_error_log(media_item, error_log_msg.csv_log_msg) - self.manager.progress_manager.download_stats_progress.add_failure(error_log_msg.ui_failure) - self.manager.progress_manager.download_progress.add_failed() + self.manager.progress_manager.download_errors.add_failure(error_log_msg.ui_failure) + self.manager.progress_manager.files.add_failed() diff --git a/cyberdrop_dl/downloader/mega_nz.py b/cyberdrop_dl/downloader/mega_nz.py index fe8a697cd..0ee3d774e 100644 --- a/cyberdrop_dl/downloader/mega_nz.py +++ b/cyberdrop_dl/downloader/mega_nz.py @@ -48,7 +48,7 @@ async def _append_content(self, media_item: MediaItem, content: aiohttp.StreamRe await self.client_manager.speed_limiter.acquire(chunk_size) await f.write(chunk) - self.manager.progress_manager.file_progress.advance_file(media_item.task_id, chunk_size) + self.manager.progress_manager.downloads.advance_file(media_item.task_id, chunk_size) check_download_speed() await self._post_download_check(media_item) diff --git a/cyberdrop_dl/managers/__init__.py b/cyberdrop_dl/managers/__init__.py index df8f987c4..a5a5eb238 100644 --- a/cyberdrop_dl/managers/__init__.py +++ b/cyberdrop_dl/managers/__init__.py @@ -42,7 +42,7 @@ def __init__(self) -> None: self.client_manager: ClientManager = field(init=False) self.storage_manager: StorageManager = field(init=False) - self.progress_manager = ProgressManager(self) + self.progress_manager: ProgressManager = ProgressManager(self, portrait=False) self.live_manager: LiveManager = field(init=False) self.task_group: TaskGroup = field(init=False) @@ -90,7 +90,6 @@ async def async_db_close(self) -> None: "Partial shutdown for managers used for hash directory scanner" self.db_manager = await close_if_defined(self.db_manager) self.hash_manager = constants.NOT_DEFINED - self.progress_manager.hash_progress.reset() async def close(self) -> None: """Closes the manager.""" diff --git a/cyberdrop_dl/progress/__init__.py b/cyberdrop_dl/progress/__init__.py index cf9d824d7..f4d9b8fcc 100644 --- a/cyberdrop_dl/progress/__init__.py +++ b/cyberdrop_dl/progress/__init__.py @@ -9,19 +9,18 @@ from pydantic import ByteSize from rich.columns import Columns -from rich.console import Group +from rich.console import Group, RenderableType from rich.layout import Layout from rich.progress import Progress, SpinnerColumn from rich.text import Text from yarl import URL from cyberdrop_dl import __version__, config -from cyberdrop_dl.progress.downloads_progress import DownloadsProgress from cyberdrop_dl.progress.errors import DownloadErrors, ScrapeErrors -from cyberdrop_dl.progress.hash_progress import HashProgress -from cyberdrop_dl.progress.panels import DownloadsPanel, ScrapingPanel +from cyberdrop_dl.progress.files import FileStats +from cyberdrop_dl.progress.hashing import HashingPanel from cyberdrop_dl.progress.sorting import SortingPanel -from cyberdrop_dl.utils.logger import log_spacer +from cyberdrop_dl.progress.ui import DownloadsPanel, ScrapingPanel if TYPE_CHECKING: from collections.abc import AsyncGenerator @@ -51,11 +50,13 @@ def __repr__(self) -> str: return f"{type(self).__name__}(msg={self.msg!r})" -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass(slots=True, frozen=True) class UILayouts: horizontal: Layout vertical: Layout simple: Group + hashing: RenderableType + sorting: RenderableType @classmethod def build(cls, progress: ProgressManager) -> Self: @@ -66,14 +67,14 @@ def build(cls, progress: ProgressManager) -> Self: _ = activity.add_task(f"Running Cyberdrop-DL: v{__version__}", total=100, completed=0) upper_layouts = ( - Layout(progress.download_progress, name="Files", ratio=1, minimum_size=9), - Layout(progress.scrape_stats_progress, name="Scrape Failures", ratio=1), - Layout(progress.download_stats_progress, name="Download Failures", ratio=1), + Layout(progress.files, name="Files", ratio=1, minimum_size=9), + Layout(progress.scrape_errors, name="Scrape Failures", ratio=1), + Layout(progress.download_errors, name="Download Failures", ratio=1), ) lower_layouts = ( - Layout(progress.scraping_progress, name=progress.scraping_progress.title, ratio=20), - Layout(progress.file_progress, name=progress.file_progress.title, ratio=20), + Layout(progress.scrape, name=progress.scrape.title, ratio=20), + Layout(progress.downloads, name=progress.downloads.title, ratio=20), Layout(Columns([activity, progress.status.progress]), name="status_message", ratio=2), ) @@ -83,29 +84,30 @@ def build(cls, progress: ProgressManager) -> Self: horizontal["upper"].split_row(*upper_layouts) vertical["upper"].split_column(*upper_layouts) - simple = Group(activity, progress.download_progress.simple_progress) - return cls(horizontal, vertical, simple) + simple = Group(activity, progress.files.simple_progress) + return cls(horizontal, vertical, simple, progress.hashing, progress.sorting) +@dataclasses.dataclass(slots=True) class ProgressManager: - def __init__(self, manager: Manager) -> None: - self.manager = manager + manager: Manager + + portrait: bool - self.portrait = True - self.file_progress = DownloadsPanel() - self.scraping_progress = ScrapingPanel() - self.status = StatusMessage() + layouts: UILayouts = dataclasses.field(init=False) + status: StatusMessage = dataclasses.field(default_factory=StatusMessage) - self.download_progress: DownloadsProgress = DownloadsProgress() - self.download_stats_progress: DownloadErrors = DownloadErrors() - self.scrape_stats_progress: ScrapeErrors = ScrapeErrors() - self.hash_progress: HashProgress = HashProgress(manager) - self.sorting: SortingPanel = SortingPanel(1) + downloads: DownloadsPanel = dataclasses.field(default_factory=DownloadsPanel) + scrape: ScrapingPanel = dataclasses.field(default_factory=ScrapingPanel) + hashing: HashingPanel = dataclasses.field(default_factory=HashingPanel) + sorting: SortingPanel = dataclasses.field(default_factory=SortingPanel) + files: FileStats = dataclasses.field(default_factory=FileStats) + download_errors: DownloadErrors = dataclasses.field(default_factory=DownloadErrors) + scrape_errors: ScrapeErrors = dataclasses.field(default_factory=ScrapeErrors) + + def __post_init__(self) -> None: self.layouts = UILayouts.build(self) - self.hash_remove_layout = self.hash_progress.get_removed_progress() - self.hash_layout = self.hash_progress.get_renderable() - self.sort_layout = self.sorting.get_renderable() @asynccontextmanager async def show_status_msg(self, msg: str | None) -> AsyncGenerator[None]: @@ -125,6 +127,8 @@ def print_stats(self, start_time: float) -> None: """Prints the stats of the program.""" # if not self.manager.parsed_args.cli_only_args.print_stats: # return + from cyberdrop_dl.utils.logger import log_spacer + end_time = time.perf_counter() runtime = timedelta(seconds=int(end_time - start_time)) total_data_written = ByteSize(self.manager.storage_manager.total_data_written).human_readable(decimal=True) @@ -140,14 +144,14 @@ def print_stats(self, start_time: float) -> None: logger.info(f" Total Downloaded Data: {total_data_written}") logger.info("Download Stats:") - logger.info(f" Downloaded: {self.download_progress.completed_files:,} files") - logger.info(f" Skipped (By Config): {self.download_progress.skipped_files:,} files") - logger.info(f" Skipped (Previously Downloaded): {self.download_progress.previously_completed:,} files") - logger.info(f" Failed: {self.download_stats_progress.failed_files:,} files") + logger.info(f" Downloaded: {self.files.completed_files:,} files") + logger.info(f" Skipped (By Config): {self.files.skipped_files:,} files") + logger.info(f" Skipped (Previously Downloaded): {self.files.previously_completed:,} files") + logger.info(f" Failed: {self.download_errors.failed_files:,} files") logger.info("Unsupported URLs Stats:") - logger.info(f" Sent to Jdownloader: {self.scrape_stats_progress.sent_to_jdownloader:,}") - logger.info(f" Skipped: {self.scrape_stats_progress.unsupported_urls_skipped:,}") + logger.info(f" Sent to Jdownloader: {self.scrape_errors.sent_to_jdownloader:,}") + logger.info(f" Skipped: {self.scrape_errors.unsupported_urls_skipped:,}") self.print_dedupe_stats() @@ -157,14 +161,14 @@ def print_stats(self, start_time: float) -> None: logger.info(f" Videos: {self.sorting.video_count:,}") logger.info(f" Other Files: {self.sorting.other_count:,}") - last_padding = log_failures(self.scrape_stats_progress.return_totals(), "Scrape Failures:") - log_failures(self.download_stats_progress.return_totals(), "Download Failures:", last_padding) + last_padding = log_failures(self.scrape_errors.return_totals(), "Scrape Failures:") + log_failures(self.download_errors.return_totals(), "Download Failures:", last_padding) def print_dedupe_stats(self) -> None: logger.info("Dupe Stats:") - logger.info(f" Newly Hashed: {self.hash_progress.hashed_files:,} files") - logger.info(f" Previously Hashed: {self.hash_progress.prev_hashed_files:,} files") - logger.info(f" Removed (Downloads): {self.hash_progress.removed_files:,} files") + logger.info(f" Newly Hashed: {self.hashing.hashed_files:,} files") + logger.info(f" Previously Hashed: {self.hashing.prev_hashed_files:,} files") + logger.info(f" Removed (Downloads): {self.hashing.removed_files:,} files") def log_failures(failures: list[UIFailure], title: str = "Failures:", last_padding: int = 0) -> int: diff --git a/cyberdrop_dl/progress/_common.py b/cyberdrop_dl/progress/_common.py new file mode 100644 index 000000000..1cfc4c0cf --- /dev/null +++ b/cyberdrop_dl/progress/_common.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +import dataclasses +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from rich.progress import TaskID + + +@dataclasses.dataclass(slots=True) +class TaskCounter: + id: TaskID + count: int = 0 + + +class TasksMap(dict[str, TaskCounter]): + def __getattr__(self, name: str, /) -> TaskCounter: + return self[name] diff --git a/cyberdrop_dl/progress/errors.py b/cyberdrop_dl/progress/errors.py index 94920ec34..f06945325 100644 --- a/cyberdrop_dl/progress/errors.py +++ b/cyberdrop_dl/progress/errors.py @@ -7,28 +7,6 @@ from rich.panel import Panel from rich.progress import BarColumn, Progress, TaskID -_FAILURE_OVERRIDES = { - "ClientConnectorCertificateError": "Client Connector Certificate Error", - "ClientConnectorDNSError": "Client Connector DNS Error", - "ClientConnectorError": "Client Connector Error", - "ClientConnectorSSLError": "Client Connector SSL Error", - "ClientHttpProxyError": "Client HTTP Proxy Error", - "ClientPayloadError": "Client Payload Error", - "ClientProxyConnectionError": "Client Proxy Connection Error", - "ConnectionTimeoutError": "Connection Timeout", - "ContentTypeError": "Content Type Error", - "InvalidURL": "Invalid URL", - "InvalidUrlClientError": "Invalid URL Client Error", - "InvalidUrlRedirectClientError": "Invalid URL Redirect", - "NonHttpUrlRedirectClientError": "Non HTTP URL Redirect", - "RedirectClientError": "Redirect Error", - "ServerConnectionError": "Server Connection Error", - "ServerDisconnectedError": "Server Disconnected", - "ServerFingerprintMismatch": "Server Fingerprint Mismatch", - "ServerTimeoutError": "Server Timeout Error", - "SocketTimeoutError": "Socket Timeout Error", -} - class TaskInfo(NamedTuple): id: TaskID @@ -179,3 +157,26 @@ def cap(word: str) -> str: return word[0].capitalize() + word[1:] return " ".join([cap(word) for word in text.split()]) + + +_FAILURE_OVERRIDES = { + "ClientConnectorCertificateError": "Client Connector Certificate Error", + "ClientConnectorDNSError": "Client Connector DNS Error", + "ClientConnectorError": "Client Connector Error", + "ClientConnectorSSLError": "Client Connector SSL Error", + "ClientHttpProxyError": "Client HTTP Proxy Error", + "ClientPayloadError": "Client Payload Error", + "ClientProxyConnectionError": "Client Proxy Connection Error", + "ConnectionTimeoutError": "Connection Timeout", + "ContentTypeError": "Content Type Error", + "InvalidURL": "Invalid URL", + "InvalidUrlClientError": "Invalid URL Client Error", + "InvalidUrlRedirectClientError": "Invalid URL Redirect", + "NonHttpUrlRedirectClientError": "Non HTTP URL Redirect", + "RedirectClientError": "Redirect Error", + "ServerConnectionError": "Server Connection Error", + "ServerDisconnectedError": "Server Disconnected", + "ServerFingerprintMismatch": "Server Fingerprint Mismatch", + "ServerTimeoutError": "Server Timeout Error", + "SocketTimeoutError": "Socket Timeout Error", +} diff --git a/cyberdrop_dl/progress/downloads_progress.py b/cyberdrop_dl/progress/files.py similarity index 58% rename from cyberdrop_dl/progress/downloads_progress.py rename to cyberdrop_dl/progress/files.py index f17b4ec63..4e128ec59 100644 --- a/cyberdrop_dl/progress/downloads_progress.py +++ b/cyberdrop_dl/progress/files.py @@ -1,47 +1,19 @@ from __future__ import annotations -import dataclasses - from rich.panel import Panel -from rich.progress import BarColumn, Progress, TaskID - -from cyberdrop_dl import signature - - -class SimpleProgress(Progress): - """A progress with a single task""" - - @signature.copy(Progress.__init__) - def __init__(self, *args, **kwargs) -> None: - super().__init__(*args, **kwargs) - self._task_id: TaskID | None = None - - @signature.copy(Progress.add_task) - def add_task(self, *args, **kwargs) -> TaskID: - assert self._task_id is None - self._task_id = super().add_task(*args, **kwargs) - return self._task_id - - -@dataclasses.dataclass(slots=True) -class Tracker: - id: TaskID - count: int = 0 - +from rich.progress import BarColumn, Progress -class Tasks(dict[str, Tracker]): - def __getattr__(self, name: str, /) -> Tracker: - return self[name] +from cyberdrop_dl.progress._common import TaskCounter, TasksMap -class DownloadsProgress: +class FileStats: """Class that keeps track of completed, skipped and failed files.""" def __repr__(self) -> str: return f"{type(self).__name__}({vars(self)!r})" def __init__(self) -> None: - self.progress = Progress( + self._progress = Progress( "[progress.description]{task.description}", BarColumn(bar_width=None), "[progress.percentage]{task.percentage:>6.2f}%", @@ -52,7 +24,7 @@ def __init__(self) -> None: self._total_files = 0 self._panel = Panel( - self.progress, + self._progress, title="Files", border_style="green", padding=(1, 1), @@ -66,7 +38,7 @@ def __init__(self) -> None: "{task.completed:,}", ) - self._tasks = Tasks() + self._tasks = TasksMap() for name, color, desc in ( ("completed", "green", "Completed"), @@ -75,9 +47,9 @@ def __init__(self) -> None: ("queued", "cyan", "Queued"), ("failed", "red", "Failed"), ): - self._tasks[name] = Tracker(self.progress.add_task(f"[{color}]{desc}", total=0)) + self._tasks[name] = TaskCounter(self._progress.add_task(f"[{color}]{desc}", total=0)) - self._tasks["simple"] = Tracker(self.simple_progress.add_task("Completed", total=0)) + self._tasks["simple"] = TaskCounter(self.simple_progress.add_task("Completed", total=0)) def __rich__(self) -> Panel: return self._panel @@ -88,11 +60,11 @@ def update_total(self, increase_total: bool = True) -> None: return self._total_files = self._total_files + 1 - self.progress.update(self._tasks.completed.id, total=self._total_files) - self.progress.update(self._tasks.previously_completed.id, total=self._total_files) - self.progress.update(self._tasks.skipped.id, total=self._total_files) - self.progress.update(self._tasks.failed.id, total=self._total_files) - self.progress.update(self._tasks.queued.id, total=self._total_files) + self._progress.update(self._tasks.completed.id, total=self._total_files) + self._progress.update(self._tasks.previously_completed.id, total=self._total_files) + self._progress.update(self._tasks.skipped.id, total=self._total_files) + self._progress.update(self._tasks.failed.id, total=self._total_files) + self._progress.update(self._tasks.queued.id, total=self._total_files) self.simple_progress.update( self._tasks.simple.id, total=self._total_files, @@ -100,7 +72,7 @@ def update_total(self, increase_total: bool = True) -> None: ) def add_completed(self) -> None: - self.progress.advance(self._tasks.completed.id) + self._progress.advance(self._tasks.completed.id) self._tasks.completed.count += 1 def add_previously_completed(self, increase_total: bool = True) -> None: @@ -108,19 +80,19 @@ def add_previously_completed(self, increase_total: bool = True) -> None: self.update_total() self._tasks.previously_completed.count += 1 - self.progress.advance(self._tasks.previously_completed.id) + self._progress.advance(self._tasks.previously_completed.id) def add_skipped(self) -> None: - self.progress.advance(self._tasks.skipped.id) + self._progress.advance(self._tasks.skipped.id) self._tasks.skipped.count += 1 def add_failed(self) -> None: - self.progress.advance(self._tasks.failed.id) + self._progress.advance(self._tasks.failed.id) self._tasks.failed.count += 1 - def update_queued(self, number: int) -> None: - self._tasks.queued.count = number - self.progress.update(self._tasks.queued.id, completed=self._tasks.queued.count) + def update_queued(self, count: int) -> None: + self._tasks.queued.count = count + self._progress.update(self._tasks.queued.id, completed=count) @property def skipped_files(self) -> int: diff --git a/cyberdrop_dl/progress/hash_progress.py b/cyberdrop_dl/progress/hash_progress.py deleted file mode 100644 index 400190da1..000000000 --- a/cyberdrop_dl/progress/hash_progress.py +++ /dev/null @@ -1,125 +0,0 @@ -from __future__ import annotations - -import contextlib -from pathlib import Path -from typing import TYPE_CHECKING - -from pydantic import ByteSize -from rich.console import Group -from rich.markup import escape -from rich.panel import Panel -from rich.progress import BarColumn, Progress, TaskID - -if TYPE_CHECKING: - from cyberdrop_dl.managers import Manager - - -def _generic_progress() -> Progress: - return Progress("[progress.description]{task.description}", BarColumn(bar_width=None), "{task.completed:,}") - - -class HashProgress: - """Class that keeps track of hashed files.""" - - def __init__(self, manager: Manager) -> None: - self.manager = manager - self._hash_progress = _generic_progress() - self._remove_progress = _generic_progress() - self._match_progress = _generic_progress() - self._file_info = Progress("{task.description}") - self._base_dir: Path | None = None - - # hashing - self._computed_hashes = self._prev_hashed = 0 - self.hash_progress_group = Group(self._file_info, self._hash_progress) - - self._tasks: dict[str, TaskID] = {} - - def add_hashed_task(hash_type: str) -> None: - desc = "[green]Hashed " + escape(f"[{hash_type}]") - self._tasks[hash_type] = self._hash_progress.add_task(desc, total=None) - - add_hashed_task("xxh128") - if manager.config.dupe_cleanup_options.add_md5_hash: - add_hashed_task("md5") - if manager.config.dupe_cleanup_options.add_sha256_hash: - add_hashed_task("sha256") - - self.prev_hashed_files_task_id = self._hash_progress.add_task("[green]Previously Hashed", total=None) - - self._base_dir_task_id = self._file_info.add_task("") - self._file_task_id = self._file_info.add_task("") - - # remove - self.removed_files = 0 - self.removed_progress_group = Group(self._match_progress, self._remove_progress) - self.removed_files_task_id = self._remove_progress.add_task( - "[green]Removed From Downloaded Files", - total=None, - ) - - @property - def hashed_files(self) -> int: - return int(self._computed_hashes / len(self._tasks)) - - @property - def prev_hashed_files(self) -> int: - return int(self._prev_hashed / len(self._tasks)) - - def get_renderable(self) -> Panel: - """Returns the progress bar.""" - return Panel( - self.hash_progress_group, - title=f"Config: {self.manager.config_manager.loaded_config}", - border_style="green", - padding=(1, 1), - ) - - def get_removed_progress(self) -> Panel: - """Returns the progress bar.""" - return Panel(self.removed_progress_group, border_style="green", padding=(1, 1)) - - @contextlib.contextmanager - def currently_hashing_dir(self, path: Path): - self._base_dir = path - desc = "[green]Base dir: [blue]" + escape(f"{self._base_dir}") - self._file_info.update(self._base_dir_task_id, description=desc) - try: - yield - finally: - self._base_dir = None - self._file_info.update(self._base_dir_task_id, description="") - - def update_currently_hashing(self, file: Path) -> None: - if not self._base_dir: - return - file_size = ByteSize(Path(file).stat().st_size) - size_text = file_size.human_readable(decimal=True) - path = file.relative_to(self._base_dir) - desc = "[green]Current file: [blue]" + escape(f"{path}") + f" [green]({size_text})" - self._file_info.update(self._file_task_id, description=desc) - - def add_new_completed_hash(self, hash_type: str) -> None: - """Adds a completed file to the progress bar.""" - self._hash_progress.advance(self._tasks[hash_type], 1) - self._computed_hashes += 1 - - def add_prev_hash(self) -> None: - """Adds a completed file to the progress bar.""" - self._hash_progress.advance(self.prev_hashed_files_task_id, 1) - self._prev_hashed += 1 - - def add_removed_file(self) -> None: - """Adds a removed file to the progress bar.""" - self._remove_progress.advance(self.removed_files_task_id, 1) - self.removed_files += 1 - - def reset(self): - """Resets the progress bar.""" - for task in self._tasks.values(): - self._hash_progress.reset(task) - self._hash_progress.reset(self.prev_hashed_files_task_id) - self._computed_hashes = self._prev_hashed = 0 - - self._remove_progress.reset(self.removed_files_task_id) - self.removed_files = 0 diff --git a/cyberdrop_dl/progress/hashing.py b/cyberdrop_dl/progress/hashing.py new file mode 100644 index 000000000..6f7661b3b --- /dev/null +++ b/cyberdrop_dl/progress/hashing.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +import asyncio +import contextlib +from contextvars import ContextVar +from pathlib import Path +from typing import TYPE_CHECKING + +from pydantic import ByteSize +from rich.console import Group +from rich.markup import escape +from rich.panel import Panel +from rich.progress import BarColumn, Progress + +from cyberdrop_dl import config + +from ._common import TaskCounter, TasksMap + +if TYPE_CHECKING: + from collections.abc import Generator + + +def _get_enabled_hashes(): + yield "xxh128" + if config.get().dupe_cleanup_options.add_md5_hash: + yield "md5" + if config.get().dupe_cleanup_options.add_sha256_hash: + yield "sha256" + + +_base_dir: ContextVar[Path] = ContextVar("_base_dir") + + +class HashingPanel: + """Class that keeps track of hashed files.""" + + def __init__(self) -> None: + self._hash_progress = Progress( + "[progress.description]{task.description}", BarColumn(bar_width=None), "{task.completed:,}" + ) + self._progress = Progress("{task.description}") + self._enabled_hashes: tuple[str, ...] = tuple(_get_enabled_hashes()) + self._computed_hashes: int = 0 + self._prev_hashed: int = 0 + self._tasks: TasksMap = TasksMap() + + for hash_type in self._enabled_hashes: + desc = "[green]Hashed " + escape(f"[{hash_type}]") + self._tasks[hash_type] = TaskCounter(self._hash_progress.add_task(desc, total=None)) + + self._tasks.update( + prev_hashed=TaskCounter(self._hash_progress.add_task("[green]Previously Hashed", total=None)), + removed=TaskCounter(self._progress.add_task("", visible=False)), + base_dir=TaskCounter(self._progress.add_task("")), + file=TaskCounter(self._progress.add_task("")), + ) + + self._panel = Panel( + Group(self._progress, self._hash_progress), + title="Hashing", + border_style="green", + padding=(1, 1), + ) + + @property + def hashed_files(self) -> int: + return int(self._computed_hashes / len(self._enabled_hashes)) + + @property + def prev_hashed_files(self) -> int: + return int(self._prev_hashed / len(self._enabled_hashes)) + + @property + def removed_files(self) -> int: + return self._tasks["removed"].count + + def __rich__(self) -> Panel: + return self._panel + + @contextlib.contextmanager + def currently_hashing_dir(self, path: Path) -> Generator[None]: + token = _base_dir.set(path) + + desc = "[green]Base dir: [blue]" + escape(str(path)) + self._progress.update(self._tasks["base_dir"].id, description=desc) + try: + yield + finally: + _base_dir.reset(token) + self._progress.update(self._tasks["base_dir"].id, description="") + + async def update_currently_hashing(self, file: Path | str) -> None: + file = Path(file) + size = await asyncio.to_thread(lambda *_: file.stat().st_size) + size_text = ByteSize(size).human_readable(decimal=True) + path = file.relative_to(_base_dir.get()) + self._progress.update( + self._tasks["file"].id, + description="[green]Current file: [blue]" + escape(f"{path}") + f" [green]({size_text})", + ) + + def add_new_completed_hash(self, hash_type: str) -> None: + self._hash_progress.advance(self._tasks[hash_type].id) + self._tasks[hash_type].count += 1 + + def add_prev_hash(self) -> None: + self._hash_progress.advance(self._tasks["prev_hashed"].id) + self._tasks["prev_hashed"].count += 1 + + def add_removed_file(self) -> None: + self._hash_progress.advance(self._tasks["removed"].id) + self._tasks["removed"].count += 1 diff --git a/cyberdrop_dl/progress/sorting.py b/cyberdrop_dl/progress/sorting.py index 32ebd6b03..cb4e3e9e1 100644 --- a/cyberdrop_dl/progress/sorting.py +++ b/cyberdrop_dl/progress/sorting.py @@ -2,7 +2,7 @@ from rich.progress import BarColumn, Progress, SpinnerColumn, TaskID -from cyberdrop_dl.progress.panels import UIPanel +from cyberdrop_dl.progress.ui import UIPanel class SortingPanel(UIPanel): @@ -11,7 +11,7 @@ class SortingPanel(UIPanel): title = "Sorting" name = "Folders" - def __init__(self, visible_tasks_limit: int) -> None: + def __init__(self) -> None: progress = Progress( SpinnerColumn(), "[progress.description]{task.description}", @@ -20,7 +20,7 @@ def __init__(self, visible_tasks_limit: int) -> None: "━", "{task.completed}/{task.total} files", ) - super().__init__(progress, visible_tasks_limit) + super().__init__(progress, visible_tasks_limit=1) self.audio_count = self.video_count = self.image_count = self.other_count = 0 diff --git a/cyberdrop_dl/progress/panels.py b/cyberdrop_dl/progress/ui.py similarity index 100% rename from cyberdrop_dl/progress/panels.py rename to cyberdrop_dl/progress/ui.py diff --git a/cyberdrop_dl/scraper/scrape_mapper.py b/cyberdrop_dl/scraper/scrape_mapper.py index 51fa75d20..8a6a09fe6 100644 --- a/cyberdrop_dl/scraper/scrape_mapper.py +++ b/cyberdrop_dl/scraper/scrape_mapper.py @@ -252,7 +252,7 @@ async def send_to_crawler(self, scrape_item: ScrapeItem) -> None: scrape_item.url, scrape_item.parents[0] if scrape_item.parents else None, ) - self.manager.progress_manager.scrape_stats_progress.add_unsupported(sent_to_jdownloader=success) + self.manager.progress_manager.scrape_errors.add_unsupported(sent_to_jdownloader=success) return log(f"Unsupported URL: {scrape_item.url}", 30) @@ -260,7 +260,7 @@ async def send_to_crawler(self, scrape_item: ScrapeItem) -> None: scrape_item.url, scrape_item.parents[0] if scrape_item.parents else None, ) - self.manager.progress_manager.scrape_stats_progress.add_unsupported() + self.manager.progress_manager.scrape_errors.add_unsupported() def filter_items(self, scrape_item: ScrapeItem) -> bool: """Pre-filter scrape items base on URL.""" diff --git a/cyberdrop_dl/utils/utilities.py b/cyberdrop_dl/utils/utilities.py index 68dae5a54..435a85928 100644 --- a/cyberdrop_dl/utils/utilities.py +++ b/cyberdrop_dl/utils/utilities.py @@ -139,7 +139,7 @@ def error_handling_context(self: Crawler | Downloader, item: ScrapeItem | MediaI log(f"Scrape Failed: {link_to_show} ({error_log_msg.main_log_msg})", 40, exc_info=exc_info) self.manager.log_manager.write_scrape_error_log(link_to_show, error_log_msg.csv_log_msg, origin) - self.manager.progress_manager.scrape_stats_progress.add_failure(error_log_msg.ui_failure) + self.manager.progress_manager.scrape_errors.add_failure(error_log_msg.ui_failure) @overload From b258a26d4bfb238e1dec8d2bed257b103389249f Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Mon, 23 Feb 2026 13:02:36 -0500 Subject: [PATCH 10/23] refactor: update progress --- cyberdrop_dl/clients/flaresolverr.py | 4 +- cyberdrop_dl/progress/__init__.py | 75 +++++++------- cyberdrop_dl/progress/_common.py | 49 ++++++++- cyberdrop_dl/progress/errors.py | 63 ++++-------- cyberdrop_dl/progress/files.py | 102 +++++++++---------- cyberdrop_dl/progress/hashing.py | 40 ++++---- cyberdrop_dl/progress/scrape.py | 144 +++++++++++++++++++++++++++ cyberdrop_dl/progress/sorting.py | 30 +++--- cyberdrop_dl/progress/ui.py | 132 ------------------------ 9 files changed, 333 insertions(+), 306 deletions(-) create mode 100644 cyberdrop_dl/progress/scrape.py delete mode 100644 cyberdrop_dl/progress/ui.py diff --git a/cyberdrop_dl/clients/flaresolverr.py b/cyberdrop_dl/clients/flaresolverr.py index 486245b15..8677b6690 100644 --- a/cyberdrop_dl/clients/flaresolverr.py +++ b/cyberdrop_dl/clients/flaresolverr.py @@ -150,9 +150,7 @@ async def _request(self, command: _Command, /, data: Any = None, **kwargs: Any) async with ( self._request_lock, - self.manager.progress_manager.show_status_msg( - f"Waiting For Flaresolverr Response [{self._next_request_id()}]" - ), + self.manager.progress_manager.status.show(f"Waiting For Flaresolverr Response [{self._next_request_id()}]"), ): async with self.manager.client_manager._session.post( self.url, diff --git a/cyberdrop_dl/progress/__init__.py b/cyberdrop_dl/progress/__init__.py index f4d9b8fcc..b4181d4c2 100644 --- a/cyberdrop_dl/progress/__init__.py +++ b/cyberdrop_dl/progress/__init__.py @@ -16,11 +16,12 @@ from yarl import URL from cyberdrop_dl import __version__, config +from cyberdrop_dl.progress._common import ProgressProxy from cyberdrop_dl.progress.errors import DownloadErrors, ScrapeErrors from cyberdrop_dl.progress.files import FileStats from cyberdrop_dl.progress.hashing import HashingPanel +from cyberdrop_dl.progress.scrape import DownloadsPanel, ScrapingPanel from cyberdrop_dl.progress.sorting import SortingPanel -from cyberdrop_dl.progress.ui import DownloadsPanel, ScrapingPanel if TYPE_CHECKING: from collections.abc import AsyncGenerator @@ -30,24 +31,41 @@ from cyberdrop_dl.progress.errors import UIFailure -spinner = SpinnerColumn(style="green", spinner_name="dots") logger = logging.getLogger(__name__) -class StatusMessage: +class StatusMessage(ProgressProxy): + _columns = ( + SpinnerColumn(style="green", spinner_name="dots"), + "[progress.description]{task.description}", + ) + def __init__(self) -> None: - self.progress: Progress = Progress(spinner, "[progress.description]{task.description}") - self._task_id = self.progress.add_task("", total=100, completed=0, visible=False) + super().__init__() + self.activity = Progress(*self._columns) + _ = self.activity.add_task(f"Running Cyberdrop-DL: v{__version__}", total=100, completed=0) + self._task_id = self._progress.add_task("", total=100, completed=0, visible=False) + self._panel = Columns([self.activity, self._progress]) + + def __rich__(self) -> Columns: + return self._panel def update(self, description: str | None = None) -> None: - self.progress.update(self._task_id, description=description, visible=bool(description)) + self._progress.update(self._task_id, description=description, visible=bool(description)) - @property - def msg(self) -> str: - return self.progress._tasks[self._task_id].description + def __str__(self) -> str: + return self._tasks[self._task_id].description def __repr__(self) -> str: - return f"{type(self).__name__}(msg={self.msg!r})" + return f"{type(self).__name__}(msg={self!s})" + + @asynccontextmanager + async def show(self, msg: str | None) -> AsyncGenerator[None]: + try: + self.update(msg) + yield + finally: + self.update() @dataclasses.dataclass(slots=True, frozen=True) @@ -63,28 +81,25 @@ def build(cls, progress: ProgressManager) -> Self: horizontal = Layout() vertical = Layout() - activity = Progress(spinner, "[progress.description]{task.description}") - _ = activity.add_task(f"Running Cyberdrop-DL: v{__version__}", total=100, completed=0) - - upper_layouts = ( - Layout(progress.files, name="Files", ratio=1, minimum_size=9), - Layout(progress.scrape_errors, name="Scrape Failures", ratio=1), - Layout(progress.download_errors, name="Download Failures", ratio=1), + top = ( + Layout(progress.files, ratio=1, minimum_size=9), + Layout(progress.scrape_errors, ratio=1), + Layout(progress.download_errors, ratio=1), ) - lower_layouts = ( - Layout(progress.scrape, name=progress.scrape.title, ratio=20), - Layout(progress.downloads, name=progress.downloads.title, ratio=20), - Layout(Columns([activity, progress.status.progress]), name="status_message", ratio=2), + bottom = ( + Layout(progress.scrape, ratio=20), + Layout(progress.downloads, ratio=20), + Layout(progress.status, ratio=2), ) - horizontal.split_column(Layout(name="upper", ratio=20), *lower_layouts) - vertical.split_column(Layout(name="upper", ratio=60), *lower_layouts) + horizontal.split_column(Layout(name="top", ratio=20), *bottom) + vertical.split_column(Layout(name="top", ratio=60), *bottom) - horizontal["upper"].split_row(*upper_layouts) - vertical["upper"].split_column(*upper_layouts) + horizontal["top"].split_row(*top) + vertical["top"].split_column(*top) - simple = Group(activity, progress.files.simple_progress) + simple = Group(progress.status.activity, progress.files.simple) return cls(horizontal, vertical, simple, progress.hashing, progress.sorting) @@ -109,14 +124,6 @@ class ProgressManager: def __post_init__(self) -> None: self.layouts = UILayouts.build(self) - @asynccontextmanager - async def show_status_msg(self, msg: str | None) -> AsyncGenerator[None]: - try: - self.status.update(msg) - yield - finally: - self.status.update() - @property def layout(self) -> Layout: if self.portrait: diff --git a/cyberdrop_dl/progress/_common.py b/cyberdrop_dl/progress/_common.py index 1cfc4c0cf..689c5ee14 100644 --- a/cyberdrop_dl/progress/_common.py +++ b/cyberdrop_dl/progress/_common.py @@ -4,15 +4,58 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: + from collections.abc import Callable + + from rich.console import RenderableType from rich.progress import TaskID +from types import MappingProxyType +from typing import TYPE_CHECKING, ClassVar + +from rich.markup import escape +from rich.progress import ( + Progress, + ProgressColumn, + Task, + TaskID, +) + + +def truncate(s: str, length: int = 40, placeholder: str = "...") -> str: + return f"{s[: length - len(placeholder)]}{placeholder}" if len(s) >= length else s.ljust(length) + + @dataclasses.dataclass(slots=True) class TaskCounter: id: TaskID count: int = 0 -class TasksMap(dict[str, TaskCounter]): - def __getattr__(self, name: str, /) -> TaskCounter: - return self[name] +@dataclasses.dataclass(slots=True, frozen=True) +class ProgressHook: + advance: Callable[[int], None] + done: Callable[[], None] + speed: Callable[[], float] + + def __enter__(self) -> Callable[[int], None]: + return self.advance + + def __exit__(self, *_) -> None: + self.done() + + +class ProgressProxy: + _columns: ClassVar[tuple[ProgressColumn | str, ...]] + + @classmethod + def _clean_task_desc(cls, desc: str) -> str: + return escape(truncate(desc.encode("ascii", "ignore").decode().strip())) + + def __init__(self) -> None: + self._progress: Progress = Progress(*self._columns) + self._tasks: MappingProxyType[TaskID, Task] = MappingProxyType(self._progress._tasks) + self._tasks_map: dict[str, TaskCounter] = {} + + def __rich__(self) -> RenderableType: + return self._progress diff --git a/cyberdrop_dl/progress/errors.py b/cyberdrop_dl/progress/errors.py index f06945325..6b8155d72 100644 --- a/cyberdrop_dl/progress/errors.py +++ b/cyberdrop_dl/progress/errors.py @@ -2,17 +2,11 @@ import dataclasses import functools -from typing import ClassVar, NamedTuple from rich.panel import Panel -from rich.progress import BarColumn, Progress, TaskID +from rich.progress import BarColumn, TaskID - -class TaskInfo(NamedTuple): - id: TaskID - description: str - completed: float - total: float | None +from cyberdrop_dl.progress._common import ProgressProxy @dataclasses.dataclass(slots=True, order=True) @@ -31,41 +25,25 @@ def __post_init__(self) -> None: self.msg = self.full_msg -def _get_tasks_info_sorted(progress: Progress) -> tuple[list[TaskInfo], bool]: - tasks = [ - TaskInfo( - id=task.id, - description=task.description, - completed=task.completed, - total=task.total, - ) - for task in progress.tasks - ] - - tasks_sorted = sorted(tasks, key=lambda x: x.completed, reverse=True) - were_sorted = tasks == tasks_sorted - return tasks_sorted, were_sorted - - -class _ErrorsPanel: +class _ErrorsPanel(ProgressProxy): """Base class that keeps track of errors and reasons.""" - title: ClassVar[str] + _columns = ( + "[progress.description]{task.description}", + BarColumn(bar_width=None), + "[progress.percentage]{task.percentage:>6.2f}%", + "━", + "{task.completed:,}", + ) def __repr__(self) -> str: return f"{type(self).__name__}(failed_files={self.failed_files!r}, failures={self._failures.keys()!r})" def __init__(self) -> None: - self._progress = Progress( - "[progress.description]{task.description}", - BarColumn(bar_width=None), - "[progress.percentage]{task.percentage:>6.2f}%", - "━", - "{task.completed:,}", - ) - + super().__init__() + self.title = type(self).__name__.removesuffix("Errors") + " Failures" self._failures: dict[str, TaskID] = {} - self.failed_files = 0 + self.failed_files: int = 0 self._panel = Panel( self._progress, title=self.title, @@ -96,14 +74,13 @@ def _redraw(self) -> None: for task_id in self._failures.values(): self._progress.update(task_id, total=self.failed_files) - tasks_sorted, were_sorted = _get_tasks_info_sorted(self._progress) - if were_sorted: + tasks = list(self._tasks.values()) + tasks_sorted = sorted(tasks, key=lambda x: x.completed, reverse=True) + if tasks == tasks_sorted: return for task in tasks_sorted: self._progress.remove_task(task.id) - - for task in tasks_sorted: self._failures[task.description] = self._progress.add_task( task.description, total=task.total, @@ -113,22 +90,16 @@ def _redraw(self) -> None: def return_totals(self) -> list[UIFailure]: """Returns the total number of failed sites and reasons.""" - return sorted( - UIFailure(msg, int(self._progress._tasks[task_id].completed)) for msg, task_id in self._failures.items() - ) + return sorted(UIFailure(msg, int(self._tasks[task_id].completed)) for msg, task_id in self._failures.items()) class DownloadErrors(_ErrorsPanel): """Class that keeps track of download failures and reasons.""" - title: ClassVar[str] = "Download Failures" - class ScrapeErrors(_ErrorsPanel): """Class that keeps track of scraping failures and reasons.""" - title = "Scrape Failures" - def __init__(self) -> None: super().__init__() self.unsupported_urls: int = 0 diff --git a/cyberdrop_dl/progress/files.py b/cyberdrop_dl/progress/files.py index 4e128ec59..293cfcc26 100644 --- a/cyberdrop_dl/progress/files.py +++ b/cyberdrop_dl/progress/files.py @@ -3,42 +3,26 @@ from rich.panel import Panel from rich.progress import BarColumn, Progress -from cyberdrop_dl.progress._common import TaskCounter, TasksMap +from cyberdrop_dl.progress._common import ProgressProxy, TaskCounter -class FileStats: +class FileStats(ProgressProxy): """Class that keeps track of completed, skipped and failed files.""" + _columns = ( + "[progress.description]{task.description}", + BarColumn(bar_width=None), + "[progress.percentage]{task.percentage:>6.2f}%", + "━", + "{task.completed:,}", + ) + def __repr__(self) -> str: return f"{type(self).__name__}({vars(self)!r})" def __init__(self) -> None: - self._progress = Progress( - "[progress.description]{task.description}", - BarColumn(bar_width=None), - "[progress.percentage]{task.percentage:>6.2f}%", - "━", - "{task.completed:,}", - ) - - self._total_files = 0 - - self._panel = Panel( - self._progress, - title="Files", - border_style="green", - padding=(1, 1), - subtitle=f"Total Files: [white]{self._total_files:,}", - ) - self.simple_progress = Progress( - "[progress.description]{task.description}", - BarColumn(bar_width=None), - "[progress.percentage]{task.percentage:>6.2f}%", - "━", - "{task.completed:,}", - ) - - self._tasks = TasksMap() + super().__init__() + self._total_files: int = 0 for name, color, desc in ( ("completed", "green", "Completed"), @@ -47,65 +31,77 @@ def __init__(self) -> None: ("queued", "cyan", "Queued"), ("failed", "red", "Failed"), ): - self._tasks[name] = TaskCounter(self._progress.add_task(f"[{color}]{desc}", total=0)) + self._tasks_map[name] = TaskCounter(self._progress.add_task(f"[{color}]{desc}", total=0)) - self._tasks["simple"] = TaskCounter(self.simple_progress.add_task("Completed", total=0)) + self.simple: Progress = Progress(*self._columns) + self._tasks_map["simple"] = TaskCounter(self.simple.add_task("Completed", total=0)) + self._panel = Panel( + self._progress, + title="Files", + border_style="green", + padding=(1, 1), + subtitle=self.subtitle, + ) def __rich__(self) -> Panel: return self._panel + @property + def subtitle(self) -> str: + return f"Total Files: [white]{self._total_files:,}" + def update_total(self, increase_total: bool = True) -> None: - self._panel.subtitle = f"Total Files: [white]{self._total_files:,}" + self._panel.subtitle = self.subtitle if not increase_total: return self._total_files = self._total_files + 1 - self._progress.update(self._tasks.completed.id, total=self._total_files) - self._progress.update(self._tasks.previously_completed.id, total=self._total_files) - self._progress.update(self._tasks.skipped.id, total=self._total_files) - self._progress.update(self._tasks.failed.id, total=self._total_files) - self._progress.update(self._tasks.queued.id, total=self._total_files) - self.simple_progress.update( - self._tasks.simple.id, + self._progress.update(self._tasks_map["completed"].id, total=self._total_files) + self._progress.update(self._tasks_map["previously_completed"].id, total=self._total_files) + self._progress.update(self._tasks_map["skipped"].id, total=self._total_files) + self._progress.update(self._tasks_map["failed"].id, total=self._total_files) + self._progress.update(self._tasks_map["queued"].id, total=self._total_files) + self.simple.update( + self._tasks_map["simple"].id, total=self._total_files, - completed=self._total_files - self._tasks.queued.count, + completed=self._total_files - self._tasks_map["queued"].count, ) def add_completed(self) -> None: - self._progress.advance(self._tasks.completed.id) - self._tasks.completed.count += 1 + self._progress.advance(self._tasks_map["completed"].id) + self._tasks_map["completed"].count += 1 def add_previously_completed(self, increase_total: bool = True) -> None: if increase_total: self.update_total() - self._tasks.previously_completed.count += 1 - self._progress.advance(self._tasks.previously_completed.id) + self._tasks_map["previously_completed"].count += 1 + self._progress.advance(self._tasks_map["previously_completed"].id) def add_skipped(self) -> None: - self._progress.advance(self._tasks.skipped.id) - self._tasks.skipped.count += 1 + self._progress.advance(self._tasks_map["skipped"].id) + self._tasks_map["skipped"].count += 1 def add_failed(self) -> None: - self._progress.advance(self._tasks.failed.id) - self._tasks.failed.count += 1 + self._progress.advance(self._tasks_map["failed"].id) + self._tasks_map["failed"].count += 1 def update_queued(self, count: int) -> None: - self._tasks.queued.count = count - self._progress.update(self._tasks.queued.id, completed=count) + self._tasks_map["queued"].count = count + self._progress.update(self._tasks_map["queued"].id, completed=count) @property def skipped_files(self) -> int: - return self._tasks.skipped.count + return self._tasks_map["skipped"].count @property def failed_files(self) -> int: - return self._tasks.failed.count + return self._tasks_map["failed"].count @property def completed_files(self) -> int: - return self._tasks.completed.count + return self._tasks_map["completed"].count @property def previously_completed(self) -> int: - return self._tasks.previously_completed.count + return self._tasks_map["previously_completed"].count diff --git a/cyberdrop_dl/progress/hashing.py b/cyberdrop_dl/progress/hashing.py index 6f7661b3b..28ac2f131 100644 --- a/cyberdrop_dl/progress/hashing.py +++ b/cyberdrop_dl/progress/hashing.py @@ -14,7 +14,7 @@ from cyberdrop_dl import config -from ._common import TaskCounter, TasksMap +from ._common import ProgressProxy, TaskCounter if TYPE_CHECKING: from collections.abc import Generator @@ -31,24 +31,25 @@ def _get_enabled_hashes(): _base_dir: ContextVar[Path] = ContextVar("_base_dir") -class HashingPanel: +class HashingPanel(ProgressProxy): """Class that keeps track of hashed files.""" + _columns = ("{task.description}",) + def __init__(self) -> None: + super().__init__() self._hash_progress = Progress( "[progress.description]{task.description}", BarColumn(bar_width=None), "{task.completed:,}" ) - self._progress = Progress("{task.description}") self._enabled_hashes: tuple[str, ...] = tuple(_get_enabled_hashes()) self._computed_hashes: int = 0 self._prev_hashed: int = 0 - self._tasks: TasksMap = TasksMap() for hash_type in self._enabled_hashes: desc = "[green]Hashed " + escape(f"[{hash_type}]") - self._tasks[hash_type] = TaskCounter(self._hash_progress.add_task(desc, total=None)) + self._tasks_map[hash_type] = TaskCounter(self._hash_progress.add_task(desc, total=None)) - self._tasks.update( + self._tasks_map.update( prev_hashed=TaskCounter(self._hash_progress.add_task("[green]Previously Hashed", total=None)), removed=TaskCounter(self._progress.add_task("", visible=False)), base_dir=TaskCounter(self._progress.add_task("")), @@ -62,6 +63,9 @@ def __init__(self) -> None: padding=(1, 1), ) + def __rich__(self) -> Panel: + return self._panel + @property def hashed_files(self) -> int: return int(self._computed_hashes / len(self._enabled_hashes)) @@ -72,22 +76,18 @@ def prev_hashed_files(self) -> int: @property def removed_files(self) -> int: - return self._tasks["removed"].count - - def __rich__(self) -> Panel: - return self._panel + return self._tasks_map["removed"].count @contextlib.contextmanager def currently_hashing_dir(self, path: Path) -> Generator[None]: token = _base_dir.set(path) - desc = "[green]Base dir: [blue]" + escape(str(path)) - self._progress.update(self._tasks["base_dir"].id, description=desc) + self._progress.update(self._tasks_map["base_dir"].id, description=desc) try: yield finally: _base_dir.reset(token) - self._progress.update(self._tasks["base_dir"].id, description="") + self._progress.update(self._tasks_map["base_dir"].id, description="") async def update_currently_hashing(self, file: Path | str) -> None: file = Path(file) @@ -95,18 +95,18 @@ async def update_currently_hashing(self, file: Path | str) -> None: size_text = ByteSize(size).human_readable(decimal=True) path = file.relative_to(_base_dir.get()) self._progress.update( - self._tasks["file"].id, + self._tasks_map["file"].id, description="[green]Current file: [blue]" + escape(f"{path}") + f" [green]({size_text})", ) def add_new_completed_hash(self, hash_type: str) -> None: - self._hash_progress.advance(self._tasks[hash_type].id) - self._tasks[hash_type].count += 1 + self._hash_progress.advance(self._tasks_map[hash_type].id) + self._tasks_map[hash_type].count += 1 def add_prev_hash(self) -> None: - self._hash_progress.advance(self._tasks["prev_hashed"].id) - self._tasks["prev_hashed"].count += 1 + self._hash_progress.advance(self._tasks_map["prev_hashed"].id) + self._tasks_map["prev_hashed"].count += 1 def add_removed_file(self) -> None: - self._hash_progress.advance(self._tasks["removed"].id) - self._tasks["removed"].count += 1 + self._hash_progress.advance(self._tasks_map["removed"].id) + self._tasks_map["removed"].count += 1 diff --git a/cyberdrop_dl/progress/scrape.py b/cyberdrop_dl/progress/scrape.py new file mode 100644 index 000000000..8384a89f5 --- /dev/null +++ b/cyberdrop_dl/progress/scrape.py @@ -0,0 +1,144 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, ClassVar + +from rich.console import Group +from rich.panel import Panel +from rich.progress import ( + BarColumn, + DownloadColumn, + SpinnerColumn, + TaskID, + TimeRemainingColumn, + TransferSpeedColumn, +) + +from cyberdrop_dl.progress._common import ProgressHook, ProgressProxy + +if TYPE_CHECKING: + from yarl import URL + +_COLOR: str = "plum3" + + +class OverFlow(ProgressProxy): + _desc: ClassVar[str] = "[{color}]... and {number:,} other {name}" + _columns = ("[progress.description]{task.description}",) + + def __init__(self, name: str) -> None: + super().__init__() + self.name: str = name + self._count: int = 0 + self._task_id: TaskID = self._progress.add_task(str(self), visible=False) + + def __str__(self) -> str: + return self._desc.format(color=_COLOR, number=self._count, name=self.name) + + def __repr__(self) -> str: + return f"{type(self).__name__}(desc={self!s})" + + def update(self, count: int) -> None: + self._count = count + self._progress.update(self._task_id, description=str(self), visible=count > 0) + + +class UIPanel(ProgressProxy): + unit: ClassVar[str] + _desc_fmt: ClassVar[str] = "[{color}]{description}" + + def __repr__(self) -> str: + return f"{type(self).__name__}(progress={self._progress!r})" + + def __init__(self, visible_tasks_limit: int) -> None: + super().__init__() + self.title = type(self).__name__.removesuffix("Panel") + self._overflow = OverFlow(self.unit) + self._limit = visible_tasks_limit + self._panel = Panel( + Group(self._progress, self._overflow), + title=self.title, + border_style="green", + padding=(1, 1), + ) + + def __rich__(self) -> Panel: + return self._panel + + def _redraw(self) -> None: + self._overflow.update(count=len(self._tasks) - self._limit) + + def add_task(self, description: str, total: float | None = None) -> TaskID: + task_id = self._progress.add_task( + self._desc_fmt.format(color=_COLOR, description=description), + total=total, + visible=len(self._tasks) < self._limit, + ) + self._redraw() + return task_id + + def remove_task(self, task_id: TaskID) -> None: + self._progress.remove_task(task_id) + self._redraw() + + def new_hook(self, description: object, total: float | None = None) -> ProgressHook: + task_id = self.add_task(str(description), total) + + def advance(amount: int) -> None: + self._advance(task_id, amount) + + def done() -> None: + self.remove_task(task_id) + + def speed() -> float: + return self.get_speed(task_id) + + return ProgressHook(advance, done, speed) + + def _advance(self, task_id: TaskID, amount: int) -> None: + self._progress.advance(task_id, amount) + + def get_speed(self, task_id: TaskID) -> float: + task = self._tasks[task_id] + return task.finished_speed or task.speed or 0 + + +class ScrapingPanel(UIPanel): + unit: ClassVar[str] = "URLs" + _columns = SpinnerColumn(), "[progress.description]{task.description}" + + def __init__(self) -> None: + super().__init__(visible_tasks_limit=5) + + def new_task(self, url: URL) -> TaskID: # type: ignore[reportIncompatibleMethodOverride] + return self.add_task(str(url)) + + +class DownloadsPanel(UIPanel): + unit: ClassVar[str] = "files" + _columns = ( + SpinnerColumn(), + "[progress.description]{task.description}", + BarColumn(bar_width=None), + "[progress.percentage]{task.percentage:>6.2f}%", + "━", + DownloadColumn(), + "━", + TransferSpeedColumn(), + "━", + TimeRemainingColumn(), + ) + + def __init__(self) -> None: + self.total_data_written: int = 0 + super().__init__(visible_tasks_limit=10) + + def new_task(self, *, domain: str, filename: str, expected_size: int | None = None) -> TaskID: # type: ignore[reportIncompatibleMethodOverride] + description = self._clean_task_desc(filename.rsplit("/", 1)[-1]) + return self.add_task(description, expected_size) + + def _advance(self, task_id: TaskID, amount: int) -> None: + self.total_data_written += amount + super()._advance(task_id, amount) + + def advance_file(self, task_id: TaskID, amount: int) -> None: + self._advance(task_id, amount) diff --git a/cyberdrop_dl/progress/sorting.py b/cyberdrop_dl/progress/sorting.py index cb4e3e9e1..3651ea52c 100644 --- a/cyberdrop_dl/progress/sorting.py +++ b/cyberdrop_dl/progress/sorting.py @@ -1,27 +1,27 @@ from __future__ import annotations -from rich.progress import BarColumn, Progress, SpinnerColumn, TaskID +from typing import ClassVar -from cyberdrop_dl.progress.ui import UIPanel +from rich.progress import BarColumn, SpinnerColumn, TaskID + +from cyberdrop_dl.progress.scrape import UIPanel class SortingPanel(UIPanel): """Class that keeps track of sorted files.""" - title = "Sorting" - name = "Folders" + unit: ClassVar[str] = "Folders" + _columns = ( + SpinnerColumn(), + "[progress.description]{task.description}", + BarColumn(bar_width=None), + "[progress.percentage]{task.percentage:>6.2f}%", + "━", + "{task.completed}/{task.total} files", + ) def __init__(self) -> None: - progress = Progress( - SpinnerColumn(), - "[progress.description]{task.description}", - BarColumn(bar_width=None), - "[progress.percentage]{task.percentage:>6.2f}%", - "━", - "{task.completed}/{task.total} files", - ) - super().__init__(progress, visible_tasks_limit=1) - + super().__init__(visible_tasks_limit=1) self.audio_count = self.video_count = self.image_count = self.other_count = 0 def new_task(self, folder: str, expected_size: int | None) -> TaskID: @@ -29,7 +29,7 @@ def new_task(self, folder: str, expected_size: int | None) -> TaskID: return super().add_task(description, expected_size) def advance_folder(self, task_id: TaskID, amount: int = 1) -> None: - self._progress.advance(task_id, amount) + self._advance(task_id, amount) def increment_audio(self) -> None: self.audio_count += 1 diff --git a/cyberdrop_dl/progress/ui.py b/cyberdrop_dl/progress/ui.py deleted file mode 100644 index 7c44a18bc..000000000 --- a/cyberdrop_dl/progress/ui.py +++ /dev/null @@ -1,132 +0,0 @@ -from __future__ import annotations - -from types import MappingProxyType -from typing import TYPE_CHECKING, ClassVar - -from rich.console import Group -from rich.markup import escape -from rich.panel import Panel -from rich.progress import ( - BarColumn, - DownloadColumn, - Progress, - SpinnerColumn, - TaskID, - TimeRemainingColumn, - TransferSpeedColumn, -) - -if TYPE_CHECKING: - from yarl import URL - -_COLOR: str = "plum3" - - -def truncate(s: str, length: int = 40, placeholder: str = "...") -> str: - return f"{s[: length - len(placeholder)]}{placeholder}" if len(s) >= length else s.ljust(length) - - -class OverFlow: - _desc: ClassVar[str] = "[{color}]... and {number:,} other {name}" - - def __init__(self, name: str) -> None: - self.name: str = name - self.progress: Progress = Progress("[progress.description]{task.description}") - self._task_id = self.progress.add_task(self._format(count=0), visible=False) - - def _format(self, count: int) -> str: - return self._desc.format(color=_COLOR, number=count, name=self.name) - - def update(self, count: int) -> None: - self.progress.update(self._task_id, description=self._format(count=count), visible=count > 0) - - -class UIPanel: - title: ClassVar[str] - type_str: ClassVar[str] = "files" - desc_fmt: ClassVar[str] = "[{color}]{description}" - - def __init__(self, progress: Progress, visible_tasks_limit: int) -> None: - self._progress = progress - self._overflow = OverFlow(self.type_str) - self._limit = visible_tasks_limit - self._tasks = MappingProxyType(self._progress._tasks) - - @classmethod - def _clean_task_desc(cls, desc: str) -> str: - return escape(truncate(desc.encode("ascii", "ignore").decode().strip(), length=40)) - - def __rich__(self) -> Panel: - return self.get_renderable() - - def get_renderable(self) -> Panel: - return Panel( - Group(self._progress, self._overflow.progress), - title=self.title, - border_style="green", - padding=(1, 1), - ) - - def add_task(self, description: str, total: float | None = None) -> TaskID: - task_id = self._progress.add_task( - self.desc_fmt.format(color=_COLOR, description=description), - total=total, - visible=len(self._tasks) < self._limit, - ) - self.redraw() - return task_id - - def remove_task(self, task_id: TaskID) -> None: - self._progress.remove_task(task_id) - self.redraw() - - def redraw(self) -> None: - self._overflow.update(count=len(self._tasks) - self._limit) - - -class ScrapingPanel(UIPanel): - title: ClassVar[str] = "Scraping" - type_str: ClassVar[str] = "URLs" - - def __init__(self) -> None: - progress = Progress(SpinnerColumn(), "[progress.description]{task.description}") - super().__init__(progress, visible_tasks_limit=5) - - def new_task(self, url: URL) -> TaskID: # type: ignore[reportIncompatibleMethodOverride] - return self.add_task(str(url)) - - -class DownloadsPanel(UIPanel): - title: ClassVar[str] = "Downloads" - _base_columns = (SpinnerColumn(), "[progress.description]{task.description}", BarColumn(bar_width=None)) - _horizontal = ( - *_base_columns, - "[progress.percentage]{task.percentage:>6.2f}%", - "━", - DownloadColumn(), - "━", - TransferSpeedColumn(), - "━", - TimeRemainingColumn(), - ) - _vertical = (*_base_columns, DownloadColumn(), "━", TransferSpeedColumn()) - - def __init__(self) -> None: - self.total_data_written: int = 0 - progress = Progress(*self._vertical) if True else Progress(*self._horizontal) - super().__init__(progress, visible_tasks_limit=10) - - def new_task(self, *, domain: str, filename: str, expected_size: int | None = None) -> TaskID: # type: ignore[reportIncompatibleMethodOverride] - description = self._clean_task_desc(filename.split("/")[-1]) - if not True: - description = f"({domain.upper()}) {description}" - - return super().add_task(description, expected_size) - - def advance_file(self, task_id: TaskID, amount: int) -> None: - self.total_data_written += amount - self._progress.advance(task_id, amount) - - def get_speed(self, task_id: TaskID) -> float: - task = self._tasks[task_id] - return task.finished_speed or task.speed or 0 From 4ba1deceb9e7089f710feaff36304bdda2e5877d Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Mon, 23 Feb 2026 14:12:09 -0500 Subject: [PATCH 11/23] refactor: update jdownloader --- cyberdrop_dl/clients/jdownloader.py | 112 ++++++++++++++++++++++++++ cyberdrop_dl/exceptions.py | 14 ++-- cyberdrop_dl/scraper/filters.py | 46 ----------- cyberdrop_dl/scraper/jdownloader.py | 98 ---------------------- cyberdrop_dl/scraper/scrape_mapper.py | 99 +++++++++-------------- 5 files changed, 157 insertions(+), 212 deletions(-) create mode 100644 cyberdrop_dl/clients/jdownloader.py delete mode 100644 cyberdrop_dl/scraper/filters.py delete mode 100644 cyberdrop_dl/scraper/jdownloader.py diff --git a/cyberdrop_dl/clients/jdownloader.py b/cyberdrop_dl/clients/jdownloader.py new file mode 100644 index 000000000..4b01513bd --- /dev/null +++ b/cyberdrop_dl/clients/jdownloader.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +import asyncio +import dataclasses +import logging +from typing import TYPE_CHECKING, Self + +from myjdapi import myjdapi + +from cyberdrop_dl.exceptions import JDownloaderError + +if TYPE_CHECKING: + from pathlib import Path + + from myjdapi.myjdapi import Jddevice + + from cyberdrop_dl.config import Config + from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL + + +logger = logging.getLogger(__name__) + + +@dataclasses.dataclass(slots=True) +class JDownloaderConfig: + enabled: bool + username: str + password: str + device: str + download_dir: Path + autostart: bool + + @staticmethod + def from_config(config: Config) -> JDownloaderConfig: + download_dir = config.runtime_options.jdownloader_download_dir or config.files.download_folder + return JDownloaderConfig( + enabled=config.runtime_options.send_unsupported_to_jdownloader, + device=config.auth.jdownloader.device, + username=config.auth.jdownloader.username, + password=config.auth.jdownloader.password, + download_dir=download_dir.resolve(), + autostart=config.runtime_options.jdownloader_autostart, + ) + + +@dataclasses.dataclass(slots=True) +class JDownloader: + """Class that handles connecting and sending links to JDownloader.""" + + config: JDownloaderConfig + _enabled: bool = dataclasses.field(init=False) + _device: Jddevice | None = dataclasses.field(default=None, init=False) + + @classmethod + def new(cls, options: Config | JDownloaderConfig, /) -> Self: + if not isinstance(options, JDownloaderConfig): + options = JDownloaderConfig.from_config(options) + return cls(options) + + def __post_init__(self): + self._enabled = self.config.enabled + + async def _connect(self) -> None: + if not all((self.config.username, self.config.password, self.config.device)): + raise JDownloaderError("JDownloader credentials were not provided.") + + api = myjdapi.Myjdapi() + api.set_app_key("CYBERDROP-DL") + await asyncio.to_thread(api.connect, self.config.username, self.config.password) + self._device = api.get_device(self.config.device) + + async def connect(self) -> None: + if not self._enabled or self._device is not None: + return + try: + return await self._connect() + except JDownloaderError as e: + msg = e.message + except myjdapi.MYJDDeviceNotFoundException: + msg = f"Device not found ({self.config.device})" + except myjdapi.MYJDApiException as e: + msg = e + + logger.error(f"Failed to connect to jDownloader: {msg}") + self._enabled = False + + async def send( + self, + url: AbsoluteHttpURL, + title: str, + download_path: Path | None = None, + ) -> None: + assert self._device is not None + try: + download_folder = self.config.download_dir + if download_path: + download_folder = download_folder / download_path + + await asyncio.to_thread( + self._device.linkgrabber.add_links, + [ + { + "autostart": self.config.autostart, + "links": str(url), + "packageName": title if title else "Cyberdrop-DL", + "destinationFolder": str(download_folder), + "overwritePackagizerRules": True, + }, + ], + ) + except (AssertionError, myjdapi.MYJDException) as e: + raise JDownloaderError(str(e)) from e diff --git a/cyberdrop_dl/exceptions.py b/cyberdrop_dl/exceptions.py index 06fb0fb22..493871b3f 100644 --- a/cyberdrop_dl/exceptions.py +++ b/cyberdrop_dl/exceptions.py @@ -5,12 +5,9 @@ from pathlib import Path from typing import TYPE_CHECKING -from yaml import YAMLError -from yarl import URL - -from cyberdrop_dl.constants import VALIDATION_ERROR_FOOTER - if TYPE_CHECKING: + from yarl import URL + from cyberdrop_dl.data_structures.url_objects import MediaItem, ScrapeItem @@ -211,6 +208,8 @@ class JDownloaderError(CDLBaseError): class InvalidYamlError(CDLBaseError): def __init__(self, file: Path, e: Exception) -> None: """This error will be thrown when a yaml config file has invalid values.""" + from yaml import YAMLError + file_path = file.resolve() ui_failure = "Invalid YAML" msg = f"Unable to read file '{file_path}'" @@ -221,8 +220,7 @@ def __init__(self, file: Path, e: Exception) -> None: msg += f"\n\nThe error was found in this line: \n {mark}" problem = getattr(e, "problem", str(e)) - msg += f"\n\n{problem.capitalize()}" - msg += f"\n\n{VALIDATION_ERROR_FOOTER}" + msg += f"\n\n{problem.capitalize()}\n\nPlease delete the file or fix the errors" super().__init__(ui_failure, message=msg, origin=file) @@ -244,6 +242,8 @@ def create_error_msg(error: int | str) -> str: def get_origin(origin: ScrapeItem | Path | MediaItem | URL | None = None) -> Path | URL | None: + from yarl import URL + if origin and not isinstance(origin, URL | Path): return origin.parents[0] if origin.parents else None return origin diff --git a/cyberdrop_dl/scraper/filters.py b/cyberdrop_dl/scraper/filters.py deleted file mode 100644 index 7c6dc8993..000000000 --- a/cyberdrop_dl/scraper/filters.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import annotations - -import datetime -from typing import TYPE_CHECKING - -from yarl import URL - -from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL - -if TYPE_CHECKING: - from collections.abc import Sequence - - from cyberdrop_dl.data_structures.url_objects import ScrapeItem - - -def is_valid_url(scrape_item: ScrapeItem) -> bool: - if not scrape_item.url: - return False - if not isinstance(scrape_item.url, URL): - try: - scrape_item.url = AbsoluteHttpURL(scrape_item.url) - except AttributeError: - return False - try: - if not scrape_item.url.host: - return False - except AttributeError: - return False - - return True - - -def is_outside_date_range(scrape_item: ScrapeItem, before: datetime.date | None, after: datetime.date | None) -> bool: - skip = False - item_date = scrape_item.completed_at or scrape_item.created_at - if not item_date: - return False - date = datetime.datetime.fromtimestamp(item_date).date() - if (after and date < after) or (before and date > before): - skip = True - - return skip - - -def is_in_domain_list(scrape_item: ScrapeItem, domain_list: Sequence[str]) -> bool: - return any(domain in scrape_item.url.host for domain in domain_list) diff --git a/cyberdrop_dl/scraper/jdownloader.py b/cyberdrop_dl/scraper/jdownloader.py deleted file mode 100644 index fff9fe0a9..000000000 --- a/cyberdrop_dl/scraper/jdownloader.py +++ /dev/null @@ -1,98 +0,0 @@ -from __future__ import annotations - -import dataclasses -from typing import TYPE_CHECKING - -from myjdapi import myjdapi - -from cyberdrop_dl.exceptions import JDownloaderError -from cyberdrop_dl.utils.logger import log - -if TYPE_CHECKING: - from pathlib import Path - - from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL - from cyberdrop_dl.managers import Manager - - -@dataclasses.dataclass(slots=True) -class JDownloaderConfig: - enabled: bool - username: str - password: str - device: str - download_dir: Path - autostart: bool - - @staticmethod - def from_manager(manager: Manager) -> JDownloaderConfig: - download_dir = manager.config.runtime_options.jdownloader_download_dir or manager.path_manager.download_folder - return JDownloaderConfig( - enabled=manager.config.runtime_options.send_unsupported_to_jdownloader, - device=manager.auth_config.jdownloader.device, - username=manager.auth_config.jdownloader.username, - password=manager.auth_config.jdownloader.password, - download_dir=download_dir.resolve(), - autostart=manager.config.runtime_options.jdownloader_autostart, - ) - - -class JDownloader: - """Class that handles connecting and passing links to JDownloader.""" - - def __init__(self, options: Manager | JDownloaderConfig, /) -> None: - if isinstance(options, JDownloaderConfig): - self._config = options - else: - self._config = JDownloaderConfig.from_manager(options) - self.enabled = self._config.enabled - self._agent = None - - def _connect(self) -> None: - if not all((self._config.username, self._config.password, self._config.device)): - raise JDownloaderError("JDownloader credentials were not provided.") - jd = myjdapi.Myjdapi() - jd.set_app_key("CYBERDROP-DL") - jd.connect(self._config.username, self._config.password) - self._agent = jd.get_device(self._config.device) - - def connect(self) -> None: - if not self.enabled or self._agent is not None: - return - try: - return self._connect() - except JDownloaderError as e: - msg = e.message - except myjdapi.MYJDDeviceNotFoundException: - msg = f"Device not found ({self._config.device})" - except myjdapi.MYJDApiException as e: - msg = e - - log(f"Failed to connect to jDownloader: {msg}", 40) - self.enabled = False - - def direct_unsupported_to_jdownloader( - self, - url: AbsoluteHttpURL, - title: str, - relative_download_path: Path | None = None, - ) -> None: - """Sends links to JDownloader.""" - try: - assert self._agent is not None - download_folder = self._config.download_dir - if relative_download_path: - download_folder = download_folder / relative_download_path - self._agent.linkgrabber.add_links( - [ - { - "autostart": self._config.autostart, - "links": str(url), - "packageName": title if title else "Cyberdrop-DL", - "destinationFolder": str(download_folder), - "overwritePackagizerRules": True, - }, - ], - ) - except (AssertionError, myjdapi.MYJDException) as e: - raise JDownloaderError(str(e)) from e diff --git a/cyberdrop_dl/scraper/scrape_mapper.py b/cyberdrop_dl/scraper/scrape_mapper.py index 8a6a09fe6..5a2ef5107 100644 --- a/cyberdrop_dl/scraper/scrape_mapper.py +++ b/cyberdrop_dl/scraper/scrape_mapper.py @@ -2,15 +2,15 @@ import asyncio import contextlib +import datetime import re -from datetime import date, datetime from pathlib import Path from typing import TYPE_CHECKING, Literal, Self import aiofiles -from yarl import URL from cyberdrop_dl import config +from cyberdrop_dl.clients.jdownloader import JDownloader from cyberdrop_dl.constants import REGEX_LINKS, BlockedDomains from cyberdrop_dl.crawlers._chevereto import CheveretoCrawler from cyberdrop_dl.crawlers.crawler import Crawler, create_crawlers @@ -20,17 +20,15 @@ from cyberdrop_dl.crawlers.wordpress import WordPressHTMLCrawler, WordPressMediaCrawler from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL, ScrapeItem from cyberdrop_dl.exceptions import JDownloaderError, NoExtensionError -from cyberdrop_dl.scraper.filters import is_in_domain_list, is_outside_date_range, is_valid_url -from cyberdrop_dl.scraper.jdownloader import JDownloader from cyberdrop_dl.utils.logger import log, log_spacer from cyberdrop_dl.utils.utilities import get_download_path, remove_trailing_slash if TYPE_CHECKING: - from collections.abc import AsyncGenerator, Generator + from collections.abc import AsyncGenerator, Generator, Sequence import aiosqlite - from cyberdrop_dl.config.global_model import GenericCrawlerInstances, GlobalSettings + from cyberdrop_dl.config.settings import GenericCrawlerInstances from cyberdrop_dl.crawlers import Crawler from cyberdrop_dl.managers import Manager @@ -39,6 +37,22 @@ _crawlers_disabled_at_runtime: set[str] = set() +def is_outside_date_range(scrape_item: ScrapeItem, before: datetime.date | None, after: datetime.date | None) -> bool: + skip = False + item_date = scrape_item.completed_at or scrape_item.created_at + if not item_date: + return False + date = datetime.datetime.fromtimestamp(item_date).date() + if (after and date < after) or (before and date > before): + skip = True + + return skip + + +def is_in_domain_list(scrape_item: ScrapeItem, domain_list: Sequence[str]) -> bool: + return any(domain in scrape_item.url.host for domain in domain_list) + + class ScrapeMapper: """This class maps links to their respective handlers, or JDownloader if they are unsupported.""" @@ -46,7 +60,7 @@ def __init__(self, manager: Manager) -> None: self.manager = manager self.existing_crawlers: dict[str, Crawler] = {} self.direct_crawler = DirectHttpFile(self.manager) - self.jdownloader = JDownloader(self.manager) + self.jdownloader = JDownloader.new(config.get()) self.jdownloader_whitelist = config.get().runtime_options.jdownloader_whitelist self.using_input_file = False self.groups = set() @@ -59,23 +73,15 @@ def __init__(self, manager: Manager) -> None: def group_count(self) -> int: return len(self.groups) - @property - def global_settings(self) -> GlobalSettings: - return config.get() - - @property - def enable_generic_crawler(self) -> bool: - return self.global_settings.general.enable_generic_crawler - def start_scrapers(self) -> None: """Starts all scrapers.""" from cyberdrop_dl import plugins self.existing_crawlers = get_crawlers_mapping(self.manager) - generic_crawlers = create_generic_crawlers_by_config(self.global_settings.generic_crawlers_instances) + generic_crawlers = create_generic_crawlers_by_config(config.get().generic_crawlers_instances) for crawler in generic_crawlers: register_crawler(self.existing_crawlers, crawler(self.manager), from_user=True) - disable_crawlers_by_config(self.existing_crawlers, self.global_settings.general.disable_crawlers) + disable_crawlers_by_config(self.existing_crawlers, config.get().general.disable_crawlers) plugins.load(self.manager) async def start_real_debrid(self) -> None: @@ -100,32 +106,20 @@ async def run(self) -> None: """Starts the orchestra.""" self.start_scrapers() await self.manager.db_manager.history_table.update_previously_unsupported(self.existing_crawlers) - self.jdownloader.connect() + await self.jdownloader.connect() await self.start_real_debrid() self.direct_crawler._init_downloader() async for item in self.get_input_items(): self.manager.task_group.create_task(self.send_to_crawler(item)) async def get_input_items(self) -> AsyncGenerator[ScrapeItem]: - item_limit = 0 - if self.manager.parsed_args.cli_only_args.retry_any and self.manager.parsed_args.cli_only_args.max_items_retry: - item_limit = self.manager.parsed_args.cli_only_args.max_items_retry - - if self.manager.parsed_args.cli_only_args.retry_failed: - items_generator = self.load_failed_links() - elif self.manager.parsed_args.cli_only_args.retry_all: - items_generator = self.load_all_links() - elif self.manager.parsed_args.cli_only_args.retry_maintenance: - items_generator = self.load_all_bunkr_failed_links_via_hash() - else: - items_generator = self.load_links() + items_generator = self.load_links(self.manager.path_manager.input_file) + children_limits = config.get().download_options.maximum_number_of_children async for item in items_generator: await self.manager.states.RUNNING.wait() - item.children_limits = config.get().download_options.maximum_number_of_children - if self.filter_items(item): - if item_limit and self.count >= item_limit: - break + item.children_limits = children_limits + if self.should_scrape(item): yield item self.count += 1 @@ -159,10 +153,10 @@ async def parse_input_file_groups(self) -> AsyncGenerator[tuple[str, list[Absolu # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`` - async def load_links(self) -> AsyncGenerator[ScrapeItem]: + async def load_links(self, source: list[AbsoluteHttpURL] | Path) -> AsyncGenerator[ScrapeItem]: """Loads links from args / input file.""" - if not self.manager.parsed_args.cli_only_args.links: + if isinstance(source, Path): self.using_input_file = True async for group_name, urls in self.parse_input_file_groups(): for url in urls: @@ -176,7 +170,7 @@ async def load_links(self) -> AsyncGenerator[ScrapeItem]: return - for url in self.manager.parsed_args.cli_only_args.links: + for url in source: yield ScrapeItem(url=url) async def load_failed_links(self) -> AsyncGenerator[ScrapeItem]: @@ -185,14 +179,6 @@ async def load_failed_links(self) -> AsyncGenerator[ScrapeItem]: for row in rows: yield _create_item_from_row(row) - async def load_all_links(self) -> AsyncGenerator[ScrapeItem]: - """Loads all links from database.""" - after = self.manager.parsed_args.cli_only_args.completed_after or date.min - before = self.manager.parsed_args.cli_only_args.completed_before or datetime.now().date() - async for rows in self.manager.db_manager.history_table.get_all_items(after, before): - for row in rows: - yield _create_item_from_row(row) - async def load_all_bunkr_failed_links_via_hash(self) -> AsyncGenerator[ScrapeItem]: """Loads all bunkr links with maintenance hash.""" async for rows in self.manager.db_manager.history_table.get_all_bunkr_failed(): @@ -203,9 +189,7 @@ async def load_all_bunkr_failed_links_via_hash(self) -> AsyncGenerator[ScrapeIte async def filter_and_send_to_crawler(self, scrape_item: ScrapeItem) -> None: """Send scrape_item to a supported crawler.""" - if not isinstance(scrape_item.url, URL): - scrape_item.url = AbsoluteHttpURL(scrape_item.url) - if self.filter_items(scrape_item): + if self.should_scrape(scrape_item): await self.send_to_crawler(scrape_item) async def send_to_crawler(self, scrape_item: ScrapeItem) -> None: @@ -234,13 +218,13 @@ async def send_to_crawler(self, scrape_item: ScrapeItem) -> None: except (NoExtensionError, ValueError): pass - if self.jdownloader.enabled and jdownloader_whitelisted: + if self.jdownloader._enabled and jdownloader_whitelisted: log(f"Sending unsupported URL to JDownloader: {scrape_item.url}", 20) success = False try: download_folder = get_download_path(self.manager, scrape_item, "jdownloader") relative_download_dir = download_folder.relative_to(self.manager.path_manager.download_folder) - self.jdownloader.direct_unsupported_to_jdownloader( + self.jdownloader.send( scrape_item.url, scrape_item.parent_title, relative_download_dir, @@ -262,13 +246,12 @@ async def send_to_crawler(self, scrape_item: ScrapeItem) -> None: ) self.manager.progress_manager.scrape_errors.add_unsupported() - def filter_items(self, scrape_item: ScrapeItem) -> bool: + def should_scrape(self, scrape_item: ScrapeItem) -> bool: """Pre-filter scrape items base on URL.""" - if not is_valid_url(scrape_item): - return False if scrape_item.url in _seen_urls: return False + _seen_urls.add(scrape_item.url) if ( @@ -278,12 +261,6 @@ def filter_items(self, scrape_item: ScrapeItem) -> bool: log(f"Skipping {scrape_item.url} as it is a blocked domain", 10) return False - before = self.manager.parsed_args.cli_only_args.completed_before - after = self.manager.parsed_args.cli_only_args.completed_after - if is_outside_date_range(scrape_item, before, after): - log(f"Skipping {scrape_item.url} as it is outside of the desired date range", 10) - return False - skip_hosts = config.get().ignore_options.skip_hosts if skip_hosts and is_in_domain_list(scrape_item, skip_hosts): log(f"Skipping URL by skip_hosts config: {scrape_item.url}", 10) @@ -343,9 +320,9 @@ def _create_item_from_row(row: aiosqlite.Row) -> ScrapeItem: url = AbsoluteHttpURL(referer, encoded="%" in referer) item = ScrapeItem(url=url, retry_path=Path(row["download_path"]), part_of_album=True) if completed_at := row["completed_at"]: - item.completed_at = int(datetime.fromisoformat(completed_at).timestamp()) + item.completed_at = int(datetime.datetime.fromisoformat(completed_at).timestamp()) if created_at := row["created_at"]: - item.created_at = int(datetime.fromisoformat(created_at).timestamp()) + item.created_at = int(datetime.datetime.fromisoformat(created_at).timestamp()) return item From a9d71ccde8a2c6060c95507998a63f153cab1acf Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Mon, 23 Feb 2026 14:27:20 -0500 Subject: [PATCH 12/23] refactor: move cookies --- cyberdrop_dl/clients/scraper_client.py | 2 +- .../cookie_management.py => cookies.py} | 31 +++---------------- cyberdrop_dl/director.py | 2 +- cyberdrop_dl/managers/__init__.py | 2 +- cyberdrop_dl/managers/client_manager.py | 2 +- cyberdrop_dl/{scraper => }/scrape_mapper.py | 0 cyberdrop_dl/scraper/__init__.py | 0 cyberdrop_dl/ui/prompts/user_prompts.py | 2 +- tests/crawlers/test_crawlers.py | 2 +- tests/test_flaresolverr.py | 2 +- 10 files changed, 12 insertions(+), 33 deletions(-) rename cyberdrop_dl/{utils/cookie_management.py => cookies.py} (81%) rename cyberdrop_dl/{scraper => }/scrape_mapper.py (100%) delete mode 100644 cyberdrop_dl/scraper/__init__.py diff --git a/cyberdrop_dl/clients/scraper_client.py b/cyberdrop_dl/clients/scraper_client.py index eb9624700..607bb66ba 100644 --- a/cyberdrop_dl/clients/scraper_client.py +++ b/cyberdrop_dl/clients/scraper_client.py @@ -10,8 +10,8 @@ import cyberdrop_dl.constants as constants from cyberdrop_dl.clients.response import AbstractResponse +from cyberdrop_dl.cookies import make_simple_cookie from cyberdrop_dl.exceptions import DDOSGuardError -from cyberdrop_dl.utils.cookie_management import make_simple_cookie from cyberdrop_dl.utils.utilities import sanitize_filename if TYPE_CHECKING: diff --git a/cyberdrop_dl/utils/cookie_management.py b/cyberdrop_dl/cookies.py similarity index 81% rename from cyberdrop_dl/utils/cookie_management.py rename to cyberdrop_dl/cookies.py index c7a5e6e08..66c831197 100644 --- a/cyberdrop_dl/utils/cookie_management.py +++ b/cyberdrop_dl/cookies.py @@ -9,7 +9,6 @@ from textwrap import dedent from typing import TYPE_CHECKING, NamedTuple, ParamSpec, TypeVar -from cyberdrop_dl import config from cyberdrop_dl.dependencies import browser_cookie3 from cyberdrop_dl.utils.logger import log @@ -64,30 +63,13 @@ def wrapper(*args, **kwargs) -> R: @cookie_wrapper -def get_cookies_from_browsers(manager: Manager, *, browser: BROWSERS, domains: list[str] | None = None) -> set[str]: - """Extract cookies from browsers. - - :param browsers: list of browsers to extract from. If `None`, config `browser_cookies.browsers` will be used - :param domains: list of domains to filter cookies. If `None`, config `browser_cookies.sites` will be used - :return: A set with all the domains that actually had cookies - :raises BrowserCookieError: If there's any error while extracting cookies""" - from cyberdrop_dl.supported_domains import SUPPORTED_FORUMS, SUPPORTED_SITES_DOMAINS, SUPPORTED_WEBSITES - +def get_cookies_from_browsers(manager: Manager, *, browser: BROWSERS, domains: list[str]) -> set[str]: if domains == []: msg = "No domains selected" raise ValueError(msg) extractor_name = browser.lower() - domains_to_extract: list[str] = domains or config.get().browser_cookies.sites - if "all" in domains_to_extract: - domains_to_extract.remove("all") - domains_to_extract.extend(SUPPORTED_SITES_DOMAINS) - elif "all_forums" in domains_to_extract: - domains_to_extract.remove("all_forums") - domains_to_extract.extend(SUPPORTED_FORUMS.values()) - elif "all_file_hosts" in domains_to_extract: - domains_to_extract.remove("all_file_hosts") - domains_to_extract.extend(SUPPORTED_WEBSITES.values()) + domains_to_extract: list[str] = domains extracted_cookies = extract_cookies(extractor_name) if not extracted_cookies: @@ -122,22 +104,19 @@ def clear_cookies(manager: Manager, domains: list[str]) -> None: def extract_cookies(extractor_name: str) -> CookieJar: - def is_decrypt_error(msg: str) -> bool: - return "Unable to get key for cookie decryption" in msg - extractor = next(extractor for extractor in COOKIE_EXTRACTORS if extractor.name == extractor_name) try: return extractor.extract() except browser_cookie3.BrowserCookieError as e: msg = str(e) - if is_decrypt_error(msg) and extractor.name in CHROMIUM_BROWSERS and os.name == "nt": + if "Unable to get key for cookie decryption" in msg and extractor.name in CHROMIUM_BROWSERS and os.name == "nt": msg = f"Cookie extraction from {extractor.name.capitalize()} is not supported on Windows - {msg}" raise UnsupportedBrowserError(msg) from None raise async def read_netscape_files(cookie_files: list[Path]) -> AsyncIterable[tuple[str, SimpleCookie]]: - now = time.time() + now = int(time.time()) domains_seen = set() cookie_jars = await asyncio.gather(*(_read_netscape_file(file) for file in cookie_files)) for file, cookie_jar in zip(cookie_files, cookie_jars, strict=True): @@ -155,7 +134,7 @@ async def read_netscape_files(cookie_files: list[Path]) -> AsyncIterable[tuple[s if simplified_domain in domains_seen: log(f"Previous cookies for domain {simplified_domain} detected. They will be overwritten", 30) - if (simplified_domain not in expired_cookies_domains) and cookie.is_expired(now): # type: ignore + if (simplified_domain not in expired_cookies_domains) and cookie.is_expired(now): expired_cookies_domains.add(simplified_domain) log(f"Cookies for {simplified_domain} are expired", 30) diff --git a/cyberdrop_dl/director.py b/cyberdrop_dl/director.py index 6a76c99b8..031d91c66 100644 --- a/cyberdrop_dl/director.py +++ b/cyberdrop_dl/director.py @@ -13,7 +13,7 @@ from cyberdrop_dl import config, constants, env from cyberdrop_dl.dependencies import browser_cookie3 from cyberdrop_dl.managers import Manager -from cyberdrop_dl.scraper.scrape_mapper import ScrapeMapper +from cyberdrop_dl.scrape_mapper import ScrapeMapper from cyberdrop_dl.utils.apprise import send_apprise_notifications from cyberdrop_dl.utils.logger import ( LogHandler, diff --git a/cyberdrop_dl/managers/__init__.py b/cyberdrop_dl/managers/__init__.py index a5a5eb238..123c7142e 100644 --- a/cyberdrop_dl/managers/__init__.py +++ b/cyberdrop_dl/managers/__init__.py @@ -24,7 +24,7 @@ if TYPE_CHECKING: from asyncio import TaskGroup - from cyberdrop_dl.scraper.scrape_mapper import ScrapeMapper + from cyberdrop_dl.scrape_mapper import ScrapeMapper class AsyncioEvents(NamedTuple): diff --git a/cyberdrop_dl/managers/client_manager.py b/cyberdrop_dl/managers/client_manager.py index b15ac4063..bdadcf601 100644 --- a/cyberdrop_dl/managers/client_manager.py +++ b/cyberdrop_dl/managers/client_manager.py @@ -19,12 +19,12 @@ from cyberdrop_dl.clients.flaresolverr import FlareSolverr from cyberdrop_dl.clients.response import AbstractResponse from cyberdrop_dl.clients.scraper_client import ScraperClient +from cyberdrop_dl.cookies import read_netscape_files from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL, MediaItem from cyberdrop_dl.exceptions import DDOSGuardError, DownloadError, ScrapeError, TooManyCrawlerErrors from cyberdrop_dl.managers import Manager from cyberdrop_dl.ui.prompts.user_prompts import get_cookies_from_browsers from cyberdrop_dl.utils.aio import WeakAsyncLocks -from cyberdrop_dl.utils.cookie_management import read_netscape_files from cyberdrop_dl.utils.ffmpeg import probe from cyberdrop_dl.utils.logger import log, log_debug, log_spacer diff --git a/cyberdrop_dl/scraper/scrape_mapper.py b/cyberdrop_dl/scrape_mapper.py similarity index 100% rename from cyberdrop_dl/scraper/scrape_mapper.py rename to cyberdrop_dl/scrape_mapper.py diff --git a/cyberdrop_dl/scraper/__init__.py b/cyberdrop_dl/scraper/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/cyberdrop_dl/ui/prompts/user_prompts.py b/cyberdrop_dl/ui/prompts/user_prompts.py index 32820c920..865a54e5c 100644 --- a/cyberdrop_dl/ui/prompts/user_prompts.py +++ b/cyberdrop_dl/ui/prompts/user_prompts.py @@ -16,9 +16,9 @@ from cyberdrop_dl import __version__, config from cyberdrop_dl.constants import BROWSERS, RESERVED_CONFIG_NAMES +from cyberdrop_dl.cookies import get_cookies_from_browsers from cyberdrop_dl.ui.prompts import basic_prompts from cyberdrop_dl.ui.prompts.defaults import ALL_CHOICE, DONE_CHOICE, EXIT_CHOICE -from cyberdrop_dl.utils.cookie_management import get_cookies_from_browsers from cyberdrop_dl.utils.utilities import clear_term if TYPE_CHECKING: diff --git a/tests/crawlers/test_crawlers.py b/tests/crawlers/test_crawlers.py index 587ddd3a3..244a0f939 100644 --- a/tests/crawlers/test_crawlers.py +++ b/tests/crawlers/test_crawlers.py @@ -14,7 +14,7 @@ from cyberdrop_dl.data_structures import AbsoluteHttpURL from cyberdrop_dl.data_structures.url_objects import MediaItem, ScrapeItem -from cyberdrop_dl.scraper.scrape_mapper import ScrapeMapper +from cyberdrop_dl.scrape_mapper import ScrapeMapper from cyberdrop_dl.utils.utilities import parse_url if TYPE_CHECKING: diff --git a/tests/test_flaresolverr.py b/tests/test_flaresolverr.py index 59d67959b..261222159 100644 --- a/tests/test_flaresolverr.py +++ b/tests/test_flaresolverr.py @@ -5,7 +5,7 @@ from cyberdrop_dl.clients.flaresolverr import FlareSolverr, _Command from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL from cyberdrop_dl.managers import Manager -from cyberdrop_dl.scraper.scrape_mapper import ScrapeMapper +from cyberdrop_dl.scrape_mapper import ScrapeMapper ENV_NAME = "CDL_FLARESOLVERR" FLARESOLVER_URL = os.environ.get(ENV_NAME, "") # or "http://localhost:8191" From 348e8afead2486cb85a69a20f3d638d1c22b9b40 Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Mon, 23 Feb 2026 14:55:32 -0500 Subject: [PATCH 13/23] refactor: update file formats --- cyberdrop_dl/clients/download_client.py | 7 +- cyberdrop_dl/constants.py | 149 +++++++++++++----------- cyberdrop_dl/crawlers/bunkrr.py | 13 ++- cyberdrop_dl/crawlers/crawler.py | 2 +- cyberdrop_dl/crawlers/http_direct.py | 7 +- cyberdrop_dl/managers/client_manager.py | 54 +++------ cyberdrop_dl/utils/sorting.py | 7 +- cyberdrop_dl/utils/utilities.py | 10 +- 8 files changed, 112 insertions(+), 137 deletions(-) diff --git a/cyberdrop_dl/clients/download_client.py b/cyberdrop_dl/clients/download_client.py index 606fd8fbf..ef27a12f4 100644 --- a/cyberdrop_dl/clients/download_client.py +++ b/cyberdrop_dl/clients/download_client.py @@ -12,7 +12,6 @@ from cyberdrop_dl import config, constants from cyberdrop_dl.clients.response import AbstractResponse -from cyberdrop_dl.constants import FILE_FORMATS from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL from cyberdrop_dl.exceptions import DDOSGuardError, DownloadError, InvalidContentTypeError, SlowDownloadError from cyberdrop_dl.utils import aio, dates @@ -484,9 +483,9 @@ def check_filesize_limits(self, media: MediaItem) -> bool: min_other_filesize = file_size_limits.minimum_other_size assert media.filesize is not None - if media.ext in FILE_FORMATS["Images"]: + if media.ext in constants.FileFormats.IMAGE: proceed = min_image_filesize < media.filesize < max_image_filesize - elif media.ext in FILE_FORMATS["Videos"]: + elif media.ext in constants.FileFormats.VIDEO: proceed = min_video_filesize < media.filesize < max_video_filesize else: proceed = min_other_filesize < media.filesize < max_other_filesize @@ -509,7 +508,7 @@ def get_content_type(ext: str, headers: Mapping[str, str]) -> str | None: content_type = override or content_type content_type = content_type.lower() - if is_html_or_text(content_type) and ext.lower() not in FILE_FORMATS["Text"]: + if is_html_or_text(content_type) and ext.lower() not in constants.FileFormats.TEXT: msg = f"Received '{content_type}', was expecting other" raise InvalidContentTypeError(message=msg) diff --git a/cyberdrop_dl/constants.py b/cyberdrop_dl/constants.py index 7f5af2c3f..2a8fb601d 100644 --- a/cyberdrop_dl/constants.py +++ b/cyberdrop_dl/constants.py @@ -124,74 +124,81 @@ class NotificationResult(Enum): NONE = Text("No Notifications Sent", "yellow") -# file formats -FILE_FORMATS = { - "Images": { - ".gif", - ".gifv", - ".heic", - ".jfif", - ".jif", - ".jpe", - ".jpeg", - ".jpg", - ".jxl", - ".png", - ".svg", - ".tif", - ".tiff", - ".webp", - }, - "Videos": { - ".3gp", - ".avchd", - ".avi", - ".f4v", - ".flv", - ".m2ts", - ".m4p", - ".m4v", - ".mkv", - ".mov", - ".mp2", - ".mp4", - ".mpe", - ".mpeg", - ".mpg", - ".mpv", - ".mts", - ".ogg", - ".ogv", - ".qt", - ".swf", - ".ts", - ".webm", - ".wmv", - }, - "Audio": { - ".flac", - ".m4a", - ".mka", - ".mp3", - ".wav", - }, - "Text": { - ".htm", - ".html", - ".md", - ".nfo", - ".txt", - ".vtt", - ".sub", - }, - "7z": { - ".7z", - ".bz2", - ".gz", - ".tar", - ".zip", - }, -} - - -MEDIA_EXTENSIONS = FILE_FORMATS["Audio"] | FILE_FORMATS["Videos"] | FILE_FORMATS["Images"] +class FileFormats: + IMAGE = frozenset( + { + ".gif", + ".gifv", + ".heic", + ".jfif", + ".jif", + ".jpe", + ".jpeg", + ".jpg", + ".jxl", + ".png", + ".svg", + ".tif", + ".tiff", + ".webp", + } + ) + VIDEO = frozenset( + { + ".3gp", + ".avchd", + ".avi", + ".f4v", + ".flv", + ".m2ts", + ".m4p", + ".m4v", + ".mkv", + ".mov", + ".mp2", + ".mp4", + ".mpe", + ".mpeg", + ".mpg", + ".mpv", + ".mts", + ".ogg", + ".ogv", + ".qt", + ".swf", + ".ts", + ".webm", + ".wmv", + } + ) + AUDIO = frozenset( + { + ".flac", + ".m4a", + ".mka", + ".mp3", + ".wav", + } + ) + TEXT = frozenset( + { + ".htm", + ".html", + ".md", + ".nfo", + ".txt", + ".vtt", + ".sub", + } + ) + _7Z = frozenset( + { + ".7z", + ".bz2", + ".gz", + ".tar", + ".zip", + } + ) + VIDEO_OR_IMAGE = VIDEO | IMAGE + MEDIA = AUDIO | VIDEO_OR_IMAGE diff --git a/cyberdrop_dl/crawlers/bunkrr.py b/cyberdrop_dl/crawlers/bunkrr.py index a147b25bc..8df41cd75 100644 --- a/cyberdrop_dl/crawlers/bunkrr.py +++ b/cyberdrop_dl/crawlers/bunkrr.py @@ -10,7 +10,7 @@ from aiohttp import ClientConnectorError -from cyberdrop_dl.constants import FILE_FORMATS +from cyberdrop_dl import constants from cyberdrop_dl.crawlers.crawler import Crawler, RateLimit, SupportedPaths, auto_task_id from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL from cyberdrop_dl.exceptions import DDOSGuardError @@ -35,7 +35,6 @@ class Selector: IMAGE_PREVIEW = "img.max-h-full.w-auto.object-cover.relative" -VIDEO_AND_IMAGE_EXTS: set[str] = FILE_FORMATS["Images"] | FILE_FORMATS["Videos"] HOST_OPTIONS: set[str] = {"bunkr.site", "bunkr.cr", "bunkr.ph"} DEEP_SCRAPE_CDNS: set[str] = {"burger", "milkshake"} # CDNs under maintanance, ignore them and try to get a cached URL FILE_KEYS = "id", "name", "original", "slug", "type", "extension", "size", "timestamp", "thumbnail", "cdnEndpoint" @@ -100,7 +99,7 @@ def src(self) -> AbsoluteHttpURL: src_str = self.thumbnail.replace("/thumbs/", "/") ext = Path(self.name).suffix src = parse_url(src_str).with_suffix(ext).with_query(None) - if src.suffix.lower() not in FILE_FORMATS["Images"]: + if src.suffix.lower() not in constants.FileFormats.IMAGE: src = src.with_host(src.host.replace("i-", "")) return _override_cdn(src) @@ -173,10 +172,11 @@ async def _album_file(self, scrape_item: ScrapeItem, file: File, results: dict[s try: src = file.src() except ValueError: - deep_scrape = True + src = None - deep_scrape = deep_scrape or ( - src.suffix.lower() not in VIDEO_AND_IMAGE_EXTS + deep_scrape = ( + not src + or src.suffix.lower() not in constants.FileFormats.VIDEO_OR_IMAGE or "no-image" in src.name or self.deep_scrape or any(cdn in src.host for cdn in DEEP_SCRAPE_CDNS) @@ -185,6 +185,7 @@ async def _album_file(self, scrape_item: ScrapeItem, file: File, results: dict[s self.create_task(self.run(scrape_item)) return + assert src if self.check_album_results(src, results): return diff --git a/cyberdrop_dl/crawlers/crawler.py b/cyberdrop_dl/crawlers/crawler.py index 75d2bf0a6..59da74ab3 100644 --- a/cyberdrop_dl/crawlers/crawler.py +++ b/cyberdrop_dl/crawlers/crawler.py @@ -421,7 +421,7 @@ async def handle_file( if custom_filename: original_filename, filename = filename, custom_filename elif self.DOMAIN in ["cyberdrop"]: - original_filename, filename = remove_file_id(self.manager, filename, ext) + original_filename, filename = remove_file_id(filename, ext) else: original_filename = filename diff --git a/cyberdrop_dl/crawlers/http_direct.py b/cyberdrop_dl/crawlers/http_direct.py index 08f703e0b..d7444db53 100644 --- a/cyberdrop_dl/crawlers/http_direct.py +++ b/cyberdrop_dl/crawlers/http_direct.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, ClassVar -from cyberdrop_dl.constants import FILE_FORMATS +from cyberdrop_dl import constants from cyberdrop_dl.crawlers.crawler import Crawler from cyberdrop_dl.exceptions import NoExtensionError from cyberdrop_dl.utils.utilities import get_filename_and_ext @@ -11,9 +11,6 @@ from cyberdrop_dl.data_structures.url_objects import ScrapeItem -MEDIA_EXTENSIONS = FILE_FORMATS["Images"] | FILE_FORMATS["Videos"] | FILE_FORMATS["Audio"] - - class DirectHttpFile(Crawler, is_generic=True): DOMAIN: ClassVar[str] = "no_crawler" @@ -23,7 +20,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: except NoExtensionError: filename, ext = get_filename_and_ext(scrape_item.url.name, forum=True) - if ext not in MEDIA_EXTENSIONS: + if ext not in constants.FileFormats.MEDIA: raise ValueError scrape_item.add_to_parent_title("Loose Files") diff --git a/cyberdrop_dl/managers/client_manager.py b/cyberdrop_dl/managers/client_manager.py index bdadcf601..a7d3b6631 100644 --- a/cyberdrop_dl/managers/client_manager.py +++ b/cyberdrop_dl/managers/client_manager.py @@ -19,19 +19,14 @@ from cyberdrop_dl.clients.flaresolverr import FlareSolverr from cyberdrop_dl.clients.response import AbstractResponse from cyberdrop_dl.clients.scraper_client import ScraperClient -from cyberdrop_dl.cookies import read_netscape_files +from cyberdrop_dl.cookies import get_cookies_from_browsers, read_netscape_files from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL, MediaItem from cyberdrop_dl.exceptions import DDOSGuardError, DownloadError, ScrapeError, TooManyCrawlerErrors from cyberdrop_dl.managers import Manager -from cyberdrop_dl.ui.prompts.user_prompts import get_cookies_from_browsers from cyberdrop_dl.utils.aio import WeakAsyncLocks from cyberdrop_dl.utils.ffmpeg import probe from cyberdrop_dl.utils.logger import log, log_debug, log_spacer -_VALID_EXTENSIONS = ( - constants.FILE_FORMATS["Images"] | constants.FILE_FORMATS["Videos"] | constants.FILE_FORMATS["Audio"] -) - if TYPE_CHECKING: from asyncio.locks import Semaphore from collections.abc import Callable, Generator, Iterable, Mapping @@ -68,7 +63,7 @@ class DownloadSpeedLimiter(AsyncLimiter): - __slots__ = (*AsyncLimiter.__slots__, "chunk_size") + __slots__ = ("chunk_size",) def __init__(self, speed_limit: int) -> None: self.chunk_size: int = 1024 * 1024 * 10 # 10MB @@ -84,33 +79,7 @@ async def acquire(self, amount: float | None = None) -> None: await super().acquire(amount) def __repr__(self): - return f"{self.__class__.__name__}(speed_limit={self.max_rate}, chunk_size={self.chunk_size})" - - -class DDosGuard: - TITLES = ("Just a moment...", "DDoS-Guard") - SELECTORS = ( - "#cf-challenge-running", - ".ray_id", - ".attack-box", - "#cf-please-wait", - "#challenge-spinner", - "#trk_jschal_js", - "#turnstile-wrapper", - ".lds-ring", - ) - ALL_SELECTORS = ", ".join(SELECTORS) - - -class CloudflareTurnstile: - TITLES = ("Simpcity Cuck Detection", "Attention Required! | Cloudflare", "Sentinel CAPTCHA") - SELECTORS = ( - "captchawrapper", - "cf-turnstile", - "script[src*='challenges.cloudflare.com/turnstile']", - "script:-soup-contains('Dont open Developer Tools')", - ) - ALL_SELECTORS = ", ".join(SELECTORS) + return f"{type(self).__name__}(speed_limit={self.max_rate!r}, chunk_size={self.chunk_size!r})" def _create_ssl(): @@ -208,14 +177,16 @@ def basic_auth(username: str, password: str) -> str: def check_allowed_filetype(self, media_item: MediaItem) -> bool: """Checks if the file type is allowed to download.""" ignore_options = config.get().ignore_options + ext = media_item.ext.lower() - if media_item.ext.lower() in constants.FILE_FORMATS["Images"] and ignore_options.exclude_images: + if ext in constants.FileFormats.IMAGE and ignore_options.exclude_images: return False - if media_item.ext.lower() in constants.FILE_FORMATS["Videos"] and ignore_options.exclude_videos: + if ext in constants.FileFormats.VIDEO and ignore_options.exclude_videos: return False - if media_item.ext.lower() in constants.FILE_FORMATS["Audio"] and ignore_options.exclude_audio: + if ext in constants.FileFormats.AUDIO and ignore_options.exclude_audio: return False - return not (ignore_options.exclude_other and media_item.ext.lower() not in _VALID_EXTENSIONS) + + return ext in constants.FileFormats.MEDIA or not ignore_options.exclude_other def check_allowed_date_range(self, media_item: MediaItem) -> bool: """Checks if the file was uploaded within the config date range""" @@ -244,7 +215,7 @@ def filter_cookies_by_word_in_domain(self, word: str) -> Iterable[tuple[str, Bas async def startup(self) -> None: await _set_dns_resolver() - def new_curl_cffi_session(self) -> AsyncSession: + def new_curl_cffi_session(self) -> AsyncSession[CurlResponse]: # Calling code should have validated if curl is actually available import warnings @@ -327,6 +298,7 @@ async def load_cookie_files(self) -> None: if config.get().browser_cookies.auto_import: assert config.get().browser_cookies.browser get_cookies_from_browsers(self.manager, browser=config.get().browser_cookies.browser) + cookie_files = sorted(self.manager.path_manager.cookies_dir.glob("*.txt")) if not cookie_files: return @@ -399,8 +371,8 @@ async def check_file_duration(self, media_item: MediaItem) -> bool: if media_item.is_segment: return True - is_video = media_item.ext.lower() in constants.FILE_FORMATS["Videos"] - is_audio = media_item.ext.lower() in constants.FILE_FORMATS["Audio"] + is_video = media_item.ext.lower() in constants.FileFormats.VIDEO + is_audio = media_item.ext.lower() in constants.FileFormats.AUDIO if not (is_video or is_audio): return True diff --git a/cyberdrop_dl/utils/sorting.py b/cyberdrop_dl/utils/sorting.py index fa8635afd..277f06c73 100644 --- a/cyberdrop_dl/utils/sorting.py +++ b/cyberdrop_dl/utils/sorting.py @@ -10,7 +10,6 @@ import imagesize from cyberdrop_dl import config, constants -from cyberdrop_dl.constants import FILE_FORMATS from cyberdrop_dl.utils import strings from cyberdrop_dl.utils.ffmpeg import probe from cyberdrop_dl.utils.logger import log, log_with_color @@ -97,11 +96,11 @@ async def _sort_files(self, files_to_sort: dict[str, list[Path]]) -> None: if ext in constants.TempExt: continue - if ext in FILE_FORMATS["Audio"]: + if ext in constants.FileFormats.AUDIO: await self.sort_audio(file, folder_name) - elif ext in FILE_FORMATS["Images"]: + elif ext in constants.FileFormats.IMAGE: await self.sort_image(file, folder_name) - elif ext in FILE_FORMATS["Videos"]: + elif ext in constants.FileFormats.VIDEO: await self.sort_video(file, folder_name) else: await self.sort_other(file, folder_name) diff --git a/cyberdrop_dl/utils/utilities.py b/cyberdrop_dl/utils/utilities.py index 435a85928..a26f92702 100644 --- a/cyberdrop_dl/utils/utilities.py +++ b/cyberdrop_dl/utils/utilities.py @@ -261,7 +261,7 @@ def get_download_path(manager: Manager, scrape_item: ScrapeItem, domain: str) -> return download_dir / scrape_item.create_download_path(domain) -def remove_file_id(manager: Manager, filename: str, ext: str) -> tuple[str, str]: +def remove_file_id(filename: str, ext: str) -> tuple[str, str]: """Removes the additional string some websites adds to the end of every filename.""" original_filename = filename if not config.get().download_options.remove_generated_id_from_filenames: @@ -275,7 +275,7 @@ def remove_file_id(manager: Manager, filename: str, ext: str) -> tuple[str, str] if re.match(constants.RAR_MULTIPART_PATTERN, tail_no_dot) and ext == ".rar" and "-" in filename: filename, part = filename.rsplit("-", 1) filename = f"{filename}.{part}" - elif ext_no_dot.isdigit() and tail in constants.FILE_FORMATS["7z"] and "-" in filename: + elif ext_no_dot.isdigit() and tail in constants.FileFormats._7Z and "-" in filename: filename, _7z_ext = filename.rsplit("-", 1) filename = f"{filename}.{_7z_ext}" if not filename.endswith(ext): @@ -286,11 +286,11 @@ def remove_file_id(manager: Manager, filename: str, ext: str) -> tuple[str, str] """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" -def clear_term(): - os.system("cls" if os.name == "nt" else "clear") +def clear_term() -> None: + _ = os.system("cls" if os.name == "nt" else "clear") -def get_size(path: os.DirEntry) -> int | None: +def get_size(path: os.DirEntry[str]) -> int | None: try: return path.stat(follow_symlinks=False).st_size except (OSError, ValueError): From 4c99283e219dcec5c1c58aa7c28af2c137ef39fb Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Mon, 23 Feb 2026 15:16:08 -0500 Subject: [PATCH 14/23] refactor: logging updates --- cyberdrop_dl/compat.py | 6 +++--- cyberdrop_dl/constants.py | 29 ++++++++++------------------- cyberdrop_dl/env.py | 5 +---- cyberdrop_dl/utils/logger.py | 34 +++++++++++++++++++++++----------- 4 files changed, 37 insertions(+), 37 deletions(-) diff --git a/cyberdrop_dl/compat.py b/cyberdrop_dl/compat.py index ca3f4772c..bf44c00dc 100644 --- a/cyberdrop_dl/compat.py +++ b/cyberdrop_dl/compat.py @@ -44,8 +44,8 @@ class StrEnum(enum.StrEnum, metaclass=_ContainerEnumType): ... class MayBeUpperStrEnum(StrEnum): @classmethod - def _missing_(cls, value: object): + def _missing_(cls, value: object) -> MayBeUpperStrEnum | None: try: return cls[str(value).upper()] - except KeyError as e: - raise e + except KeyError: + return None diff --git a/cyberdrop_dl/constants.py b/cyberdrop_dl/constants.py index 2a8fb601d..f2571c6ff 100644 --- a/cyberdrop_dl/constants.py +++ b/cyberdrop_dl/constants.py @@ -1,26 +1,28 @@ +from __future__ import annotations + import re from dataclasses import field from datetime import UTC, datetime from enum import auto from pathlib import Path -from typing import TYPE_CHECKING, Any, Final +from typing import TYPE_CHECKING, Final -from aiohttp.resolver import AsyncResolver, ThreadedResolver from rich.text import Text from cyberdrop_dl import env -from cyberdrop_dl.compat import Enum, StrEnum +from cyberdrop_dl.compat import Enum, MayBeUpperStrEnum, StrEnum if TYPE_CHECKING: + from aiohttp.resolver import AsyncResolver, ThreadedResolver + from cyberdrop_dl.utils.logger import LogHandler # TIME STARTUP_TIME = datetime.now() -STARTUP_TIME_UTC = datetime.now(UTC) +STARTUP_TIME_UTC = STARTUP_TIME.astimezone(UTC) LOGS_DATETIME_FORMAT = "%Y%m%d_%H%M%S" LOGS_DATE_FORMAT = "%Y_%m_%d" STARTUP_TIME_STR = STARTUP_TIME.strftime(LOGS_DATETIME_FORMAT) -STARTUP_TIME_UTC_STR = STARTUP_TIME_UTC.strftime(LOGS_DATETIME_FORMAT) DNS_RESOLVER: type[AsyncResolver] | type[ThreadedResolver] | None = None MAX_REDIRECTS: Final[int] = 8 @@ -28,16 +30,9 @@ # logging CONSOLE_LEVEL = 100 MAX_NAME_LENGTHS = {"FILE": 95, "FOLDER": 60} -DEFAULT_CONSOLE_WIDTH = 240 CSV_DELIMITER = "," LOG_OUTPUT_TEXT = Text("") -RICH_HANDLER_CONFIG: dict[str, Any] = {"rich_tracebacks": True, "tracebacks_show_locals": False} -RICH_HANDLER_DEBUG_CONFIG = RICH_HANDLER_CONFIG | { - "tracebacks_show_locals": True, - "locals_max_string": DEFAULT_CONSOLE_WIDTH, - "tracebacks_extra_lines": 2, - "locals_max_length": 20, -} + VALIDATION_ERROR_FOOTER = """Please delete the file or fix the errors""" @@ -48,7 +43,7 @@ HTTP_REGEX_LINKS = re.compile( r"https?://(www\.)?[-a-zA-Z0-9@:%._+~#=]{2,256}\.[a-z]{2,12}\b([-a-zA-Z0-9@:%_+.~#?&/=]*)" ) -console_handler: "LogHandler" +console_handler: LogHandler class TempExt(StrEnum): @@ -94,15 +89,11 @@ class HashType(StrEnum): xxh128 = "xxh128" -class Hashing(StrEnum): +class Hashing(MayBeUpperStrEnum): OFF = auto() IN_PLACE = auto() POST_DOWNLOAD = auto() - @classmethod - def _missing_(cls, value: object) -> "Hashing": - return cls[str(value).upper()] - class BROWSERS(StrEnum): chrome = auto() diff --git a/cyberdrop_dl/env.py b/cyberdrop_dl/env.py index 2eeed1476..ee54532bb 100644 --- a/cyberdrop_dl/env.py +++ b/cyberdrop_dl/env.py @@ -1,5 +1,4 @@ import os -from hashlib import sha256 os.environ["PYDANTIC_ERRORS_INCLUDE_URL"] = "0" RUNNING_IN_IDE = bool(os.getenv("PYCHARM_HOSTED") or os.getenv("TERM_PROGRAM") == "vscode") @@ -7,9 +6,7 @@ os.getenv("TERMUX_VERSION") or os.getenv("TERMUX_MAIN_PACKAGE_FORMAT") or "com.termux" in os.getenv("$PREFIX", "") ) PORTRAIT_MODE = bool(RUNNING_IN_TERMUX or os.getenv("CDL_PORTRAIT_MODE")) -ENABLE_DEBUG_CRAWLERS = os.getenv("CDL_ENABLE_DEBUG_CRAWLERS") -if ENABLE_DEBUG_CRAWLERS: - ENABLE_DEBUG_CRAWLERS = sha256(ENABLE_DEBUG_CRAWLERS.encode("utf-8")).hexdigest() + DEBUG_LOG_FOLDER = os.getenv("CDL_DEBUG_LOG_FOLDER") PROFILING = os.getenv("CDL_PROFILING") diff --git a/cyberdrop_dl/utils/logger.py b/cyberdrop_dl/utils/logger.py index 51908eed1..b24fa0664 100644 --- a/cyberdrop_dl/utils/logger.py +++ b/cyberdrop_dl/utils/logger.py @@ -12,7 +12,6 @@ from rich._log_render import LogRender from rich.console import Console, Group -from rich.containers import Lines, Renderables from rich.logging import RichHandler from rich.measure import Measurement from rich.padding import Padding @@ -36,6 +35,7 @@ from datetime import datetime from rich.console import ConsoleRenderable + from rich.containers import Lines from cyberdrop_dl.managers import Manager @@ -57,7 +57,7 @@ def getMessage(self) -> str: # noqa: N802 msg = str(self._proccess_msg(self.msg)) if self.args: - args = map(self._proccess_msg, self.args) + args = tuple(map(self._proccess_msg, self.args)) try: return msg.format(*args) except Exception: @@ -75,12 +75,14 @@ def _proccess_msg(msg: object) -> object: logging.setLogRecordFactory(JsonLogRecord) +DEFAULT_CONSOLE_WIDTH = 240 + class LogHandler(RichHandler): """Rich Handler with default settings, automatic console creation and custom log render to remove padding in files.""" def __init__( - self, level: int = 10, file: IO[str] | None = None, width: int | None = None, debug: bool = False, **kwargs + self, level: int = 10, file: IO[str] | None = None, width: int | None = None, debug: bool = False ) -> None: is_file: bool = file is not None redacted: bool = is_file and not debug @@ -89,9 +91,17 @@ def __init__( console = _DEFAULT_CONSOLE else: console = console_cls(file=file, width=width) - options = constants.RICH_HANDLER_DEBUG_CONFIG if debug else constants.RICH_HANDLER_CONFIG - options = options | kwargs - super().__init__(level, console, show_time=is_file, **options) + + super().__init__( + level, + console, + show_time=is_file, + rich_tracebacks=True, + tracebacks_show_locals=True, + locals_max_string=DEFAULT_CONSOLE_WIDTH, + tracebacks_extra_lines=2, + locals_max_length=20, + ) if is_file: self._log_render = NoPaddingLogRender(show_level=True) @@ -136,7 +146,7 @@ class NoPaddingLogRender(LogRender): cdl_padding: int = 0 EXCLUDE_PATH_LOGGING_FROM: tuple[str, ...] = "logger.py", "base.py", "session.py", "cache_control.py" - def __call__( # type: ignore[reportIncompatibleMethodOverride] + def __call__( # type: ignore[reportIncompatibleMethodOverride] # pyright: ignore[reportIncompatibleMethodOverride] self, console: Console, renderables: Iterable[ConsoleRenderable], @@ -163,6 +173,7 @@ def __call__( # type: ignore[reportIncompatibleMethodOverride] output.append(log_time_display) output.pad_right(1) self._last_time = log_time_display + if self.show_level: output.append(level) output.pad_right(1) @@ -184,12 +195,13 @@ def __call__( # type: ignore[reportIncompatibleMethodOverride] padded_lines: list[ConsoleRenderable] = [] - for renderable in Renderables(renderables): # type: ignore + for renderable in renderables: if isinstance(renderable, Text): renderable = _indent_text(renderable, console, self.cdl_padding) renderable.stylize("log.message") - output.append(renderable) + _ = output.append(renderable) continue + padded_lines.append(Padding(renderable, (0, 0, 0, self.cdl_padding), expand=False)) return Group(output, *padded_lines) @@ -244,7 +256,7 @@ def log_with_color(message: Text | str, style: str, level: int = 20, show_in_sta def log_spacer(level: int, char: str = "-", *, log_to_console: bool = True, log_to_file: bool = True) -> None: - spacer = char * min(int(constants.DEFAULT_CONSOLE_WIDTH / 2), 50) + spacer = char * min(int(DEFAULT_CONSOLE_WIDTH / 2), 50) if log_to_file: log(spacer, level) if log_to_console and constants.CONSOLE_LEVEL >= 50: @@ -280,7 +292,7 @@ def _setup_startup_logger() -> Generator[None]: file_handler = LogHandler( level=10, file=file.open("w", encoding="utf8"), - width=constants.DEFAULT_CONSOLE_WIDTH, + width=DEFAULT_CONSOLE_WIDTH, ) startup_logger.addHandler(file_handler) except OSError: From 160776ab6542cf3ad7e13f2c8081f5b27601b708 Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Mon, 23 Feb 2026 15:36:43 -0500 Subject: [PATCH 15/23] refactor: delete UI --- cyberdrop_dl/config/__init__.py | 9 +- cyberdrop_dl/config/settings.py | 24 +- cyberdrop_dl/director.py | 39 +-- cyberdrop_dl/ui/__init__.py | 0 cyberdrop_dl/ui/prompts/__init__.py | 0 cyberdrop_dl/ui/prompts/basic_prompts.py | 111 -------- cyberdrop_dl/ui/prompts/defaults.py | 7 - cyberdrop_dl/ui/prompts/user_prompts.py | 309 ----------------------- 8 files changed, 29 insertions(+), 470 deletions(-) delete mode 100644 cyberdrop_dl/ui/__init__.py delete mode 100644 cyberdrop_dl/ui/prompts/__init__.py delete mode 100644 cyberdrop_dl/ui/prompts/basic_prompts.py delete mode 100644 cyberdrop_dl/ui/prompts/defaults.py delete mode 100644 cyberdrop_dl/ui/prompts/user_prompts.py diff --git a/cyberdrop_dl/config/__init__.py b/cyberdrop_dl/config/__init__.py index 5f524ab11..477391978 100755 --- a/cyberdrop_dl/config/__init__.py +++ b/cyberdrop_dl/config/__init__.py @@ -1,5 +1,6 @@ from __future__ import annotations +import datetime from contextvars import ContextVar, Token from pathlib import Path from typing import Self @@ -61,8 +62,12 @@ def save(self, file: Path) -> None: yaml.save(file, self) def resolve_paths(self) -> None: - if not self._resolved: - self._resolve_paths(self) + if self._resolved: + return + self._resolve_paths(self) + now = datetime.datetime.now() + self.logs.set_output_filenames(now) + self.logs.delete_old_logs_and_folders(now) self._resolved = True @classmethod diff --git a/cyberdrop_dl/config/settings.py b/cyberdrop_dl/config/settings.py index bf459a9e7..325870952 100755 --- a/cyberdrop_dl/config/settings.py +++ b/cyberdrop_dl/config/settings.py @@ -1,5 +1,4 @@ # ruff: noqa: RUF012 -import itertools import random import re from datetime import date, datetime, timedelta @@ -115,7 +114,7 @@ def parse_logs_duration(input_date: timedelta | str | int | None) -> timedelta | if value := falsy_as(input_date, None): return to_timedelta(value) - def _set_output_filenames(self, now: datetime) -> None: + def set_output_filenames(self, now: datetime) -> None: self.log_folder.mkdir(exist_ok=True, parents=True) current_time_file_iso: str = now.strftime(constants.LOGS_DATETIME_FORMAT) current_time_folder_iso: str = now.strftime(constants.LOGS_DATE_FORMAT) @@ -130,17 +129,22 @@ def _set_output_filenames(self, now: datetime) -> None: log_file.parent.mkdir(exist_ok=True, parents=True) - def _delete_old_logs_and_folders(self, now: datetime | None = None) -> None: + def delete_old_logs_and_folders(self, now: datetime | None = None) -> None: + if not (now and self.logs_expire_after): + return + from cyberdrop_dl.utils.utilities import purge_dir_tree - if now and self.logs_expire_after: - for file in itertools.chain(self.log_folder.rglob("*.log"), self.log_folder.rglob("*.csv")): - file_date = file.stat().st_ctime - t_delta = now - datetime.fromtimestamp(file_date) - if t_delta > self.logs_expire_after: - file.unlink(missing_ok=True) + for file in self.log_folder.rglob("*"): + if file.suffix not in (".log", ".csv"): + continue + + file_date = file.stat().st_ctime + t_delta = now - datetime.fromtimestamp(file_date) + if t_delta > self.logs_expire_after: + file.unlink(missing_ok=True) - purge_dir_tree(self.log_folder) + _ = purge_dir_tree(self.log_folder) class FileSizeLimits(SettingsGroup): diff --git a/cyberdrop_dl/director.py b/cyberdrop_dl/director.py index 031d91c66..ba24060c7 100644 --- a/cyberdrop_dl/director.py +++ b/cyberdrop_dl/director.py @@ -15,13 +15,7 @@ from cyberdrop_dl.managers import Manager from cyberdrop_dl.scrape_mapper import ScrapeMapper from cyberdrop_dl.utils.apprise import send_apprise_notifications -from cyberdrop_dl.utils.logger import ( - LogHandler, - QueuedLogger, - log, - log_spacer, - log_with_color, -) +from cyberdrop_dl.utils.logger import LogHandler, QueuedLogger, log, log_spacer, log_with_color from cyberdrop_dl.utils.sorting import Sorter from cyberdrop_dl.utils.updates import check_latest_pypi from cyberdrop_dl.utils.utilities import check_partials_and_empty_folders @@ -123,7 +117,7 @@ async def _post_runtime(manager: Manager) -> None: await manager.hash_manager.hash_client.cleanup_dupes_after_download() - if config.get().sorting.sort_downloads and not manager.parsed_args.cli_only_args.retry_any: + if config.get().sorting.sort_downloads: sorter = Sorter(manager) await sorter.run() @@ -167,31 +161,14 @@ def _setup_debug_logger(manager: Manager) -> Path | None: def _setup_main_logger(manager: Manager) -> None: logger = logging.getLogger("cyberdrop_dl") - file_io = manager.path_manager.main_log.open("w", encoding="utf8") - settings_data = config.get() - log_level = settings_data.runtime_options.log_level + file_io = config.get().logs.main_log.open("w", encoding="utf8") + log_level = config.get().runtime_options.log_level logger.setLevel(log_level) - if not manager.parsed_args.cli_only_args.fullscreen_ui: - constants.CONSOLE_LEVEL = settings_data.runtime_options.console_log_level - constants.console_handler = LogHandler(level=constants.CONSOLE_LEVEL) - logger.addHandler(constants.console_handler) - file_handler = LogHandler(level=log_level, file=file_io, width=500) - queued_logger = QueuedLogger(manager, file_handler) - logger.addHandler(queued_logger.handler) - - -def _setup_manager(args: Sequence[str] | None = None) -> Manager: - """Starts the program and returns the manager. - - This will also run the UI for the program - After this function returns, the manager will be ready to use and scraping / downloading can begin. - """ - - manager = Manager() - - return manager + logger.addHandler( + QueuedLogger(manager, LogHandler(level=log_level, file=file_io, width=500)).handler, + ) def _loop_factory() -> asyncio.AbstractEventLoop: @@ -205,7 +182,7 @@ class Director: """Creates a manager and runs it""" def __init__(self, args: Sequence[str] | None = None) -> None: - self.manager = _setup_manager(args) + self.manager: Manager = Manager() def run(self) -> int: return self._run() diff --git a/cyberdrop_dl/ui/__init__.py b/cyberdrop_dl/ui/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/cyberdrop_dl/ui/prompts/__init__.py b/cyberdrop_dl/ui/prompts/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/cyberdrop_dl/ui/prompts/basic_prompts.py b/cyberdrop_dl/ui/prompts/basic_prompts.py deleted file mode 100644 index 86462a157..000000000 --- a/cyberdrop_dl/ui/prompts/basic_prompts.py +++ /dev/null @@ -1,111 +0,0 @@ -# type: ignore[reportPrivateImportUsage] -import sys -from pathlib import Path - -from InquirerPy import inquirer -from InquirerPy.base.control import Choice -from InquirerPy.separator import Separator -from InquirerPy.validator import EmptyInputValidator, PathValidator - -from cyberdrop_dl.ui.prompts.defaults import DEFAULT_OPTIONS, DONE_CHOICE - - -def ask_text(message: str, validate_empty: bool = True, **kwargs): - options = DEFAULT_OPTIONS | kwargs - return inquirer.text( - message=message, - validate=EmptyInputValidator("Input should not be empty") if validate_empty else None, - **options, - ).execute() - - -def ask_choice(choices: list[Choice], *, message: str = "What would you like to do:", **kwargs): - options = DEFAULT_OPTIONS | kwargs - return inquirer.select(message=message, choices=choices, **options).execute() - - -def ask_multi_choice(choices: list[Choice], *, message: str = "What would you like to do:", **kwargs): - return ask_choice(choices, message=message, multiselect=True, **kwargs) - - -def ask_checkbox(choices: list[Choice], *, message: str = "Select multiple options:", **kwargs): - options = DEFAULT_OPTIONS | {"long_instruction": "ARROW KEYS: Navigate | SPACE: Select | ENTER: Confirm"} | kwargs - return inquirer.checkbox(message=message, choices=choices, **options).execute() - - -def ask_choice_fuzzy(choices: list[Choice], message: str, validate_empty: bool = True, **kwargs): - options = ( - DEFAULT_OPTIONS - | {"long_instruction": "ARROW KEYS: Navigate | TYPE: Filter | TAB: select, ENTER: Finish Selection"} - | kwargs - ) - custom_validate = options.pop("validate", None) - validate = ( - EmptyInputValidator("Input should not be empty") - if validate_empty and custom_validate is None - else custom_validate - ) - return inquirer.fuzzy( - message=message, - choices=choices, - validate=validate, - **options, - ).execute() - - -def ask_path(message: str = "Select path", *, validator_options: dict | None = None, **kwargs) -> Path: - options = DEFAULT_OPTIONS | {"default": str(Path.home())} | kwargs - return Path( - inquirer.filepath(message=message, validate=PathValidator(**(validator_options or {})), **options).execute() - ) - - -def ask_file_path(message: str = "Select file path", **kwargs) -> Path: - options = DEFAULT_OPTIONS | kwargs - validator_options = {"is_file": True, "message": "Input is not a file"} - return ask_path(message, validator_options=validator_options, **options) - - -def ask_dir_path(message: str = "Select dir path", **kwargs) -> Path: - options = DEFAULT_OPTIONS | kwargs - validator_options = {"is_dir": True, "message": "Input is not a directory"} - return ask_path(message, validator_options=validator_options, **options) - - -def ask_toggle(message: str = "enable", **kwargs): - options = DEFAULT_OPTIONS | {"long_instruction": "Y: Yes | N: No"} | kwargs - return inquirer.confirm(message=message, **options).execute() - - -def enter_to_continue(message: str = "Press to continue", **kwargs): - if "pytest" in sys.modules: - return - options = DEFAULT_OPTIONS | {"long_instruction": "ENTER: continue"} | kwargs - msg = f"\n{message}" - return inquirer.confirm(message=msg, qmark="", **options).execute() - - -def create_choices( - options_groups: list[list[str]] | dict[str, list[list[str]]], - append_last: Choice = DONE_CHOICE, - *, - disabled_choices: list[str] | None = None, -): - if isinstance(options_groups, dict): - options_groups = list(options_groups.values()) - disabled_choices = disabled_choices or [] - options = [option for group in options_groups for option in group] - choices = [] - for index, option in enumerate(options, 1): - enabled = option not in disabled_choices - choices.append(Choice(index, option, enabled)) - choices.append(append_last) - - separator_indexes = [] - for group in options_groups: - separator_indexes.append(len(group) + (separator_indexes[-1] if separator_indexes else 0)) - - for count, index in enumerate(separator_indexes): - choices.insert(index + count, Separator()) - - return choices diff --git a/cyberdrop_dl/ui/prompts/defaults.py b/cyberdrop_dl/ui/prompts/defaults.py deleted file mode 100644 index bc5e4b993..000000000 --- a/cyberdrop_dl/ui/prompts/defaults.py +++ /dev/null @@ -1,7 +0,0 @@ -from InquirerPy.base.control import Choice - -EXIT_CHOICE = Choice("Exit") -DONE_CHOICE = Choice("Done") -ALL_CHOICE = Choice("All of the above") - -DEFAULT_OPTIONS = {"long_instruction": "ARROW KEYS: Navigate | ENTER: Select", "vi_mode": False} diff --git a/cyberdrop_dl/ui/prompts/user_prompts.py b/cyberdrop_dl/ui/prompts/user_prompts.py deleted file mode 100644 index 865a54e5c..000000000 --- a/cyberdrop_dl/ui/prompts/user_prompts.py +++ /dev/null @@ -1,309 +0,0 @@ -# type: ignore[reportPrivateImportUsage] -from __future__ import annotations - -import asyncio -from enum import IntEnum -from platform import system -from typing import TYPE_CHECKING - -from InquirerPy import get_style -from InquirerPy.base.control import Choice -from InquirerPy.enum import ( - INQUIRERPY_EMPTY_CIRCLE_SEQUENCE, - INQUIRERPY_FILL_CIRCLE_SEQUENCE, -) -from rich.console import Console - -from cyberdrop_dl import __version__, config -from cyberdrop_dl.constants import BROWSERS, RESERVED_CONFIG_NAMES -from cyberdrop_dl.cookies import get_cookies_from_browsers -from cyberdrop_dl.ui.prompts import basic_prompts -from cyberdrop_dl.ui.prompts.defaults import ALL_CHOICE, DONE_CHOICE, EXIT_CHOICE -from cyberdrop_dl.utils.utilities import clear_term - -if TYPE_CHECKING: - from pathlib import Path - - from yarl import URL - - from cyberdrop_dl.managers import Manager - -console = Console() - - -def main_prompt(manager: Manager) -> int: - """Main prompt for the program.""" - prompt_header(manager) - OPTIONS = { - "group_1": ["Download", "Retry failed downloads", "Create file hashes", "Sort files in download folder"], - "group_2": ["Edit URLs.txt", "Change config", "Edit configs"], - "group_3": ["Check for updates", "View changelog"], - } - - choices = basic_prompts.create_choices(OPTIONS, append_last=EXIT_CHOICE) - - return basic_prompts.ask_choice(choices) - - -""" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MANAGE CONFIG PROMPTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - - -def manage_configs(manager: Manager) -> int: - """Manage Configs Prompt.""" - prompt_header(manager) - OPTIONS = { - "group_1": [ - "Change default config", - "Create a new config", - "Delete a config", - ], - "group_2": [ - "Edit current config", - "Edit authentication config", - "Edit global config", - ], - "group_3": ["Edit auto cookie extraction settings", "Import cookies now", "Clear cookies"], - "group_4": [ - "Clear cache", - ], - } - choices = basic_prompts.create_choices(OPTIONS) - return basic_prompts.ask_choice(choices) - - -def create_new_config(manager: Manager, *, title: str = "Create a new config file") -> str | None: - """Asks the user for a new config name. Returns `None` if the config name is invalid.""" - clear_term() - console.print(title) - answer: str = basic_prompts.ask_text("Enter the name of the config:") - return _check_valid_new_config_name(answer, manager) - - -def select_config(configs: list) -> str: - """Asks the user to select an existing config name.""" - return basic_prompts.ask_choice_fuzzy( - choices=configs, - message="Select a config file:", - validate_empty=True, - long_instruction="ARROW KEYS: Navigate | TYPE: Filter | TAB: select, ENTER: Finish Selection", - invalid_message="Need to select a config.", - ) - - -def switch_default_config_to(manager: Manager, config_name: str) -> str: - """Asks the user if they want to switch the default config to the provided config""" - if manager.config_manager.get_default_config() == config_name: - return - return basic_prompts.ask_toggle( - message=f"Do you want to switch the default config to {config_name}?", - ) - - -def switch_default_config() -> str: - """Asks the user if they want to switch the default config""" - return basic_prompts.ask_toggle( - message="Do you want to switch the default config?", - ) - - -def activate_config(manager: Manager, config) -> str: - """Asks the user if they want to activate the provided config""" - if manager.config_manager.get_loaded_config() == config: - return - return basic_prompts.ask_toggle(message=f"Do also want to activate the {config} config?") - - -def _check_valid_new_config_name(answer: str, manager: Manager) -> str | None: - """Check if the provided config name if. Returns `None` if the config name is invalid.""" - msg = None - if answer.casefold() in RESERVED_CONFIG_NAMES: - msg = f"[bold red]ERROR:[/bold red] Config name '{answer}' is a reserved internal name" - - elif manager.path_manager.config_folder.joinpath(answer).is_dir(): - msg = f"[bold red]ERROR:[/bold red] Config with name '{answer}' already exists!" - if msg: - console.print(msg) - basic_prompts.enter_to_continue() - return None - - return answer - - -""" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ AUTHENTICATION PROMPTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - - -def auto_cookie_extraction(manager: Manager): - answer = basic_prompts.ask_toggle("Enable auto cookies import:") - config.get().browser_cookies.auto_import = answer - if answer: - extract_cookies(manager, dry_run=True) - manager.config_manager.write_updated_settings_config() - - -class DomainType(IntEnum): - WEBSITE = 0 - FORUM = 1 - - -def domains_prompt(*, domain_message: str = "Select site(s):") -> tuple[list[str], list[str]]: - """Asks the user to select website(s) for cookie actions and cache actions.""" - from cyberdrop_dl.supported_domains import SUPPORTED_FORUMS, SUPPORTED_SITES_DOMAINS, SUPPORTED_WEBSITES - - OPTIONS = [["Forum", "File Host"], ["All Supported Websites"]] - choices = basic_prompts.create_choices(OPTIONS) - domain_type = basic_prompts.ask_choice(choices, message="Select category:") - - if domain_type == DONE_CHOICE.value: - return [], [] - - if domain_type == 3: - return SUPPORTED_SITES_DOMAINS, SUPPORTED_SITES_DOMAINS - - all_domains = list(SUPPORTED_FORUMS.values() if domain_type == DomainType.FORUM else SUPPORTED_WEBSITES.values()) - domain_choices = [Choice(site) for site in all_domains] + [ALL_CHOICE] - - domains = basic_prompts.ask_choice_fuzzy( - choices=domain_choices, - message=domain_message, - validate_empty=True, - multiselect=True, - marker_pl=f" {INQUIRERPY_EMPTY_CIRCLE_SEQUENCE} ", - marker=f" {INQUIRERPY_FILL_CIRCLE_SEQUENCE} ", - style=get_style( - { - "marker": "#98c379", - "questionmark": "#e5c07b", - "pointer": "#61afef", - "long_instruction": "#abb2bf", - "fuzzy_prompt": "#c678dd", - "fuzzy_info": "#abb2bf", - "fuzzy_border": "#4b5263", - "fuzzy_match": "#c678dd", - } - ), - ) - if ALL_CHOICE.value in domains: - domains = all_domains - return domains, all_domains - - -def extract_cookies(manager: Manager, *, dry_run: bool = False) -> None: - """Asks the user to select browser(s) and domains(s) to import cookies from.""" - from cyberdrop_dl.supported_domains import SUPPORTED_FORUMS, SUPPORTED_SITES_DOMAINS, SUPPORTED_WEBSITES - - supported_forums, supported_websites = list(SUPPORTED_FORUMS.values()), list(SUPPORTED_WEBSITES.values()) - domains, all_domains = domains_prompt(domain_message="Select site(s) to import cookies from:") - if domains == []: - return - browser = BROWSERS(browser_prompt()) - - if dry_run: - config.get().browser_cookies.browser = browser - current_sites = set(config.get().browser_cookies.sites) - new_sites = current_sites - set(all_domains) - if domains == supported_forums: - new_sites -= {"all"} - new_sites.add("all_forums") - elif domains == supported_websites: - new_sites -= {"all"} - new_sites.add("all_file_hosts") - elif domains == SUPPORTED_SITES_DOMAINS: - new_sites -= {"all_forums", "all_file_hosts"} - new_sites.add("all") - else: - new_sites -= {"all", "all_forums", "all_file_hosts"} - new_sites.update(domains) - if "all_forums" in new_sites and "all_file_hosts" in new_sites: - new_sites -= {"all_forums", "all_file_hosts"} - new_sites.add("all") - config.get().browser_cookies.sites = sorted(new_sites) - return - - get_cookies_from_browsers(manager, browser=browser, domains=domains) - console.print("Import finished", style="green") - basic_prompts.enter_to_continue() - - -def browser_prompt() -> str: - """Asks the user to select browser(s) for cookie extraction.""" - unsupported_browsers = { - "Windows": { - "arc", - "brave", - "chrome", - "chromium", - "edge", - "lynx", - "opera", - "opera_gx", - "safari", - "vivaldi", - "w3m", - }, - "Linux": {"arc", "opera_gx", "safari"}, - "Darwin": {"lynx", "w3m"}, - }.get(system(), set()) - choices = [ - Choice(browser, browser.capitalize() if browser != "opera_gx" else "Opera GX") - for browser in BROWSERS - if browser not in unsupported_browsers - ] - return basic_prompts.ask_choice(choices, message="Select the browser(s) for extraction:") - - -""" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CACHE PROMPTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - - -async def _get_urls(manager: Manager) -> set[URL]: - urls = set() - async for url in manager.cache_manager.request_cache.get_urls(): - urls.add(url) - return urls - - -def filter_cache_urls(manager: Manager, domains: list) -> set[URL]: - urls_to_remove = set() - cached_urls = asyncio.run(_get_urls(manager)) - cached_urls_copy = cached_urls.copy() - for domain in domains: - cached_urls = cached_urls_copy.copy() - cached_urls_copy = cached_urls.copy() - for url in cached_urls: - if url.host == domain: - urls_to_remove.add(url) - cached_urls_copy.remove(url) - return urls_to_remove - - -""" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ V4 IMPORT PROMPTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - - -def import_cyberdrop_v4_items_prompt(manager: Manager) -> int: - """Import Cyberdrop_V4 Items.""" - prompt_header(manager) - OPTIONS = [["Import config", "Import download_history.sql"]] - choices = basic_prompts.create_choices(OPTIONS) - console.print("V4 Import Menu") - return basic_prompts.ask_choice(choices) - - -def import_v4_config_prompt(manager: Manager) -> tuple[str, Path] | None: - """Asks the user for the name and path of the config to import. Returns `None` if the config name is invalid.""" - new_config = create_new_config(manager, title="What should this config be called:") - if not new_config: - return None - return new_config, basic_prompts.ask_file_path("Select the config file to import:") - - -def import_v4_download_history_prompt() -> Path: - return basic_prompts.ask_file_path("Select the download_history.sql file to import:") - - -""" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ OTHERS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - - -def prompt_header(manager: Manager, title: str | None = None) -> None: - clear_term() - title = title or f"[bold]Cyberdrop Downloader ([blue]V{__version__!s}[/blue])[/bold]" - console.print(title) - console.print(f"[bold]Current config:[/bold] [blue]{manager.config_manager.loaded_config}[/blue]") From 351f20cbef0c3485ad4124b4dca4600dfc0f25d2 Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Mon, 23 Feb 2026 17:32:13 -0500 Subject: [PATCH 16/23] refactor: update logs --- cyberdrop_dl/config/settings.py | 5 + cyberdrop_dl/constants.py | 1 - cyberdrop_dl/crawlers/_forum.py | 2 +- cyberdrop_dl/crawlers/crawler.py | 2 +- cyberdrop_dl/director.py | 5 +- cyberdrop_dl/downloader/downloader.py | 2 +- cyberdrop_dl/managers/__init__.py | 2 +- cyberdrop_dl/managers/config_manager.py | 2 +- cyberdrop_dl/managers/log_manager.py | 147 +++++++----------------- cyberdrop_dl/scrape_mapper.py | 10 +- cyberdrop_dl/utils/json.py | 14 +-- cyberdrop_dl/utils/utilities.py | 2 +- tests/conftest.py | 4 +- tests/crawlers/test_xenforo.py | 2 +- tests/test_manager.py | 2 +- 15 files changed, 64 insertions(+), 138 deletions(-) diff --git a/cyberdrop_dl/config/settings.py b/cyberdrop_dl/config/settings.py index 325870952..746b8a7a6 100755 --- a/cyberdrop_dl/config/settings.py +++ b/cyberdrop_dl/config/settings.py @@ -2,6 +2,7 @@ import random import re from datetime import date, datetime, timedelta +from functools import cached_property from logging import DEBUG from pathlib import Path from typing import Literal @@ -103,6 +104,10 @@ class Logs(SettingsGroup): unsupported_urls: LogPath = Path("Unsupported_URLs.csv") webhook: HttpAppriseURL | None = None + @cached_property + def jsonl_file(self): + return self.main_log.with_suffix(".results.jsonl") + @field_validator("webhook", mode="before") @classmethod def handle_falsy(cls, value: str) -> str | None: diff --git a/cyberdrop_dl/constants.py b/cyberdrop_dl/constants.py index f2571c6ff..953c5181b 100644 --- a/cyberdrop_dl/constants.py +++ b/cyberdrop_dl/constants.py @@ -30,7 +30,6 @@ # logging CONSOLE_LEVEL = 100 MAX_NAME_LENGTHS = {"FILE": 95, "FOLDER": 60} -CSV_DELIMITER = "," LOG_OUTPUT_TEXT = Text("") VALIDATION_ERROR_FOOTER = """Please delete the file or fix the errors""" diff --git a/cyberdrop_dl/crawlers/_forum.py b/cyberdrop_dl/crawlers/_forum.py index 4391e42a3..f595de7f2 100644 --- a/cyberdrop_dl/crawlers/_forum.py +++ b/cyberdrop_dl/crawlers/_forum.py @@ -327,7 +327,7 @@ async def handle_internal_link(self, scrape_item: ScrapeItem, link: AbsoluteHttp async def write_last_forum_post(self, thread_url: AbsoluteHttpURL, last_post_url: AbsoluteHttpURL | None) -> None: if not last_post_url or last_post_url == thread_url: return - self.manager.log_manager.write_last_post_log(last_post_url) + self.manager.logs.write_last_post_log(last_post_url) # TODO: Move this to the base crawler # TODO: Define an unified workflow for crawlers to perform and check login diff --git a/cyberdrop_dl/crawlers/crawler.py b/cyberdrop_dl/crawlers/crawler.py index 59da74ab3..37bfcbbbe 100644 --- a/cyberdrop_dl/crawlers/crawler.py +++ b/cyberdrop_dl/crawlers/crawler.py @@ -457,7 +457,7 @@ async def __write_to_jsonl(self, media_item: MediaItem) -> None: return data = [media_item.as_jsonable_dict()] - await self.manager.log_manager.write_jsonl(data) + await self.manager.logs.write_jsonl(data) async def check_complete(self, url: AbsoluteHttpURL, referer: AbsoluteHttpURL) -> bool: """Checks if this URL has been download before. diff --git a/cyberdrop_dl/director.py b/cyberdrop_dl/director.py index ba24060c7..4c17853ec 100644 --- a/cyberdrop_dl/director.py +++ b/cyberdrop_dl/director.py @@ -62,7 +62,7 @@ async def wrapper(*args, **kwargs) -> R | None: async def _run_manager(manager: Manager) -> None: """Runs the program and handles the UI.""" manager.path_manager.startup() - manager.log_manager.startup() + manager.logs.startup() debug_log_file_path = _setup_debug_logger(manager) start_time = manager.start_time _setup_main_logger(manager) @@ -123,9 +123,6 @@ async def _post_runtime(manager: Manager) -> None: check_partials_and_empty_folders(manager) - if config.get().runtime_options.update_last_forum_post: - await manager.log_manager.update_last_forum_post() - def _setup_debug_logger(manager: Manager) -> Path | None: if not env.DEBUG_VAR: diff --git a/cyberdrop_dl/downloader/downloader.py b/cyberdrop_dl/downloader/downloader.py index d38032137..df9fb2b1e 100644 --- a/cyberdrop_dl/downloader/downloader.py +++ b/cyberdrop_dl/downloader/downloader.py @@ -493,6 +493,6 @@ def write_download_error( self.attempt_task_removal(media_item) full_message = f"{self.log_prefix} Failed: {media_item.url} ({error_log_msg.main_log_msg}) \n -> Referer: {media_item.referer}" log(full_message, 40, exc_info=exc_info) - self.manager.log_manager.write_download_error_log(media_item, error_log_msg.csv_log_msg) + self.manager.logs.write_download_error_log(media_item, error_log_msg.csv_log_msg) self.manager.progress_manager.download_errors.add_failure(error_log_msg.ui_failure) self.manager.progress_manager.files.add_failed() diff --git a/cyberdrop_dl/managers/__init__.py b/cyberdrop_dl/managers/__init__.py index 123c7142e..6fee0961e 100644 --- a/cyberdrop_dl/managers/__init__.py +++ b/cyberdrop_dl/managers/__init__.py @@ -60,7 +60,7 @@ def __init__(self) -> None: self.config_manager.startup() self.path_manager.startup() - self.log_manager: LogManager = LogManager(self) + self.logs: LogManager = LogManager(config.get()) log_app_state() """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" diff --git a/cyberdrop_dl/managers/config_manager.py b/cyberdrop_dl/managers/config_manager.py index 7ace2bb6a..c82bc6cdb 100644 --- a/cyberdrop_dl/managers/config_manager.py +++ b/cyberdrop_dl/managers/config_manager.py @@ -184,7 +184,7 @@ def change_config(self, config_name: str) -> None: self.manager.path_manager.startup() sleep(1) - self.manager.log_manager = LogManager(self.manager) + self.manager.logs = LogManager(self.manager) sleep(1) def _set_apprise_fixed(self): diff --git a/cyberdrop_dl/managers/log_manager.py b/cyberdrop_dl/managers/log_manager.py index 41b5a67f0..3a0a9ff98 100644 --- a/cyberdrop_dl/managers/log_manager.py +++ b/cyberdrop_dl/managers/log_manager.py @@ -2,148 +2,83 @@ import asyncio import csv +import dataclasses from collections import defaultdict -from pathlib import Path from typing import TYPE_CHECKING, Any -from cyberdrop_dl.constants import CSV_DELIMITER from cyberdrop_dl.exceptions import get_origin from cyberdrop_dl.utils import json -from cyberdrop_dl.utils.logger import log, log_spacer if TYPE_CHECKING: from collections.abc import Iterable + from pathlib import Path from yarl import URL - from cyberdrop_dl.data_structures.url_objects import MediaItem - from cyberdrop_dl.managers import Manager + from cyberdrop_dl.config import Config + from cyberdrop_dl.data_structures.url_objects import MediaItem, ScrapeItem +_CSV_DELIMITER = "," +_file_locks: defaultdict[Path, asyncio.Lock] = defaultdict(asyncio.Lock) + + +@dataclasses.dataclass(slots=True, frozen=True) class LogManager: - def __init__(self, manager: Manager) -> None: - self.manager = manager - self.main_log: Path = manager.path_manager.main_log - self.last_post_log: Path = manager.path_manager.last_forum_post_log - self.unsupported_urls_log: Path = manager.path_manager.unsupported_urls_log - self.download_error_log: Path = manager.path_manager.download_error_urls_log - self.scrape_error_log: Path = manager.path_manager.scrape_error_urls_log - self.jsonl_file = self.main_log.with_suffix(".results.jsonl") - self._file_locks: dict[Path, asyncio.Lock] = defaultdict(asyncio.Lock) - self._has_headers: set[Path] = set() - - def startup(self) -> None: - """Startup process for the file manager.""" - for var in vars(self).values(): - if isinstance(var, Path): - var.unlink(missing_ok=True) - - async def write_jsonl(self, data: Iterable[dict[str, Any]]): - async with self._file_locks[self.jsonl_file]: - await json.dump_jsonl(data, self.jsonl_file) - - async def _write_to_csv(self, file: Path, **kwargs) -> None: + config: Config + task_group: asyncio.TaskGroup = dataclasses.field(default_factory=asyncio.TaskGroup, repr=False) + _has_headers: set[Path] = dataclasses.field(init=False, default_factory=set) + + async def write_jsonl(self, data: Iterable[dict[str, Any]]) -> None: + async with _file_locks[self.config.logs.jsonl_file]: + await asyncio.to_thread(json.dump_jsonl, data, self.config.logs.jsonl_file) + + async def _write_to_csv(self, file: Path, **kwargs: Any) -> None: """Write to the specified csv file. kwargs are columns for the CSV.""" - async with self._file_locks[file]: - write_headers = file not in self._has_headers + async with _file_locks[file]: + is_first_write = file not in self._has_headers self._has_headers.add(file) def write(): + if is_first_write: + file.parent.mkdir(parents=True, exist_ok=True) + file.unlink(missing_ok=True) + with file.open("a", encoding="utf8", newline="") as csv_file: writer = csv.DictWriter( - csv_file, fieldnames=kwargs.keys(), delimiter=CSV_DELIMITER, quoting=csv.QUOTE_ALL + csv_file, fieldnames=kwargs, delimiter=_CSV_DELIMITER, quoting=csv.QUOTE_ALL ) - if write_headers: + if is_first_write: writer.writeheader() writer.writerow(kwargs) await asyncio.to_thread(write) def write_last_post_log(self, url: URL) -> None: - """Writes to the last post log.""" - self.manager.task_group.create_task(self._write_to_csv(self.last_post_log, url=url)) + _ = self.task_group.create_task(self._write_to_csv(self.config.logs.last_forum_post, url=url)) - def write_unsupported_urls_log(self, url: URL, origin: URL | None = None) -> None: - """Writes to the unsupported urls log.""" - self.manager.task_group.create_task(self._write_to_csv(self.unsupported_urls_log, url=url, origin=origin)) + def write_unsupported(self, url: URL, origin: ScrapeItem | URL | None = None) -> None: + _ = self.task_group.create_task( + self._write_to_csv(self.config.logs.unsupported_urls, url=url, origin=get_origin(origin)) + ) def write_download_error_log(self, media_item: MediaItem, error_message: str) -> None: - """Writes to the download error log.""" - origin = get_origin(media_item) - self.manager.task_group.create_task( + _ = self.task_group.create_task( self._write_to_csv( - self.download_error_log, + self.config.logs.download_error_urls, url=media_item.url, error=error_message, referer=media_item.referer, - origin=origin, + origin=get_origin(media_item), ) ) def write_scrape_error_log(self, url: URL | str, error_message: str, origin: URL | Path | None = None) -> None: - """Writes to the scrape error log.""" - self.manager.task_group.create_task( - self._write_to_csv(self.scrape_error_log, url=url, error=error_message, origin=origin) + _ = self.task_group.create_task( + self._write_to_csv( + self.config.logs.scrape_error_urls, + url=url, + error=error_message, + origin=origin, + ) ) - - async def update_last_forum_post(self) -> None: - """Updates the last forum post.""" - input_file = self.manager.path_manager.input_file - - def proceed(): - return input_file.is_file() and self.last_post_log.is_file() - - if await asyncio.to_thread(proceed): - await asyncio.to_thread(_update_last_forum_post, input_file, self.last_post_log) - - -def _update_last_forum_post(input_file: Path, last_post_log: Path) -> None: - log_spacer(20) - log("Updating Last Forum Posts...\n", 20) - - current_urls, current_base_urls, new_urls, new_base_urls = [], [], [], [] - try: - with input_file.open(encoding="utf8") as f: - for line in f: - url = base_url = line.strip().removesuffix("/") - - if "https" in url and "/post-" in url: - base_url = url.rsplit("/post", 1)[0] - - # only keep 1 url of the same thread - if base_url not in current_base_urls: - current_urls.append(url) - current_base_urls.append(base_url) - except UnicodeDecodeError: - log("Unable to read input file, skipping update_last_forum_post", 40) - return - - with last_post_log.open(encoding="utf8") as f: - reader = csv.DictReader(f.readlines()) - for row in reader: - new_url = base_url = row.get("url").strip().removesuffix("/") # type: ignore - - if "https" in new_url and "/post-" in new_url: - base_url = new_url.rsplit("/post", 1)[0] - - # only keep 1 url of the same thread - if base_url not in new_base_urls: - new_urls.append(new_url) - new_base_urls.append(base_url) - - updated_urls = current_urls.copy() - for new_url, base in zip(new_urls, new_base_urls, strict=False): - if base in current_base_urls: - index = current_base_urls.index(base) - old_url = current_urls[index] - if old_url == new_url: - continue - log(f"Updating {base}\n {old_url = }\n {new_url = }", 20) - updated_urls[index] = new_url - - if updated_urls == current_urls: - log("No URLs updated", 20) - return - - with input_file.open("w", encoding="utf8") as f: - f.write("\n".join(updated_urls)) diff --git a/cyberdrop_dl/scrape_mapper.py b/cyberdrop_dl/scrape_mapper.py index 5a2ef5107..3b8a3bc32 100644 --- a/cyberdrop_dl/scrape_mapper.py +++ b/cyberdrop_dl/scrape_mapper.py @@ -232,18 +232,12 @@ async def send_to_crawler(self, scrape_item: ScrapeItem) -> None: success = True except JDownloaderError as e: log(f"Failed to send {scrape_item.url} to JDownloader\n{e.message}", 40) - self.manager.log_manager.write_unsupported_urls_log( - scrape_item.url, - scrape_item.parents[0] if scrape_item.parents else None, - ) + self.manager.logs.write_unsupported(scrape_item.url, scrape_item) self.manager.progress_manager.scrape_errors.add_unsupported(sent_to_jdownloader=success) return log(f"Unsupported URL: {scrape_item.url}", 30) - self.manager.log_manager.write_unsupported_urls_log( - scrape_item.url, - scrape_item.parents[0] if scrape_item.parents else None, - ) + self.manager.logs.write_unsupported(scrape_item.url, scrape_item) self.manager.progress_manager.scrape_errors.add_unsupported() def should_scrape(self, scrape_item: ScrapeItem) -> bool: diff --git a/cyberdrop_dl/utils/json.py b/cyberdrop_dl/utils/json.py index a218160a4..20f4adc34 100644 --- a/cyberdrop_dl/utils/json.py +++ b/cyberdrop_dl/utils/json.py @@ -1,6 +1,5 @@ from __future__ import annotations -import asyncio import base64 import dataclasses import datetime @@ -151,14 +150,11 @@ def dumps(obj: object, /, *, sort_keys: bool = False, indent: int | None = None) return encoder.encode(obj) -async def dump_jsonl(data: Iterable[dict[str, Any]], /, file: Path, *, append: bool = True) -> None: - def dump(): - with file.open(mode="a" if append else "w", encoding="utf8") as f: - for item in data: - f.writelines(_DEFAULT_ENCODER.iterencode(item)) - f.write("\n") - - await asyncio.to_thread(dump) +def dump_jsonl(data: Iterable[dict[str, Any]], /, file: Path, *, append: bool = True) -> None: + with file.open(mode="a" if append else "w", encoding="utf8") as f: + for item in data: + f.writelines(_DEFAULT_ENCODER.iterencode(item)) + f.write("\n") loads = _verbose_decode_error_msg(json.loads) diff --git a/cyberdrop_dl/utils/utilities.py b/cyberdrop_dl/utils/utilities.py index a26f92702..d523e794a 100644 --- a/cyberdrop_dl/utils/utilities.py +++ b/cyberdrop_dl/utils/utilities.py @@ -138,7 +138,7 @@ def error_handling_context(self: Crawler | Downloader, item: ScrapeItem | MediaI return log(f"Scrape Failed: {link_to_show} ({error_log_msg.main_log_msg})", 40, exc_info=exc_info) - self.manager.log_manager.write_scrape_error_log(link_to_show, error_log_msg.csv_log_msg, origin) + self.manager.logs.write_scrape_error_log(link_to_show, error_log_msg.csv_log_msg, origin) self.manager.progress_manager.scrape_errors.add_failure(error_log_msg.ui_failure) diff --git a/tests/conftest.py b/tests/conftest.py index 46f3fd62b..28754a473 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,9 +4,9 @@ from typing import TYPE_CHECKING import pytest +from cyberdrop_dl.scraper import scrape_mapper from cyberdrop_dl.managers import Manager -from cyberdrop_dl.scraper import scrape_mapper if TYPE_CHECKING: from collections.abc import AsyncGenerator @@ -71,7 +71,7 @@ def post_startup_manager(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Man bare_manager = Manager(("--appdata-folder", appdata, "-d", downloads, "--download-tiktok-audios")) bare_manager.startup() bare_manager.path_manager.startup() - bare_manager.log_manager.startup() + bare_manager.logs.startup() return bare_manager diff --git a/tests/crawlers/test_xenforo.py b/tests/crawlers/test_xenforo.py index e735a3ee1..6d0103ef2 100644 --- a/tests/crawlers/test_xenforo.py +++ b/tests/crawlers/test_xenforo.py @@ -76,7 +76,7 @@ async def post_startup_manager(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) manager = Manager(("--appdata-folder", appdata, "-d", downloads)) manager.startup() manager.path_manager.startup() - manager.log_manager.startup() + manager.logs.startup() await manager.async_startup() yield manager await manager.async_db_close() diff --git a/tests/test_manager.py b/tests/test_manager.py index 0545e9781..21b8750d7 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -124,7 +124,7 @@ async def test_async_db_close(running_manager: Manager) -> None: await running_manager.async_startup() assert not isinstance(running_manager.db_manager, Field) assert not isinstance(running_manager.hash_manager, Field) - assert "overwrite" not in str(running_manager.log_manager.main_log) + assert "overwrite" not in str(running_manager.logs.main_log) await running_manager.async_db_close() assert isinstance(running_manager.db_manager, Field) assert isinstance(running_manager.hash_manager, Field) From 218f5c717e3022e57649350c44e3439d2e8ef96c Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Mon, 23 Feb 2026 18:04:40 -0500 Subject: [PATCH 17/23] refactor: remove path manager and config_manager --- cyberdrop_dl/clients/download_client.py | 4 +- cyberdrop_dl/clients/hash_client.py | 8 +- cyberdrop_dl/clients/scraper_client.py | 4 - cyberdrop_dl/config/settings.py | 9 +- cyberdrop_dl/cookies.py | 17 +- cyberdrop_dl/director.py | 3 - cyberdrop_dl/downloader/downloader.py | 4 +- cyberdrop_dl/managers/__init__.py | 38 +++-- cyberdrop_dl/managers/client_manager.py | 5 +- cyberdrop_dl/managers/config_manager.py | 217 ------------------------ cyberdrop_dl/managers/log_manager.py | 2 +- cyberdrop_dl/managers/path_manager.py | 166 ------------------ cyberdrop_dl/scrape_mapper.py | 10 +- cyberdrop_dl/utils/apprise.py | 4 +- cyberdrop_dl/utils/sorting.py | 15 +- cyberdrop_dl/utils/utilities.py | 12 +- cyberdrop_dl/utils/webhook.py | 2 +- tests/conftest.py | 3 - tests/crawlers/test_xenforo.py | 3 - tests/test_apprise.py | 8 +- tests/test_cli.py | 5 +- tests/test_hashing.py | 15 +- 22 files changed, 81 insertions(+), 473 deletions(-) delete mode 100644 cyberdrop_dl/managers/config_manager.py delete mode 100644 cyberdrop_dl/managers/path_manager.py diff --git a/cyberdrop_dl/clients/download_client.py b/cyberdrop_dl/clients/download_client.py index ef27a12f4..baba5c802 100644 --- a/cyberdrop_dl/clients/download_client.py +++ b/cyberdrop_dl/clients/download_client.py @@ -347,7 +347,7 @@ async def handle_media_item_completion(self, media_item: MediaItem, downloaded: try: media_item.downloaded = downloaded await self.manager.hash_manager.hash_client.hash_item_during_download(media_item) - self.manager.path_manager.add_completed(media_item) + self.manager.add_completed(media_item) except Exception: log(f"Error handling media item completion of: {media_item.complete_file}", 10, exc_info=True) @@ -358,7 +358,7 @@ def get_download_dir(self, media_item: MediaItem) -> Path: download_folder = media_item.download_folder if config.get().download_options.block_download_sub_folders: - while download_folder.parent != self.manager.path_manager.download_folder: + while download_folder.parent != config.get().files.download_folder: download_folder = download_folder.parent media_item.download_folder = download_folder return download_folder diff --git a/cyberdrop_dl/clients/hash_client.py b/cyberdrop_dl/clients/hash_client.py index b3dc1b911..d5199cf23 100644 --- a/cyberdrop_dl/clients/hash_client.py +++ b/cyberdrop_dl/clients/hash_client.py @@ -205,21 +205,19 @@ async def _delete_and_log(self, file: Path, xxh128_value: str) -> None: async def get_file_hashes_dict(self) -> dict: """Get a dictionary of files based on matching file hashes and file size.""" - downloads = self.manager.path_manager.completed_downloads - self.hashed_media_items + downloads = self.manager.completed_downloads - self.hashed_media_items async def exists(item: MediaItem) -> MediaItem | None: if await asyncio.to_thread(item.complete_file.is_file): return item - results = await asyncio.gather(*(exists(item) for item in downloads)) - for media_item in results: - if media_item is None: - continue + for media_item in filter(None, await asyncio.gather(*(exists(item) for item in downloads))): try: await self.hash_item(media_item) except Exception as e: msg = f"Unable to hash file = {media_item.complete_file}: {e}" log(msg, 40) + return self.hashes_dict diff --git a/cyberdrop_dl/clients/scraper_client.py b/cyberdrop_dl/clients/scraper_client.py index 607bb66ba..3774954fb 100644 --- a/cyberdrop_dl/clients/scraper_client.py +++ b/cyberdrop_dl/clients/scraper_client.py @@ -29,10 +29,6 @@ class ScraperClient: def __init__(self, client_manager: ClientManager) -> None: self.client_manager = client_manager - self._save_pages_html = client_manager.config.get().files.save_pages_html - self._pages_folder = self.client_manager.manager.path_manager.pages_folder - min_html_file_path_len = len(str(self._pages_folder)) + len(constants.STARTUP_TIME_STR) + 10 - self._max_html_stem_len = 245 - min_html_file_path_len @contextlib.asynccontextmanager async def _limiter(self, domain: str) -> AsyncGenerator[None]: diff --git a/cyberdrop_dl/config/settings.py b/cyberdrop_dl/config/settings.py index 746b8a7a6..a5f8bd5a5 100755 --- a/cyberdrop_dl/config/settings.py +++ b/cyberdrop_dl/config/settings.py @@ -18,7 +18,6 @@ field_serializer, field_validator, ) -from yarl import URL from cyberdrop_dl import constants from cyberdrop_dl.constants import BROWSERS, DEFAULT_APP_STORAGE, DEFAULT_DOWNLOAD_STORAGE, Hashing @@ -60,7 +59,7 @@ def _validate_format(cls, value: str, valid_keys: set[str]) -> None: validate_format_string(value, valid_keys) -class DownloadOptions(SettingsGroup): +class DownloadOptions(FormatValidator, SettingsGroup): block_download_sub_folders: bool = False disable_download_attempt_limit: bool = False disable_file_timestamps: bool = False @@ -79,10 +78,8 @@ class DownloadOptions(SettingsGroup): @field_validator("separate_posts_format", mode="after") @classmethod def valid_format(cls, value: str) -> str: - from cyberdrop_dl.utils.strings import validate_format_string - valid_keys = {"default", "title", "id", "number", "date"} - validate_format_string(value, valid_keys) + cls._validate_format(value, valid_keys) return value @@ -325,7 +322,7 @@ def unique_list(cls, value: list[str]) -> list[str]: return sorted(set(value)) @field_serializer("flaresolverr", "proxy") - def serialize(self, value: URL | str) -> str | None: + def serialize(self, value: str) -> str | None: return falsy_as(value, None, str) @field_validator("flaresolverr", "proxy", mode="before") diff --git a/cyberdrop_dl/cookies.py b/cyberdrop_dl/cookies.py index 66c831197..ecad96d29 100644 --- a/cyberdrop_dl/cookies.py +++ b/cyberdrop_dl/cookies.py @@ -9,6 +9,7 @@ from textwrap import dedent from typing import TYPE_CHECKING, NamedTuple, ParamSpec, TypeVar +from cyberdrop_dl import appdata from cyberdrop_dl.dependencies import browser_cookie3 from cyberdrop_dl.utils.logger import log @@ -76,10 +77,10 @@ def get_cookies_from_browsers(manager: Manager, *, browser: BROWSERS, domains: l msg = "None of the provided browsers is supported for extraction" raise ValueError(msg) - manager.path_manager.cookies_dir.mkdir(parents=True, exist_ok=True) + appdata.get().cookies_dir.mkdir(parents=True, exist_ok=True) domains_with_cookies: set[str] = set() for domain in domains_to_extract: - cookie_file_path = manager.path_manager.cookies_dir / f"{domain}.txt" + cookie_file_path = appdata.get().cookies_dir / f"{domain}.txt" cdl_cookie_jar = MozillaCookieJar(cookie_file_path) for cookie in extracted_cookies: if domain in cookie.domain: @@ -92,15 +93,11 @@ def get_cookies_from_browsers(manager: Manager, *, browser: BROWSERS, domains: l return domains_with_cookies -def clear_cookies(manager: Manager, domains: list[str]) -> None: - if not domains: - raise ValueError("No domains selected") - - manager.path_manager.cookies_dir.mkdir(parents=True, exist_ok=True) +def clear_cookies(*domains: str) -> None: + appdata.get().cookies_dir.mkdir(parents=True, exist_ok=True) for domain in domains: - cookie_file_path = manager.path_manager.cookies_dir / f"{domain}.txt" - cookie_jar = MozillaCookieJar(cookie_file_path) - cookie_jar.save(ignore_discard=True, ignore_expires=True) + cookie_file_path = appdata.get().cookies_dir / f"{domain}.txt" + cookie_file_path.unlink(missing_ok=True) def extract_cookies(extractor_name: str) -> CookieJar: diff --git a/cyberdrop_dl/director.py b/cyberdrop_dl/director.py index 4c17853ec..9c582cbf4 100644 --- a/cyberdrop_dl/director.py +++ b/cyberdrop_dl/director.py @@ -60,9 +60,6 @@ async def wrapper(*args, **kwargs) -> R | None: @_ui_error_handling_wrapper async def _run_manager(manager: Manager) -> None: - """Runs the program and handles the UI.""" - manager.path_manager.startup() - manager.logs.startup() debug_log_file_path = _setup_debug_logger(manager) start_time = manager.start_time _setup_main_logger(manager) diff --git a/cyberdrop_dl/downloader/downloader.py b/cyberdrop_dl/downloader/downloader.py index df9fb2b1e..1fe3bcd6e 100644 --- a/cyberdrop_dl/downloader/downloader.py +++ b/cyberdrop_dl/downloader/downloader.py @@ -144,9 +144,9 @@ def startup(self) -> None: self.client = self.manager.client_manager.download_client self._semaphore = asyncio.Semaphore(self.manager.client_manager.get_download_slots(self.domain)) - self.manager.path_manager.download_folder.mkdir(parents=True, exist_ok=True) + config.get().files.download_folder.mkdir(parents=True, exist_ok=True) if config.get().sorting.sort_downloads: - self.manager.path_manager.sorted_folder.mkdir(parents=True, exist_ok=True) + config.get().sorting.sort_folder.mkdir(parents=True, exist_ok=True) def update_queued_files(self, increase_total: bool = True): queued_files = self.manager.progress_manager.downloads.get_queue_length() diff --git a/cyberdrop_dl/managers/__init__.py b/cyberdrop_dl/managers/__init__.py index 6fee0961e..f0d7ba3a4 100644 --- a/cyberdrop_dl/managers/__init__.py +++ b/cyberdrop_dl/managers/__init__.py @@ -10,11 +10,9 @@ from cyberdrop_dl import __version__, appdata, config, constants from cyberdrop_dl.database import Database from cyberdrop_dl.managers.client_manager import ClientManager -from cyberdrop_dl.managers.config_manager import ConfigManager from cyberdrop_dl.managers.hash_manager import HashManager from cyberdrop_dl.managers.live_manager import LiveManager from cyberdrop_dl.managers.log_manager import LogManager -from cyberdrop_dl.managers.path_manager import PathManager from cyberdrop_dl.managers.storage_manager import StorageManager from cyberdrop_dl.progress import ProgressManager from cyberdrop_dl.utils import ffmpeg @@ -23,7 +21,9 @@ if TYPE_CHECKING: from asyncio import TaskGroup + from pathlib import Path + from cyberdrop_dl.data_structures.url_objects import MediaItem from cyberdrop_dl.scrape_mapper import ScrapeMapper @@ -45,7 +45,7 @@ def __init__(self) -> None: self.progress_manager: ProgressManager = ProgressManager(self, portrait=False) self.live_manager: LiveManager = field(init=False) - self.task_group: TaskGroup = field(init=False) + self.task_group: TaskGroup = asyncio.TaskGroup() self.scrape_mapper: ScrapeMapper = field(init=False) self.start_time: float = perf_counter() @@ -54,14 +54,30 @@ def __init__(self) -> None: constants.console_handler = LogHandler(level=constants.CONSOLE_LEVEL) - self.path_manager: PathManager = PathManager(self) - self.path_manager.pre_startup() - self.config_manager: ConfigManager = ConfigManager(self) - self.config_manager.startup() - - self.path_manager.startup() - self.logs: LogManager = LogManager(config.get()) + self.logs: LogManager = LogManager(config.get(), self.task_group) log_app_state() + self._completed_downloads: set[MediaItem] = set() + self._completed_downloads_paths: set[Path] = set() + self._prev_downloads: set[MediaItem] = set() + self._prev_downloads_paths: set[Path] = set() + + def add_completed(self, media_item: MediaItem) -> None: + if media_item.is_segment: + return + self._completed_downloads.add(media_item) + self._completed_downloads_paths.add(media_item.complete_file) + + def add_prev(self, media_item: MediaItem) -> None: + self._prev_downloads.add(media_item) + self._prev_downloads_paths.add(media_item.complete_file) + + @property + def completed_downloads(self) -> set[MediaItem]: + return self._completed_downloads + + @property + def prev_downloads(self) -> set[MediaItem]: + return self._prev_downloads """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" @@ -79,7 +95,7 @@ async def async_startup(self) -> None: async def async_db_hash_startup(self) -> None: self.db_manager = Database( - self.path_manager.history_db, + appdata.get().db_file, config.get().runtime_options.ignore_history, ) await self.db_manager.startup() diff --git a/cyberdrop_dl/managers/client_manager.py b/cyberdrop_dl/managers/client_manager.py index a7d3b6631..510ab128b 100644 --- a/cyberdrop_dl/managers/client_manager.py +++ b/cyberdrop_dl/managers/client_manager.py @@ -14,7 +14,7 @@ from aiohttp import ClientResponse, ClientSession from aiolimiter import AsyncLimiter -from cyberdrop_dl import config, constants, ddos_guard, env +from cyberdrop_dl import appdata, config, constants, ddos_guard, env from cyberdrop_dl.clients.download_client import DownloadClient from cyberdrop_dl.clients.flaresolverr import FlareSolverr from cyberdrop_dl.clients.response import AbstractResponse @@ -299,9 +299,10 @@ async def load_cookie_files(self) -> None: assert config.get().browser_cookies.browser get_cookies_from_browsers(self.manager, browser=config.get().browser_cookies.browser) - cookie_files = sorted(self.manager.path_manager.cookies_dir.glob("*.txt")) + cookie_files = sorted(appdata.get().cookies_dir.glob("*.txt")) if not cookie_files: return + async for domain, cookie in read_netscape_files(cookie_files): self.cookies.update_cookies(cookie, response_url=AbsoluteHttpURL(f"https://{domain}")) diff --git a/cyberdrop_dl/managers/config_manager.py b/cyberdrop_dl/managers/config_manager.py deleted file mode 100644 index c82bc6cdb..000000000 --- a/cyberdrop_dl/managers/config_manager.py +++ /dev/null @@ -1,217 +0,0 @@ -from __future__ import annotations - -import os -import shutil -from dataclasses import field -from time import sleep -from typing import TYPE_CHECKING - -from cyberdrop_dl import cache -from cyberdrop_dl.config import AuthSettings, ConfigSettings, GlobalSettings -from cyberdrop_dl.exceptions import InvalidYamlError -from cyberdrop_dl.managers.log_manager import LogManager -from cyberdrop_dl.utils import yaml -from cyberdrop_dl.utils.apprise import get_apprise_urls - -if TYPE_CHECKING: - from pathlib import Path - - from pydantic import BaseModel - - from cyberdrop_dl.managers import Manager - from cyberdrop_dl.utils.apprise import AppriseURL - - -class ConfigManager: - def __init__(self, manager: Manager) -> None: - self.manager = manager - self.loaded_config: str = "" - - self.authentication_settings: Path = field(init=False) - self.settings: Path = field(init=False) - self.global_settings: Path = field(init=False) - self.deep_scrape: bool = False - self.apprise_urls: list[AppriseURL] = [] - - self.authentication_data: AuthSettings = field(init=False) - self.settings_data: ConfigSettings = field(init=False) - self.global_settings_data: GlobalSettings = field(init=False) - self.pydantic_config: str | None = None - - def startup(self) -> None: - """Startup process for the config manager.""" - self.loaded_config = self.get_loaded_config() - self.settings = self.manager.path_manager.config_folder / self.loaded_config / "settings.yaml" - self.global_settings = self.manager.path_manager.config_folder / "global_settings.yaml" - self.authentication_settings = self.manager.path_manager.config_folder / "authentication.yaml" - auth_override = self.manager.path_manager.config_folder / self.loaded_config / "authentication.yaml" - - if auth_override.is_file(): - self.authentication_settings = auth_override - - self.settings.parent.mkdir(parents=True, exist_ok=True) - self.pydantic_config = cache.get().get("pydantic_config") - self.load_configs() - - def get_loaded_config(self): - return self.loaded_config or self.get_default_config() - - def get_default_config(self) -> str: - return cache.get().get("default_config") or "Default" - - def load_configs(self) -> None: - """Loads all the configs.""" - self._load_authentication_config() - self._load_global_settings_config() - self._load_settings_config() - self.apprise_file = self.manager.path_manager.config_folder / self.loaded_config / "apprise.txt" - self.apprise_urls = get_apprise_urls(file=self.apprise_file) - self._set_apprise_fixed() - self._set_pydantic_config() - - @staticmethod - def get_model_fields(model: BaseModel, *, exclude_unset: bool = True) -> set[str]: - fields = set() - default_dict: dict = model.model_dump(exclude_unset=exclude_unset) - for submodel_name, submodel in default_dict.items(): - for field_name in submodel: - fields.add(f"{submodel_name}.{field_name}") - return fields - - def _load_authentication_config(self) -> None: - """Verifies the authentication config file and creates it if it doesn't exist.""" - needs_update = is_in_file("socialmediagirls_username:", self.authentication_settings) - posible_fields = self.get_model_fields(AuthSettings(), exclude_unset=False) - if self.authentication_settings.is_file(): - self.authentication_data = AuthSettings.model_validate(yaml.load(self.authentication_settings)) - set_fields = self.get_model_fields(self.authentication_data) - if posible_fields == set_fields and not needs_update and self.pydantic_config: - return - - else: - self.authentication_data = AuthSettings() - - yaml.save(self.authentication_settings, self.authentication_data) - - def _load_settings_config(self) -> None: - """Verifies the settings config file and creates it if it doesn't exist.""" - needs_update = is_in_file("download_error_urls_filename:", self.settings) - posible_fields = self.get_model_fields(ConfigSettings(), exclude_unset=False) - if self.manager.parsed_args.cli_only_args.config_file: - self.settings = self.manager.parsed_args.cli_only_args.config_file - self.loaded_config = "CLI-Arg Specified" - - if self.settings.is_file(): - self.settings_data = ConfigSettings.model_validate(yaml.load(self.settings)) - set_fields = self.get_model_fields(self.settings_data) - self.deep_scrape = self.settings_data.runtime_options.deep_scrape - self.settings_data.runtime_options.deep_scrape = False - if posible_fields == set_fields and not needs_update and self.pydantic_config: - return - else: - self.settings_data = ConfigSettings() - self.settings_data.files.input_file = ( - self.manager.path_manager.appdata / "Configs" / self.loaded_config / "URLs.txt" - ) - downloads = self.manager.path_manager.cwd / "Downloads" - self.settings_data.sorting.sort_folder = downloads / "Cyberdrop-DL Sorted Downloads" - self.settings_data.files.download_folder = downloads / "Cyberdrop-DL Downloads" - self.settings_data.logs.log_folder = ( - self.manager.path_manager.appdata / "Configs" / self.loaded_config / "Logs" - ) - - yaml.save(self.settings, self.settings_data) - - def _load_global_settings_config(self) -> None: - """Verifies the global settings config file and creates it if it doesn't exist.""" - needs_update = is_in_file("Dupe_Cleanup_Options:", self.global_settings) - posible_fields = self.get_model_fields(GlobalSettings(), exclude_unset=False) - if self.global_settings.is_file(): - self.global_settings_data = GlobalSettings.model_validate(yaml.load(self.global_settings)) - set_fields = self.get_model_fields(self.global_settings_data) - if posible_fields == set_fields and not needs_update and self.pydantic_config: - return - else: - self.global_settings_data = GlobalSettings() - - yaml.save(self.global_settings, self.global_settings_data) - - """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - - def save_as_new_config(self, new_settings: Path, settings_data: ConfigSettings) -> None: - """Creates a new settings config file.""" - yaml.save(new_settings, settings_data) - - def write_updated_authentication_config(self) -> None: - """Write updated authentication data.""" - yaml.save(self.authentication_settings, self.authentication_data) - - def write_updated_settings_config(self) -> None: - """Write updated settings data.""" - yaml.save(self.settings, self.settings_data) - - def write_updated_global_settings_config(self) -> None: - """Write updated global settings data.""" - yaml.save(self.global_settings, self.global_settings_data) - - """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - - def get_configs(self) -> list: - """Returns a list of all the configs.""" - configs = [config.name for config in self.manager.path_manager.config_folder.iterdir() if config.is_dir()] - configs.sort() - return configs - - def change_default_config(self, config_name: str) -> None: - """Changes the default config.""" - cache.get().save("default_config", config_name) - - def delete_config(self, config_name: str) -> None: - """Deletes a config.""" - configs = self.get_configs() - configs.remove(config_name) - - if cache.get().get("default_config") == config_name: - cache.get().save("default_config", configs[0]) - - config = self.manager.path_manager.config_folder / config_name - shutil.rmtree(config) - - def change_config(self, config_name: str) -> None: - """Changes the config.""" - self.loaded_config = config_name - self.startup() - - self.manager.path_manager.startup() - sleep(1) - self.manager.logs = LogManager(self.manager) - sleep(1) - - def _set_apprise_fixed(self): - apprise_fixed = cache.get().get("apprise_fixed") - if apprise_fixed: - return - if os.name == "nt": - try: - import win32con # noqa: F401 - except ImportError: - pass - else: - with self.apprise_file.open("a", encoding="utf8") as f: - f.write("windows://\n") - cache.get().save("apprise_fixed", True) - - def _set_pydantic_config(self): - if self.pydantic_config: - return - cache.get().save("pydantic_config", True) - self.pydantic_config = True - - -def is_in_file(search_value: str, file: Path) -> bool: - if not file.is_file(): - return False - try: - return search_value.casefold() in file.read_text(encoding="utf8").casefold() - except Exception as e: - raise InvalidYamlError(file, e) from e diff --git a/cyberdrop_dl/managers/log_manager.py b/cyberdrop_dl/managers/log_manager.py index 3a0a9ff98..cb25f149e 100644 --- a/cyberdrop_dl/managers/log_manager.py +++ b/cyberdrop_dl/managers/log_manager.py @@ -26,7 +26,7 @@ @dataclasses.dataclass(slots=True, frozen=True) class LogManager: config: Config - task_group: asyncio.TaskGroup = dataclasses.field(default_factory=asyncio.TaskGroup, repr=False) + task_group: asyncio.TaskGroup = dataclasses.field(repr=False) _has_headers: set[Path] = dataclasses.field(init=False, default_factory=set) async def write_jsonl(self, data: Iterable[dict[str, Any]]) -> None: diff --git a/cyberdrop_dl/managers/path_manager.py b/cyberdrop_dl/managers/path_manager.py deleted file mode 100644 index ff91cc3d4..000000000 --- a/cyberdrop_dl/managers/path_manager.py +++ /dev/null @@ -1,166 +0,0 @@ -from __future__ import annotations - -import os -from dataclasses import Field, field -from datetime import datetime -from pathlib import Path -from typing import TYPE_CHECKING - -from cyberdrop_dl import config, env -from cyberdrop_dl.utils.utilities import purge_dir_tree - -if TYPE_CHECKING: - from cyberdrop_dl.data_structures.url_objects import MediaItem - from cyberdrop_dl.managers import Manager - - -class PathManager: - def __init__(self, manager: Manager) -> None: - self.manager = manager - - self.download_folder: Path = field(init=False) - self.sorted_folder: Path = field(init=False) - self.scan_folder: Path | None = field(init=False) - - self.log_folder: Path = field(init=False) - - self.cache_folder: Path = field(init=False) - self.config_folder: Path = field(init=False) - - self.input_file: Path = field(init=False) - self.history_db: Path = field(init=False) - self.cache_db: Path = field(init=False) - - self._completed_downloads: set[MediaItem] = set() - self._completed_downloads_paths: set[Path] = set() - self._prev_downloads: set[MediaItem] = set() - self._prev_downloads_paths: set[Path] = set() - - self.main_log: Path = field(init=False) - self.last_forum_post_log: Path = field(init=False) - self.unsupported_urls_log: Path = field(init=False) - self.download_error_urls_log: Path = field(init=False) - self.scrape_error_urls_log: Path = field(init=False) - self.pages_folder: Path = field(init=False) - - self._logs_model_names = [ - "main_log", - "last_forum_post", - "unsupported_urls", - "download_error_urls", - "scrape_error_urls", - ] - self._appdata: Path = field(init=False) - - @property - def cwd(self) -> Path: - if env.RUNNING_IN_IDE and Path.cwd().name == "cyberdrop_dl": - return Path("..").resolve() - return Path().resolve() - - @property - def appdata(self) -> Path: - if isinstance(self._appdata, Field): - self._appdata = self.cwd / "AppData" - - return self._appdata - - def pre_startup(self) -> None: - self.cache_folder = self.appdata / "Cache" - self.config_folder = self.appdata / "Configs" - self.cookies_dir = self.appdata / "Cookies" - self.cache_db = self.cache_folder / "request_cache.db" - - self.config_folder.mkdir(parents=True, exist_ok=True) - self.cookies_dir.mkdir(parents=True, exist_ok=True) - self.cache_db.touch(exist_ok=True) - - def startup(self) -> None: - """Startup process for the Directory Manager.""" - settings_data = config.get() - current_config = self.manager.config_manager.loaded_config - - def replace(path: Path) -> Path: - path_w_config = str(path).replace("{config}", current_config) - if os.name == "nt": - return self.cwd.joinpath(Path(path_w_config)).resolve() - normalized_path_str = path_w_config.replace("\\", "/") - return self.cwd.joinpath(Path(normalized_path_str)).resolve() - - self.download_folder = replace(settings_data.files.download_folder) - self.sorted_folder = replace(settings_data.sorting.sort_folder) - self.log_folder = replace(settings_data.logs.log_folder) - self.input_file = replace(settings_data.files.input_file) - self.history_db = self.cache_folder / "cyberdrop.db" - self.scan_folder = settings_data.sorting.scan_folder - if self.scan_folder: - self.scan_folder = replace(self.scan_folder) - - self.log_folder.mkdir(parents=True, exist_ok=True) - - now = datetime.now() - self._set_output_filenames(now) - self._delete_logs_and_folders(now) - self._create_output_folders() - - if not self.input_file.is_file(): - self.input_file.touch(exist_ok=True) - self.history_db.touch(exist_ok=True) - - def _set_output_filenames(self, now: datetime) -> None: - current_time_file_iso: str = now.strftime("%Y%m%d_%H%M%S") - current_time_folder_iso: str = now.strftime("%Y_%m_%d") - log_settings_config = config.get().logs - log_files: dict[str, Path] = log_settings_config.model_dump() - - for model_name, log_file in log_files.items(): - if model_name not in self._logs_model_names: - continue - if log_settings_config.rotate_logs: - new_name = f"{log_file.stem}_{current_time_file_iso}{log_file.suffix}" - log_file: Path = log_file.parent / current_time_folder_iso / new_name - log_files[model_name] = log_file - - log_settings_config = log_settings_config.model_copy(update=log_files) - - for model_name in self._logs_model_names: - internal_name = f"{model_name.replace('_log', '')}_log" - setattr(self, internal_name, self.log_folder / getattr(log_settings_config, model_name)) - - self.pages_folder = self.main_log.parent / "cdl_responses" - - def _delete_logs_and_folders(self, now: datetime): - if config.get().logs.logs_expire_after: - for file in set(self.log_folder.rglob("*.log")) | set(self.log_folder.rglob("*.csv")): - file_date = Path(file).stat().st_ctime - t_delta = now - datetime.fromtimestamp(file_date) - if t_delta > config.get().logs.logs_expire_after: - file.unlink(missing_ok=True) - purge_dir_tree(self.log_folder) - - def _create_output_folders(self): - for model_name in self._logs_model_names: - internal_name = f"{model_name.replace('_log', '')}_log" - path: Path = getattr(self, internal_name) - path.parent.mkdir(parents=True, exist_ok=True) - - if config.get().files.save_pages_html: - self.pages_folder.mkdir(parents=True, exist_ok=True) - - def add_completed(self, media_item: MediaItem) -> None: - if media_item.is_segment: - return - self._completed_downloads.add(media_item) - self._completed_downloads_paths.add(media_item.complete_file) - - def add_prev(self, media_item: MediaItem) -> None: - self._prev_downloads.add(media_item) - self._prev_downloads_paths.add(media_item.complete_file) - - @property - def completed_downloads(self) -> set[MediaItem]: - return self._completed_downloads - - @property - def prev_downloads(self) -> set[MediaItem]: - return self._prev_downloads diff --git a/cyberdrop_dl/scrape_mapper.py b/cyberdrop_dl/scrape_mapper.py index 3b8a3bc32..ef6e5c2b8 100644 --- a/cyberdrop_dl/scrape_mapper.py +++ b/cyberdrop_dl/scrape_mapper.py @@ -112,8 +112,8 @@ async def run(self) -> None: async for item in self.get_input_items(): self.manager.task_group.create_task(self.send_to_crawler(item)) - async def get_input_items(self) -> AsyncGenerator[ScrapeItem]: - items_generator = self.load_links(self.manager.path_manager.input_file) + async def get_input_items(self, input_file) -> AsyncGenerator[ScrapeItem]: + items_generator = self.load_links(input_file) children_limits = config.get().download_options.maximum_number_of_children async for item in items_generator: @@ -128,9 +128,9 @@ async def get_input_items(self) -> AsyncGenerator[ScrapeItem]: """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - async def parse_input_file_groups(self) -> AsyncGenerator[tuple[str, list[AbsoluteHttpURL]]]: + async def parse_input_file_groups(self, input_file) -> AsyncGenerator[tuple[str, list[AbsoluteHttpURL]]]: """Split URLs from input file by their groups.""" - input_file = self.manager.path_manager.input_file + if not await asyncio.to_thread(input_file.is_file): yield ("", []) return @@ -223,7 +223,7 @@ async def send_to_crawler(self, scrape_item: ScrapeItem) -> None: success = False try: download_folder = get_download_path(self.manager, scrape_item, "jdownloader") - relative_download_dir = download_folder.relative_to(self.manager.path_manager.download_folder) + relative_download_dir = download_folder.relative_to(config.get().files.download_folder) self.jdownloader.send( scrape_item.url, scrape_item.parent_title, diff --git a/cyberdrop_dl/utils/apprise.py b/cyberdrop_dl/utils/apprise.py index dd66d8517..78283fb8f 100644 --- a/cyberdrop_dl/utils/apprise.py +++ b/cyberdrop_dl/utils/apprise.py @@ -15,7 +15,7 @@ from pydantic import ValidationError from rich.text import Text -from cyberdrop_dl import constants +from cyberdrop_dl import config, constants from cyberdrop_dl.dependencies import apprise from cyberdrop_dl.models import AppriseURLModel from cyberdrop_dl.utils.logger import log, log_debug, log_spacer @@ -193,7 +193,7 @@ async def send_apprise_notifications(manager: Manager) -> tuple[constants.Notifi for apprise_url in apprise_urls: apprise_obj.add(apprise_url.url, tag=list(apprise_url.tags)) - main_log = manager.path_manager.main_log + main_log = config.get().logs.main_log all_urls = [x.raw_url for x in apprise_urls] log_lines = [] diff --git a/cyberdrop_dl/utils/sorting.py b/cyberdrop_dl/utils/sorting.py index 277f06c73..4f3b4ea4b 100644 --- a/cyberdrop_dl/utils/sorting.py +++ b/cyberdrop_dl/utils/sorting.py @@ -19,7 +19,7 @@ from cyberdrop_dl.managers import Manager -async def get_modified_date(file: Path) -> datetime: +async def _get_modified_date(file: Path) -> datetime: stat = await asyncio.to_thread(file.stat) return datetime.fromtimestamp(stat.st_mtime).replace(microsecond=0) @@ -27,10 +27,9 @@ async def get_modified_date(file: Path) -> datetime: class Sorter: def __init__(self, manager: Manager) -> None: self.manager = manager - self.download_folder = manager.path_manager.scan_folder or manager.path_manager.download_folder - self.sorted_folder = manager.path_manager.sorted_folder + self.download_folder = config.get().sorting.scan_folder or config.get().files.download_folder + self.sorted_folder = config.get().sorting.sort_folder self.incrementer_format: str = config.get().sorting.sort_incrementer_format - self.db_manager = manager.db_manager settings = config.get().sorting self.audio_format: str | None = settings.sorted_audio @@ -48,7 +47,7 @@ def _move_file(self, old_path: Path, new_path: Path) -> bool: new_path.parent.mkdir(parents=True, exist_ok=True) try: - old_path.rename(new_path) + _ = old_path.rename(new_path) except FileExistsError: if old_path.stat().st_size == new_path.stat().st_size: old_path.unlink() @@ -57,7 +56,7 @@ def _move_file(self, old_path: Path, new_path: Path) -> bool: new_filename = f"{new_path.stem}{self.incrementer_format.format(i=auto_index)}{new_path.suffix}" possible_new_path = new_path.parent / new_filename try: - old_path.rename(possible_new_path) + _ = old_path.rename(possible_new_path) break except FileExistsError: continue @@ -204,7 +203,7 @@ async def sort_other(self, file: Path, base_name: str) -> None: self.manager.progress_manager.sorting.increment_other() async def _process_file_move(self, file: Path, base_name: str, format_str: str, **kwargs: Any) -> bool: - file_date = await get_modified_date(file) + file_date = await _get_modified_date(file) file_date_us = file_date.strftime("%Y-%d-%m") file_date_iso = file_date.strftime("%Y-%m-%d") @@ -226,7 +225,7 @@ async def _process_file_move(self, file: Path, base_name: str, format_str: str, ) new_file = Path(file_path) - return self._move_file(file, new_file) + return await asyncio.to_thread(self._move_file, file, new_file) def _subfolders(directory: Path) -> list[Path]: diff --git a/cyberdrop_dl/utils/utilities.py b/cyberdrop_dl/utils/utilities.py index d523e794a..0788697bc 100644 --- a/cyberdrop_dl/utils/utilities.py +++ b/cyberdrop_dl/utils/utilities.py @@ -256,7 +256,7 @@ def get_filename_and_ext(filename: str, forum: bool = False, mime_type: str | No def get_download_path(manager: Manager, scrape_item: ScrapeItem, domain: str) -> Path: """Returns the path to the download folder.""" - download_dir = manager.path_manager.download_folder + download_dir = config.get().files.download_folder return download_dir / scrape_item.create_download_path(domain) @@ -361,24 +361,24 @@ def _partial_files(dir: Path | str) -> Generator[Path]: def delete_partial_files(manager: Manager) -> None: """Deletes partial download files recursively.""" log_red("Deleting partial downloads...") - for file in _partial_files(manager.path_manager.download_folder): + for file in _partial_files(config.get().files.download_folder): file.unlink(missing_ok=True) def check_for_partial_files(manager: Manager) -> None: """Checks if there are partial downloads in any subdirectory and logs if found.""" log_yellow("Checking for partial downloads...") - has_partial_files = next(_partial_files(manager.path_manager.download_folder), None) + has_partial_files = next(_partial_files(config.get().files.download_folder), None) if has_partial_files: log_yellow("There are partial downloads in the downloads folder") -def delete_empty_folders(manager: Manager): +def delete_empty_folders(manager: Manager) -> None: """Deletes empty folders efficiently.""" log_yellow("Checking for empty folders...") - purge_dir_tree(manager.path_manager.download_folder) + purge_dir_tree(config.get().files.download_folder) - sorted_folder = manager.path_manager.sorted_folder + sorted_folder = config.get().sorting.sort_folder if sorted_folder and config.get().sorting.sort_downloads: purge_dir_tree(sorted_folder) diff --git a/cyberdrop_dl/utils/webhook.py b/cyberdrop_dl/utils/webhook.py index af8191c2e..2615d2cca 100644 --- a/cyberdrop_dl/utils/webhook.py +++ b/cyberdrop_dl/utils/webhook.py @@ -71,7 +71,7 @@ async def send_webhook_message(manager: Manager) -> None: rich.print("\nSending Webhook Notifications.. ") url = cast("AbsoluteHttpURL", webhook.url.get_secret_value()) - form = await _prepare_form(webhook, manager.path_manager.main_log) + form = await _prepare_form(webhook, config.get().logs.main_log) logger = log result = constants.NotificationResult.FAILED.value diff --git a/tests/conftest.py b/tests/conftest.py index 28754a473..7a73f6e1c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -69,9 +69,6 @@ def post_startup_manager(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Man downloads = str(tmp_path / "Downloads") monkeypatch.chdir(tmp_path) bare_manager = Manager(("--appdata-folder", appdata, "-d", downloads, "--download-tiktok-audios")) - bare_manager.startup() - bare_manager.path_manager.startup() - bare_manager.logs.startup() return bare_manager diff --git a/tests/crawlers/test_xenforo.py b/tests/crawlers/test_xenforo.py index 6d0103ef2..b882545f4 100644 --- a/tests/crawlers/test_xenforo.py +++ b/tests/crawlers/test_xenforo.py @@ -74,9 +74,6 @@ async def post_startup_manager(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) downloads = str(tmp_path / "Downloads") monkeypatch.chdir(tmp_path) manager = Manager(("--appdata-folder", appdata, "-d", downloads)) - manager.startup() - manager.path_manager.startup() - manager.logs.startup() await manager.async_startup() yield manager await manager.async_db_close() diff --git a/tests/test_apprise.py b/tests/test_apprise.py index 6eb780bcd..53912ef16 100644 --- a/tests/test_apprise.py +++ b/tests/test_apprise.py @@ -4,13 +4,12 @@ from pathlib import Path import pytest +from cyberdrop_dl.managers.config_manager import ConfigManager from rich.text import Text from cyberdrop_dl import constants from cyberdrop_dl.constants import NotificationResult from cyberdrop_dl.managers import Manager -from cyberdrop_dl.managers.config_manager import ConfigManager -from cyberdrop_dl.managers.path_manager import PathManager from cyberdrop_dl.utils import apprise from tests.fake_classes.managers import FakeCacheManager @@ -81,11 +80,6 @@ def test_get_apprise_urls() -> None: async def send_notification(test_case: AppriseTestCase) -> None: - FAKE_MANAGER.config_manager.apprise_urls = [] - if test_case.urls and any(test_case.urls): - FAKE_MANAGER.config_manager.apprise_urls = apprise.get_apprise_urls(urls=test_case.urls) - FAKE_MANAGER.path_manager = PathManager(FAKE_MANAGER) - FAKE_MANAGER.path_manager.main_log = test_case.file or TEST_FILES_PATH / "valid_single_url.txt" constants.LOG_OUTPUT_TEXT = Text(test_case.name) result, logs = await apprise.send_apprise_notifications(FAKE_MANAGER) assert result.value == test_case.result.value, f"Result for this case should be {test_case.result.value}" diff --git a/tests/test_cli.py b/tests/test_cli.py index 6fc36026f..c8428ddd1 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -4,6 +4,7 @@ import pytest from pydantic import ValidationError +from cyberdrop_dl import appdata, config from cyberdrop_dl.cli import parse_args from cyberdrop_dl.main import _create_director, run @@ -34,11 +35,11 @@ def test_startup_logger_should_not_be_created_on_invalid_cookies(tmp_cwd: Path) from cyberdrop_dl.utils.logger import catch_exceptions director = _create_director("--download") - cookies_file = director.manager.path_manager.cookies_dir / "cookies.txt" + cookies_file = appdata.get().cookies_dir / "cookies.txt" cookies_file.write_text("Not a cookie file", encoding="utf8") catch_exceptions(director.run)() - logs = director.manager.path_manager.main_log.read_text(encoding="utf8") + logs = config.get().logs.main_log.read_text(encoding="utf8") assert "does not look like a Netscape format cookies file" in logs startup_file = tmp_cwd / "startup.log" diff --git a/tests/test_hashing.py b/tests/test_hashing.py index 078cb5d0e..0bab7bf4e 100644 --- a/tests/test_hashing.py +++ b/tests/test_hashing.py @@ -6,6 +6,7 @@ import pytest +from cyberdrop_dl import appdata, config from cyberdrop_dl.clients.hash_client import hash_directory_scanner if TYPE_CHECKING: @@ -54,15 +55,15 @@ def test_hash_directory_scanner(manager: Manager, expected_results: set[tuple[st n_files = max(count.values()) algos = count.keys() assert len(expected_results) == len(algos) * n_files - manager.config.dupe_cleanup_options.add_md5_hash = "md5" in algos - manager.config.dupe_cleanup_options.add_sha256_hash = "sha256" in algos + config.get().dupe_cleanup_options.add_md5_hash = "md5" in algos + config.get().dupe_cleanup_options.add_sha256_hash = "sha256" in algos - manager.path_manager.download_folder.mkdir(parents=True) - db_path = manager.path_manager.history_db - hash_directory_scanner(manager, manager.path_manager.download_folder) + config.get().files.download_folder.mkdir(parents=True) + db_path = appdata.get().db_file + hash_directory_scanner(manager, config.get().files.download_folder) assert not get_hashes(db_path) - create_files(manager.path_manager.download_folder, n_files) - hash_directory_scanner(manager, manager.path_manager.download_folder) + create_files(config.get().files.download_folder, n_files) + hash_directory_scanner(manager, config.get().files.download_folder) results = get_hashes(db_path) assert len(results) == len(expected_results) assert results == expected_results From aabfd595cc8b712e797d7488c423fe0ece8eaa05 Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Mon, 23 Feb 2026 21:39:21 -0500 Subject: [PATCH 18/23] refactor: downloader and media_item --- cyberdrop_dl/clients/download_client.py | 363 ++++-------------- cyberdrop_dl/clients/hash_client.py | 2 +- cyberdrop_dl/config/__init__.py | 28 +- cyberdrop_dl/crawlers/crawler.py | 19 +- cyberdrop_dl/crawlers/gofile.py | 3 + cyberdrop_dl/crawlers/megacloud.py | 3 + cyberdrop_dl/crawlers/odnoklassniki.py | 13 + cyberdrop_dl/crawlers/pixeldrain.py | 7 +- cyberdrop_dl/crawlers/twitter_images.py | 1 - cyberdrop_dl/data_structures/url_objects.py | 73 ++-- cyberdrop_dl/downloader/downloader.py | 394 ++++---------------- cyberdrop_dl/downloader/mega_nz.py | 1 - cyberdrop_dl/managers/client_manager.py | 2 - cyberdrop_dl/managers/storage_manager.py | 2 - cyberdrop_dl/progress/_common.py | 6 +- cyberdrop_dl/progress/scrape.py | 22 +- cyberdrop_dl/progress/sorting.py | 2 +- cyberdrop_dl/scrape_mapper.py | 1 - cyberdrop_dl/utils/dates.py | 99 ++++- 19 files changed, 316 insertions(+), 725 deletions(-) diff --git a/cyberdrop_dl/clients/download_client.py b/cyberdrop_dl/clients/download_client.py index baba5c802..ab8b1746e 100644 --- a/cyberdrop_dl/clients/download_client.py +++ b/cyberdrop_dl/clients/download_client.py @@ -2,9 +2,7 @@ import asyncio import contextlib -import itertools import time -from collections.abc import Generator from http import HTTPStatus from typing import TYPE_CHECKING, Any @@ -12,30 +10,28 @@ from cyberdrop_dl import config, constants from cyberdrop_dl.clients.response import AbstractResponse -from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL -from cyberdrop_dl.exceptions import DDOSGuardError, DownloadError, InvalidContentTypeError, SlowDownloadError +from cyberdrop_dl.exceptions import DownloadError, InvalidContentTypeError, SlowDownloadError from cyberdrop_dl.utils import aio, dates from cyberdrop_dl.utils.aio import WeakAsyncLocks from cyberdrop_dl.utils.logger import log from cyberdrop_dl.utils.utilities import get_size_or_none if TYPE_CHECKING: - from collections.abc import AsyncGenerator, Callable, Coroutine, Generator, Mapping + from collections.abc import AsyncGenerator, Callable, Coroutine, Mapping from pathlib import Path from typing import Any import aiohttp - from cyberdrop_dl.data_structures.url_objects import MediaItem + from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL, MediaItem from cyberdrop_dl.managers import Manager from cyberdrop_dl.managers.client_manager import ClientManager + from cyberdrop_dl.progress._common import ProgressHook _CONTENT_TYPES_OVERRIDES: dict[str, str] = {"text/vnd.trolltech.linguist": "video/MP2T"} _SLOW_DOWNLOAD_PERIOD: int = 10 # seconds -_CHROME_ANDROID_USER_AGENT: str = ( - "Mozilla/5.0 (Linux; Android 16) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.7204.180 Mobile Safari/537.36" -) + _FREE_SPACE_CHECK_PERIOD: int = 5 # Check every 5 chunks _NULL_CONTEXT: contextlib.nullcontext[None] = contextlib.nullcontext() _USE_IMPERSONATION: set[str] = {"vsco", "celebforum"} @@ -64,58 +60,20 @@ async def _track_errors(self, domain: str): await self.client_manager.manager.states.RUNNING.wait() yield - def _get_download_headers(self, domain: str, referer: AbsoluteHttpURL) -> dict[str, str]: - download_headers = { - "User-Agent": config.get().general.user_agent, - "Referer": str(referer), - } - auth_data = config.get().auth - if domain == "pixeldrain" and auth_data.pixeldrain.api_key: - download_headers["Authorization"] = self.manager.client_manager.basic_auth( - "Cyberdrop-DL", auth_data.pixeldrain.api_key - ) - elif domain == "gofile": - gofile_cookies = self.client_manager.cookies.filter_cookies(AbsoluteHttpURL("https://gofile.io")) - api_key = gofile_cookies.get("accountToken", "") - if api_key: - download_headers["Authorization"] = f"Bearer {api_key.value}" # type: ignore - elif domain == "odnoklassniki": - # TODO: Add "headers" attribute to MediaItem to use custom headers for downloads - download_headers |= { - "Accept-Language": "en-gb, en;q=0.8", - "User-Agent": _CHROME_ANDROID_USER_AGENT, - "Referer": "https://m.ok.ru/", - "Origin": "https://m.ok.ru", - } - elif domain == "megacloud": - download_headers["Referer"] = "https://megacloud.blog/" - return download_headers - async def _download(self, domain: str, media_item: MediaItem) -> bool: - """Downloads a file.""" - download_headers = self._get_download_headers(domain, media_item.referer) - downloaded_filename = await self.manager.db_manager.history_table.get_downloaded_filename(domain, media_item) - download_dir = self.get_download_dir(media_item) - if media_item.is_segment: - media_item.partial_file = media_item.complete_file = download_dir / media_item.filename - else: - media_item.partial_file = download_dir / f"{downloaded_filename}{constants.TempExt.PART}" - resume_point = 0 - if ( - self._supports_ranges - and media_item.partial_file - and (size := await asyncio.to_thread(get_size_or_none, media_item.partial_file)) - ): + if self._supports_ranges and (size := await asyncio.to_thread(get_size_or_none, media_item.partial_file)): resume_point = size - download_headers["Range"] = f"bytes={size}-" + media_item.headers["Range"] = f"bytes={size}-" await asyncio.sleep(config.get().rate_limiting_options.total_delay) def process_response(resp: aiohttp.ClientResponse | AbstractResponse): return self._process_response(media_item, domain, resume_point, resp) - return await self._request_download(media_item, download_headers, process_response) + download_url = media_item.debrid_link or media_item.url + async with self.__request_context(download_url, media_item.domain, media_item.headers) as resp: + return await process_response(resp) async def _process_response( self, @@ -127,10 +85,10 @@ async def _process_response( if resp.status == HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE: await asyncio.to_thread(media_item.partial_file.unlink) - await self.client_manager.check_http_status(resp, download=True) + _ = await self.client_manager.check_http_status(resp, download=True) if not media_item.is_segment: - _ = get_content_type(media_item.ext, resp.headers) + _ = _get_content_type(media_item.ext, resp.headers) media_item.filesize = int(resp.headers.get("Content-Length", "0")) or None if not media_item.complete_file: @@ -152,20 +110,19 @@ async def _process_response( if resp.status != HTTPStatus.PARTIAL_CONTENT: await asyncio.to_thread(media_item.partial_file.unlink, missing_ok=True) - if not media_item.is_segment and not media_item.datetime and (last_modified := get_last_modified(resp.headers)): + if ( + not media_item.is_segment + and not media_item.timestamp + and (last_modified := _get_last_modified(resp.headers)) + ): msg = f"Unable to parse upload date for {media_item.url}, using `Last-Modified` header as file datetime" log(msg, 30) - media_item.datetime = last_modified + media_item.timestamp = last_modified - task_id = media_item.task_id - if task_id is None: - size = (media_item.filesize + resume_point) if media_item.filesize is not None else None - task_id = self.manager.progress_manager.downloads.new_task( - domain=domain, filename=media_item.filename, expected_size=size - ) - media_item.set_task_id(task_id) + size = (media_item.filesize + resume_point) if media_item.filesize is not None else None - self.manager.progress_manager.downloads.advance_file(task_id, resume_point) + if not media_item.is_segment: + self.manager.progress_manager.downloads.new_hook(media_item.filename, size) await self._append_content(media_item, self._get_resp_reader(resp)) return True @@ -192,62 +149,24 @@ async def __request_context( async with self.client_manager._download_session.get(url, headers=headers) as resp: yield resp - async def _request_download( - self, - media_item: MediaItem, - download_headers: dict[str, str], - process_response: Callable[[aiohttp.ClientResponse | AbstractResponse], Coroutine[None, None, bool]], - ) -> bool: - download_url = media_item.debrid_link or media_item.url - await self.manager.states.RUNNING.wait() - fallback_url_generator = _fallback_generator(media_item) - fallback_count = 0 - - while True: - resp = None - try: - async with self.__request_context(download_url, media_item.domain, download_headers) as resp: - return await process_response(resp) - except (DownloadError, DDOSGuardError): - if resp is None: - raise - try: - next_download_url = fallback_url_generator.send(resp) - except StopIteration: - pass - else: - if not next_download_url: - raise - if media_item.debrid_link and media_item.debrid_link == download_url: - msg = f" with debrid URL {download_url} failed, retrying with fallback URL: " - elif media_item.url == download_url: - msg = " failed, retrying with fallback URL: " - else: - fallback_count += 1 - msg = f" with fallback URL #{fallback_count} {download_url} failed, retrying with new fallback URL: " - log(f"Download of {media_item.url}{msg}{next_download_url}", 40) - download_url = next_download_url - continue - raise - async def _append_content(self, media_item: MediaItem, content: aiohttp.StreamReader | AbstractResponse) -> None: """Appends content to a file.""" - assert media_item.task_id is not None check_free_space = self.make_free_space_checker(media_item) - check_download_speed = self.make_speed_checker(media_item) await check_free_space() await self._pre_download_check(media_item) - async with aiofiles.open(media_item.partial_file, mode="ab") as f: - async for chunk in content.iter_chunked(self.client_manager.speed_limiter.chunk_size): - await self.manager.states.RUNNING.wait() - await check_free_space() - chunk_size = len(chunk) - await self.client_manager.speed_limiter.acquire(chunk_size) - await f.write(chunk) - self.manager.progress_manager.downloads.advance_file(media_item.task_id, chunk_size) - check_download_speed() + with self.manager.progress_manager.downloads.current_hook as hook: + check_download_speed = self.make_speed_checker(hook) + + async with aiofiles.open(media_item.partial_file, mode="ab") as f: + async for chunk in content.iter_chunked(self.client_manager.speed_limiter.chunk_size): + await check_free_space() + chunk_size = len(chunk) + await self.client_manager.speed_limiter.acquire(chunk_size) + await f.write(chunk) + hook.advance(chunk_size) + check_download_speed() await self._post_download_check(media_item) @@ -275,21 +194,20 @@ async def check_free_space() -> None: return check_free_space - def make_speed_checker(self, media_item: MediaItem) -> Callable[[], None]: + def make_speed_checker(self, hook: ProgressHook) -> Callable[[], None]: last_slow_speed_read = None def check_download_speed() -> None: nonlocal last_slow_speed_read if not self.download_speed_threshold: return - assert media_item.task_id is not None - speed = self.manager.progress_manager.downloads.get_speed(media_item.task_id) - if speed > self.download_speed_threshold: + + if hook.speed() > self.download_speed_threshold: last_slow_speed_read = None elif not last_slow_speed_read: last_slow_speed_read = time.perf_counter() elif time.perf_counter() - last_slow_speed_read > _SLOW_DOWNLOAD_PERIOD: - raise SlowDownloadError(origin=media_item) + raise SlowDownloadError return check_download_speed @@ -313,7 +231,7 @@ async def download_file(self, domain: str, media_item: MediaItem) -> bool: if not proceed: log(f"Download Skip {media_item.url} due to runtime restrictions", 10) await asyncio.to_thread(media_item.complete_file.unlink) - await self.mark_incomplete(media_item, domain) + await self.mark_incomplete(media_item, media_item.domain) self.manager.progress_manager.files.add_skipped() return False await self.process_completed(media_item, domain) @@ -322,14 +240,13 @@ async def download_file(self, domain: str, media_item: MediaItem) -> bool: """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - async def mark_incomplete(self, media_item: MediaItem, domain: str) -> None: + async def mark_incomplete(self, media_item: MediaItem) -> None: """Marks the media item as incomplete in the database.""" if media_item.is_segment: return - await self.manager.db_manager.history_table.insert_incompleted(domain, media_item) + await self.manager.db_manager.history_table.insert_incompleted(media_item.domain, media_item) async def process_completed(self, media_item: MediaItem, domain: str) -> None: - """Marks the media item as completed in the database and adds to the completed list.""" await self.mark_completed(domain, media_item) await self.add_file_size(domain, media_item) @@ -337,8 +254,6 @@ async def mark_completed(self, domain: str, media_item: MediaItem) -> None: await self.manager.db_manager.history_table.mark_complete(domain, media_item) async def add_file_size(self, domain: str, media_item: MediaItem) -> None: - if not media_item.complete_file: - media_item.complete_file = self.get_file_location(media_item) if await asyncio.to_thread(media_item.complete_file.is_file): await self.manager.db_manager.history_table.add_filesize(domain, media_item) @@ -351,149 +266,33 @@ async def handle_media_item_completion(self, media_item: MediaItem, downloaded: except Exception: log(f"Error handling media item completion of: {media_item.complete_file}", 10, exc_info=True) - """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - def get_download_dir(self, media_item: MediaItem) -> Path: - """Returns the download directory for the media item.""" - download_folder = media_item.download_folder - - if config.get().download_options.block_download_sub_folders: - while download_folder.parent != config.get().files.download_folder: - download_folder = download_folder.parent - media_item.download_folder = download_folder - return download_folder - - def get_file_location(self, media_item: MediaItem) -> Path: - download_dir = self.get_download_dir(media_item) - return download_dir / media_item.filename - - async def get_final_file_info(self, media_item: MediaItem, domain: str) -> tuple[bool, bool]: - """Complicated checker for if a file already exists, and was already downloaded.""" - media_item.complete_file = self.get_file_location(media_item) - part_suffix = media_item.complete_file.suffix + constants.TempExt.PART - media_item.partial_file = media_item.complete_file.with_suffix(part_suffix) - - expected_size = media_item.filesize - proceed = True - skip = False - - while True: - if expected_size and not media_item.is_segment: - file_size_check = self.check_filesize_limits(media_item) - if not file_size_check: - log(f"Download Skip {media_item.url} due to filesize restrictions", 10) - proceed = False - skip = True - return proceed, skip - - if not media_item.complete_file.exists() and not media_item.partial_file.exists(): - break - - if media_item.complete_file.exists() and media_item.complete_file.stat().st_size == media_item.filesize: - log(f"Found {media_item.complete_file.name} locally, skipping download") - proceed = False - break - - downloaded_filename = await self.manager.db_manager.history_table.get_downloaded_filename( - domain, - media_item, - ) - if not downloaded_filename: - media_item.complete_file, media_item.partial_file = await self.iterate_filename( - media_item.complete_file, - media_item, - ) - break - - if media_item.filename == downloaded_filename: - if media_item.partial_file.exists(): - log(f"Found {downloaded_filename} locally, trying to resume") - assert media_item.filesize - size = media_item.partial_file.stat().st_size - if size >= media_item.filesize != 0: - log(f"Deleting partial file {media_item.partial_file}") - media_item.partial_file.unlink() - - elif size == media_item.filesize: - if media_item.complete_file.exists(): - log( - f"Found conflicting complete file '{media_item.complete_file}' locally, iterating filename", - 30, - ) - new_complete_filename, new_partial_file = await self.iterate_filename( - media_item.complete_file, - media_item, - ) - media_item.partial_file.rename(new_complete_filename) - proceed = False - - media_item.complete_file = new_complete_filename - media_item.partial_file = new_partial_file - else: - proceed = False - media_item.partial_file.rename(media_item.complete_file) - log( - f"Renaming found partial file '{media_item.partial_file}' to complete file {media_item.complete_file}" - ) - elif media_item.complete_file.exists(): - if media_item.complete_file.stat().st_size == media_item.filesize: - log(f"Found complete file '{media_item.complete_file}' locally, skipping download") - proceed = False - else: - log( - f"Found conflicting complete file '{media_item.complete_file}' locally, iterating filename", - 30, - ) - media_item.complete_file, media_item.partial_file = await self.iterate_filename( - media_item.complete_file, - media_item, - ) - break - - media_item.filename = downloaded_filename - media_item.download_filename = media_item.complete_file.name - await self.manager.db_manager.history_table.add_download_filename(domain, media_item) - return proceed, skip - - async def iterate_filename(self, complete_file: Path, media_item: MediaItem) -> tuple[Path, Path]: - """Iterates the filename until it is unique.""" - part_suffix = complete_file.suffix + constants.TempExt.PART - partial_file = complete_file.with_suffix(part_suffix) - for iteration in itertools.count(1): - filename = f"{complete_file.stem} ({iteration}){complete_file.suffix}" - temp_complete_file = media_item.download_folder / filename - if ( - not temp_complete_file.exists() - and not await self.manager.db_manager.history_table.check_filename_exists(filename) - ): - media_item.filename = filename - complete_file = media_item.download_folder / media_item.filename - partial_file = complete_file.with_suffix(part_suffix) - break - return complete_file, partial_file - - def check_filesize_limits(self, media: MediaItem) -> bool: - """Checks if the file size is within the limits.""" - file_size_limits = config.get().file_size_limits - max_video_filesize = file_size_limits.maximum_video_size or float("inf") - min_video_filesize = file_size_limits.minimum_video_size - max_image_filesize = file_size_limits.maximum_image_size or float("inf") - min_image_filesize = file_size_limits.minimum_image_size - max_other_filesize = file_size_limits.maximum_other_size or float("inf") - min_other_filesize = file_size_limits.minimum_other_size - - assert media.filesize is not None - if media.ext in constants.FileFormats.IMAGE: - proceed = min_image_filesize < media.filesize < max_image_filesize - elif media.ext in constants.FileFormats.VIDEO: - proceed = min_video_filesize < media.filesize < max_video_filesize - else: - proceed = min_other_filesize < media.filesize < max_other_filesize - - return proceed - - -def get_content_type(ext: str, headers: Mapping[str, str]) -> str | None: +def get_file_location(media_item: MediaItem) -> Path: + return media_item.download_folder / media_item.filename + + +def _check_filesize_limits(media: MediaItem) -> bool: + """Checks if the file size is within the limits.""" + file_size_limits = config.get().file_size_limits + max_video_filesize = file_size_limits.maximum_video_size or float("inf") + min_video_filesize = file_size_limits.minimum_video_size + max_image_filesize = file_size_limits.maximum_image_size or float("inf") + min_image_filesize = file_size_limits.minimum_image_size + max_other_filesize = file_size_limits.maximum_other_size or float("inf") + min_other_filesize = file_size_limits.minimum_other_size + + assert media.filesize is not None + if media.ext in constants.FileFormats.IMAGE: + proceed = min_image_filesize < media.filesize < max_image_filesize + elif media.ext in constants.FileFormats.VIDEO: + proceed = min_video_filesize < media.filesize < max_video_filesize + else: + proceed = min_other_filesize < media.filesize < max_other_filesize + + return proceed + + +def _get_content_type(ext: str, headers: Mapping[str, str]) -> str | None: content_type: str = headers.get("Content-Type", "") content_length = headers.get("Content-Length") if not content_type and not content_length: @@ -508,43 +307,17 @@ def get_content_type(ext: str, headers: Mapping[str, str]) -> str | None: content_type = override or content_type content_type = content_type.lower() - if is_html_or_text(content_type) and ext.lower() not in constants.FileFormats.TEXT: + if _is_html_or_text(content_type) and ext.lower() not in constants.FileFormats.TEXT: msg = f"Received '{content_type}', was expecting other" raise InvalidContentTypeError(message=msg) return content_type -def get_last_modified(headers: Mapping[str, str]) -> int | None: +def _get_last_modified(headers: Mapping[str, str]) -> int | None: if date_str := headers.get("Last-Modified"): return dates.parse_http(date_str) -def is_html_or_text(content_type: str) -> bool: +def _is_html_or_text(content_type: str) -> bool: return any(s in content_type for s in ("html", "text")) - - -def _fallback_generator(media_item: MediaItem): - fallbacks = media_item.fallbacks - - def gen_fallback() -> Generator[AbsoluteHttpURL | None, aiohttp.ClientResponse, None]: - response = yield - if fallbacks is None: - return - - if callable(fallbacks): - for retry in itertools.count(1): - if not response: - return - url = fallbacks(response, retry) - if not url: - return - response = yield url - - else: - for fall in fallbacks: # noqa: UP028 - yield fall - - gen = gen_fallback() - _ = next(gen) - return gen diff --git a/cyberdrop_dl/clients/hash_client.py b/cyberdrop_dl/clients/hash_client.py index d5199cf23..e4f477dee 100644 --- a/cyberdrop_dl/clients/hash_client.py +++ b/cyberdrop_dl/clients/hash_client.py @@ -81,7 +81,7 @@ async def hash_item_during_download(self, media_item: MediaItem) -> None: return if config.get().dupe_cleanup_options.hashing != Hashing.IN_PLACE: return - await self.manager.states.RUNNING.wait() + try: assert media_item.original_filename hash = await self.update_db_and_retrive_hash( diff --git a/cyberdrop_dl/config/__init__.py b/cyberdrop_dl/config/__init__.py index 477391978..3a27ffe66 100755 --- a/cyberdrop_dl/config/__init__.py +++ b/cyberdrop_dl/config/__init__.py @@ -3,8 +3,9 @@ import datetime from contextvars import ContextVar, Token from pathlib import Path -from typing import Self +from typing import Annotated, Self +from cyclopts import Parameter from pydantic import BaseModel from cyberdrop_dl.config.auth import AuthSettings @@ -12,33 +13,10 @@ from cyberdrop_dl.models import get_model_fields, merge_models _config: ContextVar[Config] = ContextVar("_config") -_appdata: ContextVar[AppData] = ContextVar("_appdata") - - -class AppData: - def __init__(self, path: Path) -> None: - self.path: Path = path - self.cookies_dir: Path = path / "cookies" - self.cache_file: Path = path / "cache.yaml" - self.default_config: Path = path / "config.yaml" - self.db_file: Path = path / "cyberdrop.db" - - def __fspath__(self) -> str: - return str(self) - - def __str__(self) -> str: - return str(self.path) - - def __repr__(self) -> str: - return f"{type(self).__name__}({vars(self)!r})" - - def mkdirs(self) -> None: - for dir in (self.cookies_dir,): - dir.mkdir(parents=True, exist_ok=True) class Config(ConfigSettings): - auth: AuthSettings = AuthSettings() + auth: Annotated[AuthSettings, Parameter(show=False)] = AuthSettings() _source: Path | None = None _token: Token[Config] | None = None diff --git a/cyberdrop_dl/crawlers/crawler.py b/cyberdrop_dl/crawlers/crawler.py index 37bfcbbbe..1fa79341e 100644 --- a/cyberdrop_dl/crawlers/crawler.py +++ b/cyberdrop_dl/crawlers/crawler.py @@ -272,7 +272,6 @@ def deep_scrape(self) -> bool: def _init_downloader(self) -> Downloader: self.downloader = dl = Downloader(self.manager, self.DOMAIN) - dl.startup() return dl @final @@ -314,7 +313,6 @@ async def run(self, scrape_item: ScrapeItem) -> None: self.waiting_items += 1 async with self._semaphore: - await self.manager.states.RUNNING.wait() self.waiting_items -= 1 og_url = scrape_item.url scrape_item.url = url = self.transform_url(scrape_item.url) @@ -437,6 +435,7 @@ async def handle_file( ext=ext, ) media_item.debrid_link = debrid_link + media_item.headers = self._get_download_headers(media_item.referer) if metadata is not None: media_item.metadata = metadata await self.handle_media_item(media_item, m3u8) @@ -472,9 +471,8 @@ async def check_complete(self, url: AbsoluteHttpURL, referer: AbsoluteHttpURL) - return check_complete async def handle_media_item(self, media_item: MediaItem, m3u8: m3u8.RenditionGroup | None = None) -> None: - await self.manager.states.RUNNING.wait() - if media_item.datetime and not isinstance(media_item.datetime, int): - msg = f"Invalid datetime from '{self.FOLDER_DOMAIN}' crawler . Got {media_item.datetime!r}, expected int." + if media_item.timestamp and not isinstance(media_item.timestamp, int): + msg = f"Invalid datetime from '{self.FOLDER_DOMAIN}' crawler . Got {media_item.timestamp!r}, expected int." log(msg, bug=True) check_complete = await self.check_complete(media_item.url, media_item.referer) @@ -756,7 +754,9 @@ async def _web_pager( page_url = self.parse_url(page_url_str, **kwargs) @error_handling_wrapper - async def direct_file(self, scrape_item: ScrapeItem, url: URL | None = None, assume_ext: str | None = None) -> None: + async def direct_file( + self, scrape_item: ScrapeItem, url: AbsoluteHttpURL | None = None, assume_ext: str | None = None + ) -> None: """Download a direct link file. Filename will be the url slug""" link = url or scrape_item.url filename, ext = self.get_filename_and_ext(link.name or link.parent.name, assume_ext=assume_ext) @@ -929,6 +929,12 @@ def handle_subs(self, scrape_item: ScrapeItem, video_filename: str, subtitles: I ) ) + def _get_download_headers(self, referer: AbsoluteHttpURL) -> dict[str, str]: + return { + "User-Agent": config.get().general.user_agent, + "Referer": str(referer), + } + def _make_scrape_mapper_keys(cls: type[Crawler] | Crawler) -> tuple[str, ...]: if cls.SUPPORTED_DOMAINS: @@ -1036,7 +1042,6 @@ def auto_task_id( @wraps(func) async def wrapper(self: _CrawlerT, scrape_item: ScrapeItem, *args: P.args, **kwargs: P.kwargs) -> R: - await self.manager.states.RUNNING.wait() with self.new_task_id(scrape_item.url): result = func(self, scrape_item, *args, **kwargs) if inspect.isawaitable(result): diff --git a/cyberdrop_dl/crawlers/gofile.py b/cyberdrop_dl/crawlers/gofile.py index e06db6166..af7cc0f51 100644 --- a/cyberdrop_dl/crawlers/gofile.py +++ b/cyberdrop_dl/crawlers/gofile.py @@ -238,6 +238,9 @@ async def _get_website_token(self) -> str: raise ScrapeError(401, "Couldn't generate GoFile websiteToken", origin=_GLOBAL_JS_URL) + def _get_download_headers(self, referer: AbsoluteHttpURL) -> dict[str, str]: + return super()._get_download_headers(referer) | self.headers + def _check_node_is_accessible(node: Node) -> TypeGuard[File | Folder]: if (type_ := node["type"]) not in ("file", "folder"): diff --git a/cyberdrop_dl/crawlers/megacloud.py b/cyberdrop_dl/crawlers/megacloud.py index b6996c0d9..dd7548ccb 100644 --- a/cyberdrop_dl/crawlers/megacloud.py +++ b/cyberdrop_dl/crawlers/megacloud.py @@ -113,6 +113,9 @@ def parse_subs(): subtitles=tuple(parse_subs()), ) + def _get_download_headers(self, referer: AbsoluteHttpURL) -> dict[str, str]: + return super()._get_download_headers(referer) | {"referer": "https://megacloud.blog/"} + _ISO639_MAP = { "arabic": "ara", diff --git a/cyberdrop_dl/crawlers/odnoklassniki.py b/cyberdrop_dl/crawlers/odnoklassniki.py index de0c7eddb..7e3538066 100644 --- a/cyberdrop_dl/crawlers/odnoklassniki.py +++ b/cyberdrop_dl/crawlers/odnoklassniki.py @@ -147,6 +147,19 @@ async def video(self, scrape_item: ScrapeItem, video_id: str): mobile_url, scrape_item, video_id + ".mp4", custom_filename=filename, debrid_link=cdn_url ) + def _get_download_headers(self, referer: AbsoluteHttpURL) -> dict[str, str]: + return super()._get_download_headers(referer) | { + "Accept-Language": "en-gb, en;q=0.8", + "User-Agent": _CHROME_ANDROID_USER_AGENT, + "Referer": "https://m.ok.ru/", + "Origin": "https://m.ok.ru", + } + + +_CHROME_ANDROID_USER_AGENT: str = ( + "Mozilla/5.0 (Linux; Android 16) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.7204.180 Mobile Safari/537.36" +) + def _get_best_src(metadata: dict[str, Any]) -> tuple[Resolution, str]: def parse(): diff --git a/cyberdrop_dl/crawlers/pixeldrain.py b/cyberdrop_dl/crawlers/pixeldrain.py index 75f3f9e28..41fb5d796 100644 --- a/cyberdrop_dl/crawlers/pixeldrain.py +++ b/cyberdrop_dl/crawlers/pixeldrain.py @@ -6,7 +6,7 @@ from pydantic import BaseModel -from cyberdrop_dl import env +from cyberdrop_dl import config, env from cyberdrop_dl.crawlers.crawler import Crawler, RateLimit, SupportedDomains, SupportedPaths, auto_task_id from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL from cyberdrop_dl.utils.utilities import error_handling_wrapper @@ -105,7 +105,7 @@ class PixelDrainCrawler(Crawler): def __post_init__(self) -> None: self._headers: dict[str, str] = {} - if api_key := self.manager.auth_config.pixeldrain.api_key: + if api_key := config.get().auth.pixeldrain.api_key: self._headers["Authorization"] = self.manager.client_manager.basic_auth( "Cyberdrop-DL", api_key, @@ -282,3 +282,6 @@ async def _text(self, scrape_item: ScrapeItem, file: File | Node) -> None: scrape_item.add_children() _file_task = auto_task_id(_file) + + def _get_download_headers(self, referer: AbsoluteHttpURL) -> dict[str, str]: + return super()._get_download_headers(referer=referer) | self._headers diff --git a/cyberdrop_dl/crawlers/twitter_images.py b/cyberdrop_dl/crawlers/twitter_images.py index 87ef71596..e19408737 100644 --- a/cyberdrop_dl/crawlers/twitter_images.py +++ b/cyberdrop_dl/crawlers/twitter_images.py @@ -42,7 +42,6 @@ async def photo(self, scrape_item: ScrapeItem, url: AbsoluteHttpURL | None = Non await self.handle_file(link, scrape_item, filename, ext) async def handle_media_item(self, media_item: MediaItem, m3u8: m3u8.RenditionGroup | None = None) -> None: - _, *media_item.fallbacks = list(_make_download_urls(media_item.url)) if media_item.referer == media_item.url and media_item.parents: media_item.referer = media_item.parents[0] await super().handle_media_item(media_item, m3u8) diff --git a/cyberdrop_dl/data_structures/url_objects.py b/cyberdrop_dl/data_structures/url_objects.py index 59da65299..8dc9c3834 100644 --- a/cyberdrop_dl/data_structures/url_objects.py +++ b/cyberdrop_dl/data_structures/url_objects.py @@ -11,11 +11,7 @@ import yarl if TYPE_CHECKING: - from collections.abc import Callable - - import aiohttp from propcache.api import under_cached_property as cached_property - from rich.progress import TaskID from cyberdrop_dl import signature from cyberdrop_dl.managers import Manager @@ -127,30 +123,24 @@ class MediaItem: download_folder: Path filename: str original_filename: str - download_filename: str | None = field(default=None) - filesize: int | None = field(default=None, compare=False) + download_filename: str | None = None + filesize: int | None = None ext: str db_path: str - - debrid_link: AbsoluteHttpURL | None = field(default=None, compare=False) - duration: float | None = field(default=None, compare=False) + debrid_link: AbsoluteHttpURL | None = None + duration: float | None = None is_segment: bool = False - fallbacks: Callable[[aiohttp.ClientResponse, int], AbsoluteHttpURL] | list[AbsoluteHttpURL] | None = field( - default=None, compare=False - ) album_id: str | None = None - datetime: int | None = field(default=None, compare=False) - parents: list[AbsoluteHttpURL] = field(default_factory=list, compare=False) - parent_threads: set[AbsoluteHttpURL] = field(default_factory=set, compare=False) - - current_attempt: int = field(default=0, compare=False) - partial_file: Path = None # type: ignore - complete_file: Path = None # type: ignore - hash: str | None = field(default=None, compare=False) - downloaded: bool = field(default=False, compare=False) - - parent_media_item: MediaItem | None = field(default=None, compare=False) - _task_id: TaskID | None = field(default=None, compare=False) + timestamp: int | None = None + + parents: list[AbsoluteHttpURL] = field(default_factory=list) + parent_threads: set[AbsoluteHttpURL] = field(default_factory=set) + current_attempt: int = field(default=0) + complete_file: Path = field(init=False) + hash: str | None = None + + headers: dict[str, str] = field(default_factory=dict, compare=False) + downloaded: bool = False metadata: object = field(init=False, default_factory=dict, compare=False) def __repr__(self) -> str: @@ -160,10 +150,16 @@ def __post_init__(self) -> None: if self.url.scheme == "metadata": self.db_path = "" + self.complete_file = self.download_folder / self.filename + + @property + def partial_file(self) -> Path: + return self.complete_file.with_suffix(self.complete_file.suffix + ".part") + def datetime_obj(self) -> datetime.datetime | None: - if self.datetime: - assert isinstance(self.datetime, int), f"Invalid {self.datetime =!r} from {self.referer}" - return datetime.datetime.fromtimestamp(self.datetime) + if self.timestamp: + assert isinstance(self.timestamp, int), f"Invalid {self.timestamp =!r} from {self.referer}" + return datetime.datetime.fromtimestamp(self.timestamp, tz=datetime.UTC) @staticmethod def from_item( @@ -189,36 +185,19 @@ def from_item( ext=ext or Path(filename).suffix, original_filename=original_filename or filename, parents=origin.parents.copy(), - datetime=origin.possible_datetime if isinstance(origin, ScrapeItem) else origin.datetime, - parent_media_item=None if isinstance(origin, ScrapeItem) else origin, + timestamp=origin.possible_datetime if isinstance(origin, ScrapeItem) else origin.timestamp, parent_threads=origin.parent_threads.copy(), ) - @property - def task_id(self) -> TaskID | None: - if self.parent_media_item is not None: - return self.parent_media_item.task_id - return self._task_id - - def set_task_id(self, task_id: TaskID | None) -> None: - if self.task_id is not None and task_id is not None: - # We already have a task_id; we can't replace it, only reset it. - # This should never happen. Calling code should always check the value before making a new task. - # We can't silently ignore it either because we will lose any reference to the created task. - raise ValueError("task_id is already set") - if self.parent_media_item is not None: - self.parent_media_item.set_task_id(task_id) - else: - self._task_id = task_id - def as_jsonable_dict(self) -> dict[str, Any]: item = asdict(self) if datetime := self.datetime_obj(): item["datetime"] = datetime item["attempts"] = item.pop("current_attempt") + item["partial_file"] = self.partial_file if self.hash: item["hash"] = f"xxh128:{self.hash}" - for name in ("fallbacks", "_task_id", "is_segment", "parent_media_item"): + for name in ("is_segment",): _ = item.pop(name) return item diff --git a/cyberdrop_dl/downloader/downloader.py b/cyberdrop_dl/downloader/downloader.py index 1fe3bcd6e..4da1ae516 100644 --- a/cyberdrop_dl/downloader/downloader.py +++ b/cyberdrop_dl/downloader/downloader.py @@ -3,19 +3,14 @@ import asyncio import contextlib import os -import shutil import subprocess -import sys -from dataclasses import field -from datetime import datetime from functools import wraps from pathlib import Path from typing import TYPE_CHECKING, NamedTuple, ParamSpec, TypeVar from aiohttp import ClientConnectorError, ClientError, ClientResponseError -from cyberdrop_dl import config, constants -from cyberdrop_dl.data_structures.url_objects import HlsSegment, MediaItem +from cyberdrop_dl import config from cyberdrop_dl.exceptions import ( DownloadError, DurationError, @@ -24,48 +19,23 @@ RestrictedDateRangeError, RestrictedFiletypeError, SkipDownloadError, - TooManyCrawlerErrors, ) -from cyberdrop_dl.utils import aio, ffmpeg +from cyberdrop_dl.utils import aio +from cyberdrop_dl.utils.dates import set_creation_time from cyberdrop_dl.utils.logger import log, log_debug -from cyberdrop_dl.utils.utilities import error_handling_wrapper, parse_url +from cyberdrop_dl.utils.utilities import error_handling_wrapper -# Windows epoch is January 1, 1601. Unix epoch is January 1, 1970 -WIN_EPOCH_OFFSET = 116444736e9 -MAC_OS_SET_FILE = None _VIDEO_HLS_BATCH_SIZE = 10 _AUDIO_HLS_BATCH_SIZE = 50 -# Try to import win32con for Windows constants, fallback to hardcoded values if unavailable -try: - import win32con # type: ignore[reportMissingModuleSource] - - FILE_WRITE_ATTRIBUTES = 256 - OPEN_EXISTING = win32con.OPEN_EXISTING - FILE_ATTRIBUTE_NORMAL = win32con.FILE_ATTRIBUTE_NORMAL - FILE_FLAG_BACKUP_SEMANTICS = win32con.FILE_FLAG_BACKUP_SEMANTICS -except ImportError: - FILE_WRITE_ATTRIBUTES = 256 - OPEN_EXISTING = 3 - FILE_ATTRIBUTE_NORMAL = 128 - FILE_FLAG_BACKUP_SEMANTICS = 33554432 - -if sys.platform == "win32": - from ctypes import byref, windll, wintypes - - -elif sys.platform == "darwin": - # SetFile is non standard in macOS. Only users that have xcode installed will have SetFile - MAC_OS_SET_FILE = shutil.which("SetFile") - - if TYPE_CHECKING: - from collections.abc import Callable, Coroutine, Generator + from collections.abc import Callable, Coroutine from cyberdrop_dl.clients.download_client import DownloadClient + from cyberdrop_dl.data_structures.url_objects import MediaItem from cyberdrop_dl.managers import Manager - from cyberdrop_dl.utils.m3u8 import M3U8, RenditionGroup + P = ParamSpec("P") R = TypeVar("R") @@ -87,10 +57,8 @@ class SegmentDownloadResult(NamedTuple): def retry(func: Callable[P, Coroutine[None, None, R]]) -> Callable[P, Coroutine[None, None, R]]: - """This function is a wrapper that handles retrying for failed downloads.""" - @wraps(func) - async def wrapper(*args, **kwargs) -> R: + async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: self: Downloader = args[0] media_item: MediaItem = args[1] while True: @@ -100,9 +68,7 @@ async def wrapper(*args, **kwargs) -> R: if not e.retry: raise - self.attempt_task_removal(media_item) - if e.status != 999: - media_item.current_attempt += 1 + media_item.current_attempt += 1 log(f"{self.log_prefix} failed: {media_item.url} with error: {e!s}", 40) if media_item.current_attempt >= self.max_attempts: @@ -118,52 +84,41 @@ async def wrapper(*args, **kwargs) -> R: class Downloader: - def __init__(self, manager: Manager, domain: str) -> None: + def __init__( + self, + config: config.Config, + manager: Manager, + client: DownloadClient, + slots: int, + ) -> None: self.manager: Manager = manager - self.domain: str = domain - self.client: DownloadClient = field(init=False) - self.log_prefix = "Download attempt (unsupported domain)" if domain in GENERIC_CRAWLERS else "Download" + self.config = config + self.client: DownloadClient = client + + self.log_prefix = "Download" self.processed_items: set[str] = set() self.waiting_items = 0 - - self._additional_headers = {} self._current_attempt_filesize: dict[str, int] = {} - self._file_lock_vault = manager.client_manager.file_locks - self._ignore_history = config.get().runtime_options.ignore_history - self._semaphore: asyncio.Semaphore = field(init=False) + self._file_lock_vault: aio.WeakAsyncLocks[str] = aio.WeakAsyncLocks() + self._ignore_history: bool = self.config.runtime_options.ignore_history + self._semaphore: asyncio.Semaphore = asyncio.Semaphore(slots) @property def max_attempts(self): - if config.get().download_options.disable_download_attempt_limit: + if self.config.download_options.disable_download_attempt_limit: return 1 - return config.get().rate_limiting_options.download_attempts - - def startup(self) -> None: - """Starts the downloader.""" - self.client = self.manager.client_manager.download_client - self._semaphore = asyncio.Semaphore(self.manager.client_manager.get_download_slots(self.domain)) - - config.get().files.download_folder.mkdir(parents=True, exist_ok=True) - if config.get().sorting.sort_downloads: - config.get().sorting.sort_folder.mkdir(parents=True, exist_ok=True) - - def update_queued_files(self, increase_total: bool = True): - queued_files = self.manager.progress_manager.downloads.get_queue_length() - self.manager.progress_manager.files.update_queued(queued_files) - self.manager.progress_manager.files.update_total(increase_total) + return self.config.rate_limiting_options.download_attempts @contextlib.asynccontextmanager - async def _download_context(self, media_item: MediaItem): - await self.manager.states.RUNNING.wait() + async def _limiter(self, media_item: MediaItem): media_item.current_attempt = 0 - await self.client.mark_incomplete(media_item, self.domain) if media_item.is_segment: yield return self.waiting_items += 1 - self.update_queued_files() + await self.client.mark_incomplete(media_item) server = (media_item.debrid_link or media_item.url).host server_limit, domain_limit, global_limit = ( @@ -173,157 +128,36 @@ async def _download_context(self, media_item: MediaItem): ) async with server_limit, domain_limit, global_limit: - await self.manager.states.RUNNING.wait() self.processed_items.add(media_item.db_path) - self.update_queued_files(increase_total=False) self.waiting_items -= 1 yield async def run(self, media_item: MediaItem) -> bool: - """Runs the download loop.""" - if media_item.url.path in self.processed_items and not self._ignore_history: return False - async with self._download_context(media_item): - return await self.start_download(media_item) - - @error_handling_wrapper - async def download_hls(self, media_item: MediaItem, m3u8_group: RenditionGroup) -> None: - if media_item.url.path in self.processed_items and not self._ignore_history: - return - - try: - ffmpeg.check_is_available() - except RuntimeError as e: - msg = f"{e} - ffmpeg and ffprobe are required for HLS downloads" - raise DownloadError("FFmpeg Error", msg, media_item) from None - - async with self._download_context(media_item): - await self._start_hls_download(media_item, m3u8_group) - - async def _start_hls_download(self, media_item: MediaItem, m3u8_group: RenditionGroup) -> None: - media_item.complete_file = media_item.download_folder / media_item.filename - # TODO: register database duration from m3u8 info - # TODO: compute approx size for UI from the m3u8 info - media_item.download_filename = media_item.complete_file.name - await self.manager.db_manager.history_table.add_download_filename(self.domain, media_item) - task_id = self.manager.progress_manager.downloads.new_task(domain=self.domain, filename=media_item.filename) - media_item.set_task_id(task_id) - video, audio, _subs = await self._download_rendition_group(media_item, m3u8_group) - if not audio: - await asyncio.to_thread(video.rename, media_item.complete_file) - else: - # TODO: add remux method to ffmpeg to create an mkv file instead of mp4 - # Subtitles format may be incompatible with mp4 and they will be silently dropped by ffmpeg - # so we leave them as independent files for now - ffmpeg_result = await ffmpeg.merge((video, audio), media_item.complete_file) - - if not ffmpeg_result.success: - raise DownloadError("FFmpeg Concat Error", ffmpeg_result.stderr, media_item) - - await self.client.process_completed(media_item, self.domain) - await self.client.handle_media_item_completion(media_item, downloaded=True) - await self.finalize_download(media_item, downloaded=True) - - async def _download_rendition_group( - self, media_item: MediaItem, m3u8_group: RenditionGroup - ) -> tuple[Path, Path | None, Path | None]: - async def download(m3u8: M3U8): - assert m3u8.media_type - if not m3u8.segments: - raise DownloadError(204, f"{m3u8.media_type} m3u8 manifest ({m3u8.base_uri}) has no valid segments") - - download_folder = media_item.complete_file.with_suffix(constants.TempExt.HLS) / m3u8.media_type - coros = self._prepare_hls_downloads(media_item, m3u8, download_folder) - n_segmets = len(m3u8.segments) - if n_segmets > 1: - suffix = f".{m3u8.media_type}.ts" - else: - suffix = media_item.complete_file.suffix + parse_url(m3u8.segments[0].absolute_uri).suffix - - output = media_item.complete_file.with_suffix(suffix) - if await asyncio.to_thread(output.is_file): - return output - - batch_size = _VIDEO_HLS_BATCH_SIZE if m3u8.media_type == "video" else _AUDIO_HLS_BATCH_SIZE - tasks_results = await aio.gather(coros, batch_size=batch_size) - n_successful = sum(1 for result in tasks_results if result.downloaded) - - if n_successful != n_segmets: - msg = f"Download of some segments failed. Successful: {n_successful:,}/{n_segmets:,} " - raise DownloadError("HLS Seg Error", msg, media_item) - - seg_paths = [result.item.complete_file for result in tasks_results] - - if n_segmets > 1: - ffmpeg_result = await ffmpeg.concat(seg_paths, output) - if not ffmpeg_result.success: - raise DownloadError("FFmpeg Concat Error", ffmpeg_result.stderr, media_item) - else: - await asyncio.to_thread(seg_paths[0].rename, output) - return output - - audio = subtitles = None - if m3u8_group.subtitle: - try: - subtitles = await download(m3u8_group.subtitle) - except Exception as e: - log(f"Unable to download subtitles for {media_item.url}, Skipping. {e!r}", 40) - else: - log( - f"Found subtitles for {media_item.url}, but CDL is currently unable to merge them. Subtitle were saved at {subtitles} ", - 30, - ) + async with self._limiter(media_item): + if not media_item.is_segment: + log(f"{self.log_prefix} starting: {media_item.url}", 20) - if m3u8_group.audio: - audio = await download(m3u8_group.audio) - video = await download(m3u8_group.video) - return video, audio, subtitles - - def _prepare_hls_downloads( - self, media_item: MediaItem, m3u8: M3U8, download_folder: Path - ) -> list[Coroutine[None, None, SegmentDownloadResult]]: - padding = max(5, len(str(len(m3u8.segments)))) - - def create_segments() -> Generator[HlsSegment]: - for index, segment in enumerate(m3u8.segments, 1): - assert segment.uri - name = f"{index:0{padding}d}{constants.TempExt.HLS}" - yield HlsSegment(segment.title, name, parse_url(segment.absolute_uri)) - - async def download_segment(segment: HlsSegment): - # TODO: segments download should bypass the downloads slots limits. - # They count as a single download - seg_media_item = MediaItem.from_item( - media_item, - segment.url, - media_item.domain, - db_path=media_item.db_path, - download_folder=download_folder, - filename=segment.name, - ext=media_item.ext, - ) - seg_media_item.is_segment = True - return SegmentDownloadResult( - seg_media_item, - await self.start_download(seg_media_item), - ) - - return [download_segment(segment) for segment in create_segments()] + async with self._file_lock_vault[media_item.filename]: + log_debug(f"Lock for {media_item.filename!r} acquired", 20) + try: + return bool(await self.download(media_item)) + finally: + log_debug(f"Lock for {media_item.filename!r} released", 20) async def finalize_download(self, media_item: MediaItem, downloaded: bool) -> None: if downloaded: await asyncio.to_thread(Path.chmod, media_item.complete_file, 0o666) - await self.set_file_datetime(media_item, media_item.complete_file) - self.attempt_task_removal(media_item) + await _set_file_datetime(media_item, media_item.complete_file) + self.manager.progress_manager.files.add_completed() log(f"Download finished: {media_item.url}", 20) """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" async def check_file_can_download(self, media_item: MediaItem) -> None: - """Checks if the file can be downloaded.""" await self.manager.storage_manager.check_free_space(media_item) if not self.manager.client_manager.check_allowed_filetype(media_item): raise RestrictedFiletypeError(origin=media_item) @@ -332,135 +166,30 @@ async def check_file_can_download(self, media_item: MediaItem) -> None: if not self.manager.client_manager.check_allowed_date_range(media_item): raise RestrictedDateRangeError(origin=media_item) - async def set_file_datetime(self, media_item: MediaItem, complete_file: Path) -> None: - """Sets the file's datetime.""" - if media_item.is_segment: - return - - if config.get().download_options.disable_file_timestamps: - return - if not media_item.datetime: - log(f"Unable to parse upload date for {media_item.url}, using current datetime as file datetime", 30) - return - - # TODO: Make this entire method async (run in another thread) - - # 1. try setting creation date - try: - if sys.platform == "win32": - - def set_win_time(): - nano_ts: float = media_item.datetime * 1e7 # Windows uses nano seconds for dates - timestamp = int(nano_ts + WIN_EPOCH_OFFSET) - - # Windows dates are 64bits, split into 2 32bits unsigned ints (dwHighDateTime , dwLowDateTime) - # XOR to get the date as bytes, then shift to get the first 32 bits (dwHighDateTime) - ctime = wintypes.FILETIME(timestamp & 0xFFFFFFFF, timestamp >> 32) - access_mode = FILE_WRITE_ATTRIBUTES - sharing_mode = 0 # Exclusive access - security_mode = None # Use default security attributes - creation_disposition = OPEN_EXISTING - - # FILE_FLAG_BACKUP_SEMANTICS allows access to directories - flags = FILE_ATTRIBUTE_NORMAL | FILE_FLAG_BACKUP_SEMANTICS - template_file = None - - params = ( - access_mode, - sharing_mode, - security_mode, - creation_disposition, - flags, - template_file, - ) - - handle = windll.kernel32.CreateFileW(str(complete_file), *params) - windll.kernel32.SetFileTime( - handle, - byref(ctime), # Creation time - None, # Access time - None, # Modification time - ) - windll.kernel32.CloseHandle(handle) - - await asyncio.to_thread(set_win_time) - - elif sys.platform == "darwin" and MAC_OS_SET_FILE: - date_string = datetime.fromtimestamp(media_item.datetime).strftime("%m/%d/%Y %H:%M:%S") - cmd = ["-d", date_string, complete_file] - process = await asyncio.subprocess.create_subprocess_exec( - MAC_OS_SET_FILE, *cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL - ) - _ = await process.wait() - - except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError, ValueError): - pass - - # 2. try setting modification and access date - try: - await asyncio.to_thread(os.utime, complete_file, (media_item.datetime, media_item.datetime)) - except OSError: - pass - - def attempt_task_removal(self, media_item: MediaItem) -> None: - """Attempts to remove the task from the progress bar.""" - if media_item.is_segment: - return - if media_item.task_id is not None: - try: - self.manager.progress_manager.downloads.remove_task(media_item.task_id) - except ValueError: - pass - - media_item.set_task_id(None) - - """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - - async def start_download(self, media_item: MediaItem) -> bool: - try: - self.client.client_manager.check_domain_errors(self.domain) - except TooManyCrawlerErrors: - return False - - if not media_item.is_segment: - log(f"{self.log_prefix} starting: {media_item.url}", 20) - - async with self._file_lock_vault[media_item.filename]: - log_debug(f"Lock for {media_item.filename} acquired", 20) - try: - return bool(await self.download(media_item)) - finally: - log_debug(f"Lock for {media_item.filename} released", 20) - @error_handling_wrapper @retry async def download(self, media_item: MediaItem) -> bool | None: - """Downloads the media item.""" - url_as_str = str(media_item.url) - if url_as_str in KNOWN_BAD_URLS: - raise DownloadError(KNOWN_BAD_URLS[url_as_str]) try: - await self.manager.states.RUNNING.wait() - self.client.client_manager.check_domain_errors(self.domain) - media_item.current_attempt = media_item.current_attempt or 1 if not media_item.is_segment: - media_item.duration = await self.manager.db_manager.history_table.get_duration(self.domain, media_item) + media_item.duration = await self.manager.db_manager.history_table.get_duration( + media_item.domain, media_item + ) await self.check_file_can_download(media_item) - downloaded = await self.client.download_file(self.domain, media_item) + + downloaded = await self.client.download_file(media_item.domain, media_item) if downloaded: await asyncio.to_thread(Path.chmod, media_item.complete_file, 0o666) if not media_item.is_segment: - await self.set_file_datetime(media_item, media_item.complete_file) + await _set_file_datetime(media_item, media_item.complete_file) self.manager.progress_manager.files.add_completed() log(f"Download finished: {media_item.url}", 20) - self.attempt_task_removal(media_item) + return downloaded except SkipDownloadError as e: if not media_item.is_segment: log(f"Download skip {media_item.url}: {e}", 10) self.manager.progress_manager.files.add_skipped() - self.attempt_task_removal(media_item) except (DownloadError, ClientResponseError, InvalidContentTypeError): raise @@ -474,13 +203,6 @@ async def download(self, media_item: MediaItem) -> bool | None: ClientConnectorError, ) as e: ui_message = getattr(e, "status", type(e).__name__) - if size := await aio.get_size(media_item.partial_file): - if self._current_attempt_filesize.get(media_item.filename, 0) >= size: - raise DownloadError(ui_message, message=f"{self.log_prefix} failed", retry=True) from None - - self._current_attempt_filesize[media_item.filename] = size - raise DownloadError(status=999, message="Download timeout reached, retrying", retry=True) from None - message = str(e) raise DownloadError(ui_message, message, retry=True) from e @@ -490,9 +212,33 @@ def write_download_error( error_log_msg: ErrorLogMessage, exc_info: Exception | None = None, ) -> None: - self.attempt_task_removal(media_item) full_message = f"{self.log_prefix} Failed: {media_item.url} ({error_log_msg.main_log_msg}) \n -> Referer: {media_item.referer}" log(full_message, 40, exc_info=exc_info) self.manager.logs.write_download_error_log(media_item, error_log_msg.csv_log_msg) self.manager.progress_manager.download_errors.add_failure(error_log_msg.ui_failure) self.manager.progress_manager.files.add_failed() + + +async def _set_file_datetime(media_item: MediaItem, complete_file: Path) -> None: + if media_item.is_segment: + return + + if config.get().download_options.disable_file_timestamps: + return + + if not media_item.timestamp: + log(f"Unable to parse upload date for {media_item.url}, using current datetime as file datetime", 30) + return + + # 1. try setting creation date + try: + await set_creation_time(media_item.complete_file, media_item.timestamp) + + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError, ValueError): + pass + + # 2. try setting modification and access date + try: + await asyncio.to_thread(os.utime, complete_file, (media_item.timestamp, media_item.timestamp)) + except OSError: + pass diff --git a/cyberdrop_dl/downloader/mega_nz.py b/cyberdrop_dl/downloader/mega_nz.py index 0ee3d774e..e934c270d 100644 --- a/cyberdrop_dl/downloader/mega_nz.py +++ b/cyberdrop_dl/downloader/mega_nz.py @@ -40,7 +40,6 @@ async def _append_content(self, media_item: MediaItem, content: aiohttp.StreamRe async with aiofiles.open(media_item.partial_file, mode="ab") as f: for _, chunk_size in get_chunks(file_size): - await self.manager.states.RUNNING.wait() raw_chunk = await content.readexactly(chunk_size) chunk = chunk_decryptor.read(raw_chunk) await check_free_space() diff --git a/cyberdrop_dl/managers/client_manager.py b/cyberdrop_dl/managers/client_manager.py index 510ab128b..f88fceb9c 100644 --- a/cyberdrop_dl/managers/client_manager.py +++ b/cyberdrop_dl/managers/client_manager.py @@ -23,7 +23,6 @@ from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL, MediaItem from cyberdrop_dl.exceptions import DDOSGuardError, DownloadError, ScrapeError, TooManyCrawlerErrors from cyberdrop_dl.managers import Manager -from cyberdrop_dl.utils.aio import WeakAsyncLocks from cyberdrop_dl.utils.ffmpeg import probe from cyberdrop_dl.utils.logger import log, log_debug, log_spacer @@ -116,7 +115,6 @@ def __init__(self, manager: Manager) -> None: self.speed_limiter: DownloadSpeedLimiter = DownloadSpeedLimiter(rate_limits.download_speed_limit) self.download_client: DownloadClient = DownloadClient(manager, self) self.flaresolverr: FlareSolverr = FlareSolverr(manager) - self.file_locks: WeakAsyncLocks[str] = WeakAsyncLocks() self._session: aiohttp.ClientSession self._download_session: aiohttp.ClientSession diff --git a/cyberdrop_dl/managers/storage_manager.py b/cyberdrop_dl/managers/storage_manager.py index 5726fcf9d..d51d05805 100644 --- a/cyberdrop_dl/managers/storage_manager.py +++ b/cyberdrop_dl/managers/storage_manager.py @@ -90,7 +90,6 @@ def _mount_stats(self) -> Generator[MountStats]: async def check_free_space(self, media_item: MediaItem) -> None: """Checks if there is enough free space to download this item.""" - await self.manager.states.RUNNING.wait() if not await self._has_sufficient_space(media_item.download_folder): raise InsufficientFreeSpaceError(origin=media_item) @@ -202,7 +201,6 @@ async def _check_free_space_loop(self) -> None: last_check = -1 while True: - await self.manager.states.RUNNING.wait() self._updated.clear() last_check += 1 if self._used_mounts: diff --git a/cyberdrop_dl/progress/_common.py b/cyberdrop_dl/progress/_common.py index 689c5ee14..a91dd6e6e 100644 --- a/cyberdrop_dl/progress/_common.py +++ b/cyberdrop_dl/progress/_common.py @@ -1,7 +1,7 @@ from __future__ import annotations import dataclasses -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Self if TYPE_CHECKING: from collections.abc import Callable @@ -38,8 +38,8 @@ class ProgressHook: done: Callable[[], None] speed: Callable[[], float] - def __enter__(self) -> Callable[[int], None]: - return self.advance + def __enter__(self) -> Self: + return self def __exit__(self, *_) -> None: self.done() diff --git a/cyberdrop_dl/progress/scrape.py b/cyberdrop_dl/progress/scrape.py index 8384a89f5..448fb3a70 100644 --- a/cyberdrop_dl/progress/scrape.py +++ b/cyberdrop_dl/progress/scrape.py @@ -1,5 +1,6 @@ from __future__ import annotations +from contextvars import ContextVar from typing import TYPE_CHECKING, ClassVar from rich.console import Group @@ -20,6 +21,9 @@ _COLOR: str = "plum3" +_links: ContextVar[ProgressHook] = ContextVar("_links") +_downloads: ContextVar[ProgressHook] = ContextVar("_downloads") + class OverFlow(ProgressProxy): _desc: ClassVar[str] = "[{color}]... and {number:,} other {name}" @@ -67,7 +71,7 @@ def __rich__(self) -> Panel: def _redraw(self) -> None: self._overflow.update(count=len(self._tasks) - self._limit) - def add_task(self, description: str, total: float | None = None) -> TaskID: + def _add_task(self, description: str, total: float | None = None) -> TaskID: task_id = self._progress.add_task( self._desc_fmt.format(color=_COLOR, description=description), total=total, @@ -81,7 +85,7 @@ def remove_task(self, task_id: TaskID) -> None: self._redraw() def new_hook(self, description: object, total: float | None = None) -> ProgressHook: - task_id = self.add_task(str(description), total) + task_id = self._add_task(str(description), total) def advance(amount: int) -> None: self._advance(task_id, amount) @@ -110,7 +114,7 @@ def __init__(self) -> None: super().__init__(visible_tasks_limit=5) def new_task(self, url: URL) -> TaskID: # type: ignore[reportIncompatibleMethodOverride] - return self.add_task(str(url)) + return self._add_task(str(url)) class DownloadsPanel(UIPanel): @@ -132,9 +136,15 @@ def __init__(self) -> None: self.total_data_written: int = 0 super().__init__(visible_tasks_limit=10) - def new_task(self, *, domain: str, filename: str, expected_size: int | None = None) -> TaskID: # type: ignore[reportIncompatibleMethodOverride] - description = self._clean_task_desc(filename.rsplit("/", 1)[-1]) - return self.add_task(description, expected_size) + @property + def current_hook(self) -> ProgressHook: + return _downloads.get() + + def new(self, filename: str, size: float | None = None) -> ProgressHook: + description = self._clean_task_desc(str(filename).rsplit("/", 1)[-1]) + hook = self.new_hook(description, size) + _ = _downloads.set(hook) + return hook def _advance(self, task_id: TaskID, amount: int) -> None: self.total_data_written += amount diff --git a/cyberdrop_dl/progress/sorting.py b/cyberdrop_dl/progress/sorting.py index 3651ea52c..2d4128f48 100644 --- a/cyberdrop_dl/progress/sorting.py +++ b/cyberdrop_dl/progress/sorting.py @@ -26,7 +26,7 @@ def __init__(self) -> None: def new_task(self, folder: str, expected_size: int | None) -> TaskID: description = self._clean_task_desc(folder) - return super().add_task(description, expected_size) + return super()._add_task(description, expected_size) def advance_folder(self, task_id: TaskID, amount: int = 1) -> None: self._advance(task_id, amount) diff --git a/cyberdrop_dl/scrape_mapper.py b/cyberdrop_dl/scrape_mapper.py index ef6e5c2b8..6a1a68408 100644 --- a/cyberdrop_dl/scrape_mapper.py +++ b/cyberdrop_dl/scrape_mapper.py @@ -117,7 +117,6 @@ async def get_input_items(self, input_file) -> AsyncGenerator[ScrapeItem]: children_limits = config.get().download_options.maximum_number_of_children async for item in items_generator: - await self.manager.states.RUNNING.wait() item.children_limits = children_limits if self.should_scrape(item): yield item diff --git a/cyberdrop_dl/utils/dates.py b/cyberdrop_dl/utils/dates.py index c77771a24..c5cf291ac 100644 --- a/cyberdrop_dl/utils/dates.py +++ b/cyberdrop_dl/utils/dates.py @@ -1,7 +1,11 @@ from __future__ import annotations +import asyncio import datetime import email.utils +import shutil +import subprocess +import sys import warnings from functools import lru_cache from typing import TYPE_CHECKING, Literal, NewType, ParamSpec, TypeAlias, TypeVar @@ -10,6 +14,94 @@ if TYPE_CHECKING: from collections.abc import Callable + from pathlib import Path + +try: + import tzlocal + +except (ImportError, LookupError): + tzlocal = None + +_TIMEZONE = tzlocal.get_localzone if tzlocal else None + + +if sys.platform == "win32": + # Try to import win32con for Windows constants, fallback to hardcoded values if unavailable + try: + import win32con # type: ignore[reportMissingModuleSource] # pyright: ignore[reportMissingModuleSource] + + except ImportError: + win32con = None + + FILE_WRITE_ATTRIBUTES = 256 + OPEN_EXISTING = win32con.OPEN_EXISTING if win32con else 3 + FILE_ATTRIBUTE_NORMAL = win32con.FILE_ATTRIBUTE_NORMAL if win32con else 128 + FILE_FLAG_BACKUP_SEMANTICS = win32con.FILE_FLAG_BACKUP_SEMANTICS if win32con else 33554432 + + # Windows epoch is January 1, 1601. Unix epoch is January 1, 1970 + WIN_EPOCH_OFFSET = 116444736e9 + + from ctypes import byref, windll, wintypes + + def _set_win_time(file: Path, datetime: float) -> None: + nano_ts: float = datetime * 1e7 # Windows uses nano seconds for dates + timestamp = int(nano_ts + WIN_EPOCH_OFFSET) + + # Windows dates are 64bits, split into 2 32bits unsigned ints (dwHighDateTime , dwLowDateTime) + # XOR to get the date as bytes, then shift to get the first 32 bits (dwHighDateTime) + ctime = wintypes.FILETIME(timestamp & 0xFFFFFFFF, timestamp >> 32) + access_mode = FILE_WRITE_ATTRIBUTES + sharing_mode = 0 # Exclusive access + security_mode = None # Use default security attributes + creation_disposition = OPEN_EXISTING + + # FILE_FLAG_BACKUP_SEMANTICS allows access to directories + flags = FILE_ATTRIBUTE_NORMAL | FILE_FLAG_BACKUP_SEMANTICS + template_file = None + + params = ( + access_mode, + sharing_mode, + security_mode, + creation_disposition, + flags, + template_file, + ) + + handle = windll.kernel32.CreateFileW(str(file), *params) + windll.kernel32.SetFileTime( + handle, + byref(ctime), # Creation time + None, # Access time + None, # Modification time + ) + windll.kernel32.CloseHandle(handle) + + async def set_creation_time(file: Path, timestamp: float) -> None: + return await asyncio.to_thread(_set_win_time, file, timestamp) + + +elif sys.platform == "darwin": + # SetFile is non standard in macOS. Only users that have xcode installed will have SetFile + MAC_OS_SET_FILE = shutil.which("SetFile") + + async def set_creation_time(file: Path, timestamp: float) -> None: + if MAC_OS_SET_FILE: + time_string = datetime.datetime.fromtimestamp(timestamp).strftime("%m/%d/%Y %H:%M:%S") + process = await asyncio.subprocess.create_subprocess_exec( + MAC_OS_SET_FILE, + "-d", + time_string, + file, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + _ = await process.wait() + +else: + + async def set_creation_time(file: Path, timestamp: float) -> None: ... + TimeStamp = NewType("TimeStamp", int) DateOrder: TypeAlias = Literal["DMY", "DYM", "MDY", "MYD", "YDM", "YMD"] @@ -22,13 +114,6 @@ _DEFAULT_PARSERS: list[ParserKind] = ["relative-time", "custom-formats", "absolute-time", "no-spaces-time"] _DEFAULT_DATE_ORDER = "MDY" -try: - from tzlocal import get_localzone - - _TIMEZONE = get_localzone() -except (ImportError, LookupError): - _TIMEZONE = None - def _coerce_to_list(value: _S | set[_S] | list[_S] | tuple[_S, ...] | None) -> list[_S]: if value is None: From 3c69b8b3714942f34ec55cc57d9533d5de9faedb Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Mon, 23 Feb 2026 22:46:30 -0500 Subject: [PATCH 19/23] refactor: update downloaders --- cyberdrop_dl/clients/download_client.py | 154 +++++++++----------- cyberdrop_dl/clients/flaresolverr.py | 2 +- cyberdrop_dl/clients/scraper_client.py | 4 +- cyberdrop_dl/config/settings.py | 3 +- cyberdrop_dl/crawlers/crawler.py | 6 +- cyberdrop_dl/downloader/downloader.py | 185 +++++++++--------------- cyberdrop_dl/downloader/mega_nz.py | 2 +- cyberdrop_dl/managers/__init__.py | 6 +- cyberdrop_dl/managers/client_manager.py | 37 +---- cyberdrop_dl/managers/log_manager.py | 2 +- 10 files changed, 156 insertions(+), 245 deletions(-) diff --git a/cyberdrop_dl/clients/download_client.py b/cyberdrop_dl/clients/download_client.py index ab8b1746e..e40ae8177 100644 --- a/cyberdrop_dl/clients/download_client.py +++ b/cyberdrop_dl/clients/download_client.py @@ -2,8 +2,11 @@ import asyncio import contextlib +import logging +import os import time from http import HTTPStatus +from pathlib import Path from typing import TYPE_CHECKING, Any import aiofiles @@ -12,101 +15,65 @@ from cyberdrop_dl.clients.response import AbstractResponse from cyberdrop_dl.exceptions import DownloadError, InvalidContentTypeError, SlowDownloadError from cyberdrop_dl.utils import aio, dates -from cyberdrop_dl.utils.aio import WeakAsyncLocks from cyberdrop_dl.utils.logger import log from cyberdrop_dl.utils.utilities import get_size_or_none if TYPE_CHECKING: from collections.abc import AsyncGenerator, Callable, Coroutine, Mapping - from pathlib import Path from typing import Any import aiohttp from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL, MediaItem from cyberdrop_dl.managers import Manager - from cyberdrop_dl.managers.client_manager import ClientManager + from cyberdrop_dl.managers.client_manager import HttpClient from cyberdrop_dl.progress._common import ProgressHook _CONTENT_TYPES_OVERRIDES: dict[str, str] = {"text/vnd.trolltech.linguist": "video/MP2T"} _SLOW_DOWNLOAD_PERIOD: int = 10 # seconds - _FREE_SPACE_CHECK_PERIOD: int = 5 # Check every 5 chunks -_NULL_CONTEXT: contextlib.nullcontext[None] = contextlib.nullcontext() _USE_IMPERSONATION: set[str] = {"vsco", "celebforum"} +logger = logging.getLogger(__name__) + + class DownloadClient: - """AIOHTTP operations for downloading.""" + """Low level class to that performs the actual download + database updates""" - def __init__(self, manager: Manager, client_manager: ClientManager) -> None: + def __init__(self, manager: Manager, http_client: HttpClient) -> None: self.manager = manager - self.client_manager = client_manager + self.http_client = http_client self.download_speed_threshold = config.get().runtime_options.slow_download_speed - self._server_locks = WeakAsyncLocks[str]() - self.server_locked_domains: set[str] = set() self._supports_ranges: bool = True - def server_limiter(self, domain: str, server: str) -> asyncio.Lock | contextlib.nullcontext[None]: - if domain not in self.server_locked_domains: - return _NULL_CONTEXT - - return self._server_locks[server] - - @contextlib.asynccontextmanager - async def _track_errors(self, domain: str): - with self.client_manager.request_context(domain): - await self.client_manager.manager.states.RUNNING.wait() - yield - - async def _download(self, domain: str, media_item: MediaItem) -> bool: + async def _download(self, media_item: MediaItem) -> bool: resume_point = 0 if self._supports_ranges and (size := await asyncio.to_thread(get_size_or_none, media_item.partial_file)): resume_point = size media_item.headers["Range"] = f"bytes={size}-" - await asyncio.sleep(config.get().rate_limiting_options.total_delay) - - def process_response(resp: aiohttp.ClientResponse | AbstractResponse): - return self._process_response(media_item, domain, resume_point, resp) - + await asyncio.sleep(config.get().rate_limits.total_delay) download_url = media_item.debrid_link or media_item.url async with self.__request_context(download_url, media_item.domain, media_item.headers) as resp: - return await process_response(resp) + return await self._process_response(media_item, resume_point, resp) async def _process_response( self, media_item: MediaItem, - domain: str, resume_point: int, resp: aiohttp.ClientResponse | AbstractResponse, ) -> bool: if resp.status == HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE: await asyncio.to_thread(media_item.partial_file.unlink) - _ = await self.client_manager.check_http_status(resp, download=True) + _ = await self.http_client.check_http_status(resp, download=True) if not media_item.is_segment: _ = _get_content_type(media_item.ext, resp.headers) media_item.filesize = int(resp.headers.get("Content-Length", "0")) or None - if not media_item.complete_file: - proceed, skip = await self.get_final_file_info(media_item, domain) - self.client_manager.check_content_length(resp.headers) - if skip: - self.manager.progress_manager.files.add_skipped() - return False - if not proceed: - if media_item.is_segment: - return True - log(f"Skipping {media_item.url} as it has already been downloaded", 10) - self.manager.progress_manager.files.add_previously_completed(False) - await self.process_completed(media_item, domain) - await self.handle_media_item_completion(media_item, downloaded=False) - - return False - if resp.status != HTTPStatus.PARTIAL_CONTENT: await asyncio.to_thread(media_item.partial_file.unlink, missing_ok=True) @@ -139,14 +106,14 @@ async def __request_context( self, url: AbsoluteHttpURL, domain: str, headers: dict[str, str] ) -> AsyncGenerator[AbstractResponse | aiohttp.ClientResponse]: if domain in _USE_IMPERSONATION: - resp = await self.client_manager._curl_session.get(str(url), stream=True, headers=headers) + resp = await self.http_client._curl_session.get(str(url), stream=True, headers=headers) try: yield AbstractResponse.from_resp(resp) finally: await resp.aclose() return - async with self.client_manager._download_session.get(url, headers=headers) as resp: + async with self.http_client._download_session.get(url, headers=headers) as resp: yield resp async def _append_content(self, media_item: MediaItem, content: aiohttp.StreamReader | AbstractResponse) -> None: @@ -160,10 +127,10 @@ async def _append_content(self, media_item: MediaItem, content: aiohttp.StreamRe check_download_speed = self.make_speed_checker(hook) async with aiofiles.open(media_item.partial_file, mode="ab") as f: - async for chunk in content.iter_chunked(self.client_manager.speed_limiter.chunk_size): + async for chunk in content.iter_chunked(self.http_client.speed_limiter.chunk_size): await check_free_space() chunk_size = len(chunk) - await self.client_manager.speed_limiter.acquire(chunk_size) + await self.http_client.speed_limiter.acquire(chunk_size) await f.write(chunk) hook.advance(chunk_size) check_download_speed() @@ -211,33 +178,45 @@ def check_download_speed() -> None: return check_download_speed - async def download_file(self, domain: str, media_item: MediaItem) -> bool: + async def download_file(self, media_item: MediaItem) -> bool: """Starts a file.""" if config.get().download_options.skip_download_mark_completed and not media_item.is_segment: - log(f"Download Removed {media_item.url} due to mark completed option", 10) + log(f"Download skipped {media_item.url} due to mark completed option", 10) self.manager.progress_manager.files.add_skipped() - # set completed path - await self.process_completed(media_item, domain) + await self.mark_completed(media_item.domain, media_item) return False - async with self._track_errors(domain): - downloaded = await self._download(domain, media_item) + downloaded = await self._download(media_item) if downloaded: _ = await asyncio.to_thread(media_item.partial_file.rename, media_item.complete_file) if not media_item.is_segment: - proceed = await self.client_manager.check_file_duration(media_item) - await self.manager.db_manager.history_table.add_duration(domain, media_item) - if not proceed: - log(f"Download Skip {media_item.url} due to runtime restrictions", 10) + has_valid_duration = await self.http_client.check_file_duration(media_item) + await self.manager.db_manager.history_table.add_duration(media_item.domain, media_item) + await self.manager.db_manager.history_table.add_filesize(media_item.domain, media_item) + if not has_valid_duration: await asyncio.to_thread(media_item.complete_file.unlink) - await self.mark_incomplete(media_item, media_item.domain) + logger.warning(f"Download deleted {media_item.url} due to runtime restrictions") + await self.mark_incomplete(media_item) self.manager.progress_manager.files.add_skipped() return False - await self.process_completed(media_item, domain) - await self.handle_media_item_completion(media_item, downloaded=True) + + await self._finalize_download(media_item) + return downloaded + async def _finalize_download(self, media_item: MediaItem) -> None: + await asyncio.to_thread(Path.chmod, media_item.complete_file, 0o666) + if media_item.is_segment: + return + + media_item.downloaded = True + await self.manager.hash_manager.hash_client.hash_item_during_download(media_item) + self.manager.add_completed(media_item) + await self.mark_completed(media_item.domain, media_item) + await _set_file_datetime(media_item, media_item.complete_file) + log(f"Download finished: {media_item.url}") + """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" async def mark_incomplete(self, media_item: MediaItem) -> None: @@ -246,30 +225,10 @@ async def mark_incomplete(self, media_item: MediaItem) -> None: return await self.manager.db_manager.history_table.insert_incompleted(media_item.domain, media_item) - async def process_completed(self, media_item: MediaItem, domain: str) -> None: - await self.mark_completed(domain, media_item) - await self.add_file_size(domain, media_item) - async def mark_completed(self, domain: str, media_item: MediaItem) -> None: + self.manager.progress_manager.files.add_completed() await self.manager.db_manager.history_table.mark_complete(domain, media_item) - async def add_file_size(self, domain: str, media_item: MediaItem) -> None: - if await asyncio.to_thread(media_item.complete_file.is_file): - await self.manager.db_manager.history_table.add_filesize(domain, media_item) - - async def handle_media_item_completion(self, media_item: MediaItem, downloaded: bool = False) -> None: - """Sends to hash client to handle hashing and marks as completed/current download.""" - try: - media_item.downloaded = downloaded - await self.manager.hash_manager.hash_client.hash_item_during_download(media_item) - self.manager.add_completed(media_item) - except Exception: - log(f"Error handling media item completion of: {media_item.complete_file}", 10, exc_info=True) - - -def get_file_location(media_item: MediaItem) -> Path: - return media_item.download_folder / media_item.filename - def _check_filesize_limits(media: MediaItem) -> bool: """Checks if the file size is within the limits.""" @@ -321,3 +280,28 @@ def _get_last_modified(headers: Mapping[str, str]) -> int | None: def _is_html_or_text(content_type: str) -> bool: return any(s in content_type for s in ("html", "text")) + + +async def _set_file_datetime(media_item: MediaItem, complete_file: Path) -> None: + if media_item.is_segment: + return + + if config.get().download_options.disable_file_timestamps: + return + + if not media_item.timestamp: + logger.warning(f"Unable to parse upload date for {media_item.url}, using current datetime as file datetime") + return + + # 1. try setting creation date + try: + await dates.set_creation_time(media_item.complete_file, media_item.timestamp) + + except (OSError, ValueError): + pass + + # 2. try setting modification and access date + try: + await asyncio.to_thread(os.utime, complete_file, (media_item.timestamp, media_item.timestamp)) + except OSError: + pass diff --git a/cyberdrop_dl/clients/flaresolverr.py b/cyberdrop_dl/clients/flaresolverr.py index 8677b6690..73d0894d5 100644 --- a/cyberdrop_dl/clients/flaresolverr.py +++ b/cyberdrop_dl/clients/flaresolverr.py @@ -137,7 +137,7 @@ async def _request(self, command: _Command, /, data: Any = None, **kwargs: Any) if not self.url: raise DDOSGuardError("Found DDoS challenge, but FlareSolverr is not configured") - timeout = config.get().rate_limiting_options._aiohttp_timeout + timeout = config.get().rate_limits._aiohttp_timeout if command is _Command.CREATE_SESSION: timeout = aiohttp.ClientTimeout(total=5 * 60, connect=60) # 5 minutes to create session diff --git a/cyberdrop_dl/clients/scraper_client.py b/cyberdrop_dl/clients/scraper_client.py index 3774954fb..4137dc2f4 100644 --- a/cyberdrop_dl/clients/scraper_client.py +++ b/cyberdrop_dl/clients/scraper_client.py @@ -21,13 +21,13 @@ from curl_cffi.requests.session import HttpMethod from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL - from cyberdrop_dl.managers.client_manager import ClientManager + from cyberdrop_dl.managers.client_manager import HttpClient class ScraperClient: """AIOHTTP / CURL operations for scraping.""" - def __init__(self, client_manager: ClientManager) -> None: + def __init__(self, client_manager: HttpClient) -> None: self.client_manager = client_manager @contextlib.asynccontextmanager diff --git a/cyberdrop_dl/config/settings.py b/cyberdrop_dl/config/settings.py index a5f8bd5a5..e3d40d41e 100755 --- a/cyberdrop_dl/config/settings.py +++ b/cyberdrop_dl/config/settings.py @@ -61,7 +61,6 @@ def _validate_format(cls, value: str, valid_keys: set[str]) -> None: class DownloadOptions(FormatValidator, SettingsGroup): block_download_sub_folders: bool = False - disable_download_attempt_limit: bool = False disable_file_timestamps: bool = False include_album_id_in_folder_name: bool = False include_thread_id_in_folder_name: bool = False @@ -396,7 +395,7 @@ class ConfigSettings(Settings): ignore_options: IgnoreOptions = IgnoreOptions() logs: Logs = Logs() media_duration_limits: MediaDurationLimits = MediaDurationLimits() - rate_limiting_options: RateLimiting = RateLimiting() + rate_limits: RateLimiting = RateLimiting() runtime_options: Runtime = Runtime() sorting: Sorting = Sorting() ui_options: UIOptions = UIOptions() diff --git a/cyberdrop_dl/crawlers/crawler.py b/cyberdrop_dl/crawlers/crawler.py index 1fa79341e..2122a97ca 100644 --- a/cyberdrop_dl/crawlers/crawler.py +++ b/cyberdrop_dl/crawlers/crawler.py @@ -492,15 +492,15 @@ async def check_skip_by_config(self, media_item: MediaItem) -> bool: media_host = media_item.url.host if (hosts := config.get().ignore_options.skip_hosts) and any(host in media_host for host in hosts): - log(f"Download skip {media_item.url} due to skip_hosts config", 10) + log(f"Download skipped{media_item.url} due to skip_hosts config", 10) return True if (hosts := config.get().ignore_options.only_hosts) and not any(host in media_host for host in hosts): - log(f"Download skip {media_item.url} due to only_hosts config", 10) + log(f"Download skipped{media_item.url} due to only_hosts config", 10) return True if (regex := config.get().ignore_options.filename_regex_filter) and re.search(regex, media_item.filename): - log(f"Download skip {media_item.url} due to filename regex filter config", 10) + log(f"Download skipped{media_item.url} due to filename regex filter config", 10) return True return False diff --git a/cyberdrop_dl/downloader/downloader.py b/cyberdrop_dl/downloader/downloader.py index 4da1ae516..5ccef2bc7 100644 --- a/cyberdrop_dl/downloader/downloader.py +++ b/cyberdrop_dl/downloader/downloader.py @@ -2,15 +2,12 @@ import asyncio import contextlib -import os -import subprocess -from functools import wraps -from pathlib import Path -from typing import TYPE_CHECKING, NamedTuple, ParamSpec, TypeVar +import functools +import logging +from typing import TYPE_CHECKING, ParamSpec, TypeVar from aiohttp import ClientConnectorError, ClientError, ClientResponseError -from cyberdrop_dl import config from cyberdrop_dl.exceptions import ( DownloadError, DurationError, @@ -21,8 +18,6 @@ SkipDownloadError, ) from cyberdrop_dl.utils import aio -from cyberdrop_dl.utils.dates import set_creation_time -from cyberdrop_dl.utils.logger import log, log_debug from cyberdrop_dl.utils.utilities import error_handling_wrapper _VIDEO_HLS_BATCH_SIZE = 10 @@ -32,58 +27,49 @@ if TYPE_CHECKING: from collections.abc import Callable, Coroutine + from cyberdrop_dl import config from cyberdrop_dl.clients.download_client import DownloadClient + from cyberdrop_dl.config import Config from cyberdrop_dl.data_structures.url_objects import MediaItem from cyberdrop_dl.managers import Manager + P = ParamSpec("P") + R = TypeVar("R") -P = ParamSpec("P") -R = TypeVar("R") +logger = logging.getLogger(__name__) -class SegmentDownloadResult(NamedTuple): - item: MediaItem - downloaded: bool - -KNOWN_BAD_URLS = { - "https://i.imgur.com/removed.png": 404, - "https://saint2.su/assets/notfound.gif": 404, - "https://bnkr.b-cdn.net/maintenance-vid.mp4": 503, - "https://bnkr.b-cdn.net/maintenance.mp4": 503, - "https://c.bunkr-cache.se/maintenance-vid.mp4": 503, - "https://c.bunkr-cache.se/maintenance.jpg": 503, -} - - -def retry(func: Callable[P, Coroutine[None, None, R]]) -> Callable[P, Coroutine[None, None, R]]: - @wraps(func) - async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: - self: Downloader = args[0] - media_item: MediaItem = args[1] +def retry( + func: Callable[[Downloader, MediaItem], Coroutine[None, None, R]], +) -> Callable[[Downloader, MediaItem], Coroutine[None, None, R]]: + @functools.wraps(func) + async def wrapper(self: Downloader, media_item: MediaItem) -> R: while True: try: - return await func(*args, **kwargs) + return await func(self, media_item) except DownloadError as e: if not e.retry: raise media_item.current_attempt += 1 - - log(f"{self.log_prefix} failed: {media_item.url} with error: {e!s}", 40) - if media_item.current_attempt >= self.max_attempts: + logger.error(f"Download failed: {media_item.url} with error: {e!s}") + if media_item.current_attempt >= self.config.rate_limits.download_attempts: raise - retry_msg = f"Retrying {self.log_prefix.lower()}: {media_item.url} , retry attempt: {media_item.current_attempt + 1}" - log(retry_msg, 20) + retry_msg = f"Retrying download: {media_item.url}, attempt: {media_item.current_attempt + 1}" + logger.info(retry_msg) return wrapper -GENERIC_CRAWLERS = ".", "no_crawler" +_file_lock_vault: aio.WeakAsyncLocks[str] = aio.WeakAsyncLocks() +_NULL_CONTEXT: contextlib.nullcontext[None] = contextlib.nullcontext() class Downloader: + """High level class to handle download retries, limiters and post-download chores""" + def __init__( self, config: config.Config, @@ -92,27 +78,37 @@ def __init__( slots: int, ) -> None: self.manager: Manager = manager - - self.config = config + self.config: Config = config self.client: DownloadClient = client - - self.log_prefix = "Download" self.processed_items: set[str] = set() - self.waiting_items = 0 + self.waiting_items: int = 0 self._current_attempt_filesize: dict[str, int] = {} - self._file_lock_vault: aio.WeakAsyncLocks[str] = aio.WeakAsyncLocks() - self._ignore_history: bool = self.config.runtime_options.ignore_history self._semaphore: asyncio.Semaphore = asyncio.Semaphore(slots) + self._server_locks: aio.WeakAsyncLocks[str] = aio.WeakAsyncLocks[str]() + self._server_locked_domains: set[str] = set() + self._hardcoded_limits: dict[str, int] = {} + self._site_sems: dict[str, asyncio.Semaphore] = {} + self._global_limiter: asyncio.Semaphore = asyncio.Semaphore(self.config.rate_limits.max_simultaneous_downloads) - @property - def max_attempts(self): - if self.config.download_options.disable_download_attempt_limit: - return 1 - return self.config.rate_limiting_options.download_attempts + def _domain_limiter(self, domain: str) -> asyncio.Semaphore: + if sem := self._site_sems.get(domain): + return sem + + limit = self.config.rate_limits.max_simultaneous_downloads_per_domain + if hardcoded_limit := self._hardcoded_limits.get(domain): + limit = min(limit, hardcoded_limit) + + self._site_sems[domain] = sem = asyncio.Semaphore(limit) + return sem + + def _server_lock(self, domain: str, server: str) -> asyncio.Lock | contextlib.nullcontext[None]: + if domain not in self._server_locked_domains: + return _NULL_CONTEXT + + return self._server_locks[server] @contextlib.asynccontextmanager async def _limiter(self, media_item: MediaItem): - media_item.current_attempt = 0 if media_item.is_segment: yield return @@ -121,44 +117,36 @@ async def _limiter(self, media_item: MediaItem): await self.client.mark_incomplete(media_item) server = (media_item.debrid_link or media_item.url).host - server_limit, domain_limit, global_limit = ( - self.client.server_limiter(media_item.domain, server), - self._semaphore, - self.manager.client_manager.global_download_slots, - ) - async with server_limit, domain_limit, global_limit: + async with ( + self._server_lock(media_item.domain, server), + self._domain_limiter(media_item.domain), + self._global_limiter, + _file_lock_vault[media_item.filename], + ): + logger.debug(f"Lock for {media_item.filename!r} acquired") self.processed_items.add(media_item.db_path) self.waiting_items -= 1 - yield + try: + yield + finally: + logger.debug(f"Lock for {media_item.filename!r} released") async def run(self, media_item: MediaItem) -> bool: - if media_item.url.path in self.processed_items and not self._ignore_history: + if media_item.url.path in self.processed_items and not self.config.runtime_options.ignore_history: return False async with self._limiter(media_item): if not media_item.is_segment: - log(f"{self.log_prefix} starting: {media_item.url}", 20) - - async with self._file_lock_vault[media_item.filename]: - log_debug(f"Lock for {media_item.filename!r} acquired", 20) - try: - return bool(await self.download(media_item)) - finally: - log_debug(f"Lock for {media_item.filename!r} released", 20) + logger.info(f"Download starting: {media_item.url}") - async def finalize_download(self, media_item: MediaItem, downloaded: bool) -> None: - if downloaded: - await asyncio.to_thread(Path.chmod, media_item.complete_file, 0o666) - await _set_file_datetime(media_item, media_item.complete_file) + return bool(await self._download(media_item)) - self.manager.progress_manager.files.add_completed() - log(f"Download finished: {media_item.url}", 20) - - """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - - async def check_file_can_download(self, media_item: MediaItem) -> None: + async def _check_file_can_download(self, media_item: MediaItem) -> None: await self.manager.storage_manager.check_free_space(media_item) + if media_item.is_segment: + return + if not self.manager.client_manager.check_allowed_filetype(media_item): raise RestrictedFiletypeError(origin=media_item) if not await self.manager.client_manager.check_file_duration(media_item): @@ -168,27 +156,19 @@ async def check_file_can_download(self, media_item: MediaItem) -> None: @error_handling_wrapper @retry - async def download(self, media_item: MediaItem) -> bool | None: + async def _download(self, media_item: MediaItem) -> bool | None: try: if not media_item.is_segment: media_item.duration = await self.manager.db_manager.history_table.get_duration( media_item.domain, media_item ) - await self.check_file_can_download(media_item) - - downloaded = await self.client.download_file(media_item.domain, media_item) - if downloaded: - await asyncio.to_thread(Path.chmod, media_item.complete_file, 0o666) - if not media_item.is_segment: - await _set_file_datetime(media_item, media_item.complete_file) - self.manager.progress_manager.files.add_completed() - log(f"Download finished: {media_item.url}", 20) + await self._check_file_can_download(media_item) - return downloaded + return await self.client.download_file(media_item) except SkipDownloadError as e: if not media_item.is_segment: - log(f"Download skip {media_item.url}: {e}", 10) + logger.info(f"Download skipped {media_item.url}: {e}") self.manager.progress_manager.files.add_skipped() except (DownloadError, ClientResponseError, InvalidContentTypeError): @@ -212,33 +192,8 @@ def write_download_error( error_log_msg: ErrorLogMessage, exc_info: Exception | None = None, ) -> None: - full_message = f"{self.log_prefix} Failed: {media_item.url} ({error_log_msg.main_log_msg}) \n -> Referer: {media_item.referer}" - log(full_message, 40, exc_info=exc_info) - self.manager.logs.write_download_error_log(media_item, error_log_msg.csv_log_msg) + msg = f"Download failed: {media_item.url} ({error_log_msg.main_log_msg}) \n -> Referer: {media_item.referer}" + logger.error(msg, exc_info=exc_info) + self.manager.logs.write_download_error(media_item, error_log_msg.csv_log_msg) self.manager.progress_manager.download_errors.add_failure(error_log_msg.ui_failure) self.manager.progress_manager.files.add_failed() - - -async def _set_file_datetime(media_item: MediaItem, complete_file: Path) -> None: - if media_item.is_segment: - return - - if config.get().download_options.disable_file_timestamps: - return - - if not media_item.timestamp: - log(f"Unable to parse upload date for {media_item.url}, using current datetime as file datetime", 30) - return - - # 1. try setting creation date - try: - await set_creation_time(media_item.complete_file, media_item.timestamp) - - except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError, ValueError): - pass - - # 2. try setting modification and access date - try: - await asyncio.to_thread(os.utime, complete_file, (media_item.timestamp, media_item.timestamp)) - except OSError: - pass diff --git a/cyberdrop_dl/downloader/mega_nz.py b/cyberdrop_dl/downloader/mega_nz.py index e934c270d..d6c4de028 100644 --- a/cyberdrop_dl/downloader/mega_nz.py +++ b/cyberdrop_dl/downloader/mega_nz.py @@ -45,7 +45,7 @@ async def _append_content(self, media_item: MediaItem, content: aiohttp.StreamRe await check_free_space() chunk_size = len(chunk) - await self.client_manager.speed_limiter.acquire(chunk_size) + await self.http_client.speed_limiter.acquire(chunk_size) await f.write(chunk) self.manager.progress_manager.downloads.advance_file(media_item.task_id, chunk_size) check_download_speed() diff --git a/cyberdrop_dl/managers/__init__.py b/cyberdrop_dl/managers/__init__.py index f0d7ba3a4..dd170e72a 100644 --- a/cyberdrop_dl/managers/__init__.py +++ b/cyberdrop_dl/managers/__init__.py @@ -9,7 +9,7 @@ from cyberdrop_dl import __version__, appdata, config, constants from cyberdrop_dl.database import Database -from cyberdrop_dl.managers.client_manager import ClientManager +from cyberdrop_dl.managers.client_manager import HttpClient from cyberdrop_dl.managers.hash_manager import HashManager from cyberdrop_dl.managers.live_manager import LiveManager from cyberdrop_dl.managers.log_manager import LogManager @@ -39,7 +39,7 @@ class Manager: def __init__(self) -> None: self.hash_manager: HashManager = field(init=False) self.db_manager: Database = field(init=False) - self.client_manager: ClientManager = field(init=False) + self.client_manager: HttpClient = field(init=False) self.storage_manager: StorageManager = field(init=False) self.progress_manager: ProgressManager = ProgressManager(self, portrait=False) @@ -84,7 +84,7 @@ def prev_downloads(self) -> set[MediaItem]: async def async_startup(self) -> None: """Async startup process for the manager.""" self.states = AsyncioEvents(asyncio.Event(), asyncio.Event()) - self.client_manager = ClientManager(self) + self.client_manager = HttpClient(self) await self.client_manager.startup() self.storage_manager = StorageManager(self) diff --git a/cyberdrop_dl/managers/client_manager.py b/cyberdrop_dl/managers/client_manager.py index f88fceb9c..95f206146 100644 --- a/cyberdrop_dl/managers/client_manager.py +++ b/cyberdrop_dl/managers/client_manager.py @@ -21,14 +21,13 @@ from cyberdrop_dl.clients.scraper_client import ScraperClient from cyberdrop_dl.cookies import get_cookies_from_browsers, read_netscape_files from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL, MediaItem -from cyberdrop_dl.exceptions import DDOSGuardError, DownloadError, ScrapeError, TooManyCrawlerErrors +from cyberdrop_dl.exceptions import DownloadError, ScrapeError from cyberdrop_dl.managers import Manager from cyberdrop_dl.utils.ffmpeg import probe from cyberdrop_dl.utils.logger import log, log_debug, log_spacer if TYPE_CHECKING: - from asyncio.locks import Semaphore - from collections.abc import Callable, Generator, Iterable, Mapping + from collections.abc import Callable, Iterable, Mapping from http.cookies import BaseCookie from bs4 import BeautifulSoup @@ -97,7 +96,7 @@ def _create_ssl(): return ctx -class ClientManager: +class HttpClient: """Creates a 'client' that can be referenced by scraping or download sessions.""" def __init__(self, manager: Manager) -> None: @@ -107,10 +106,9 @@ def __init__(self, manager: Manager) -> None: self.rate_limits: dict[str, AsyncLimiter] = {} self.download_slots: dict[str, int] = {} - rate_limits = config.get().rate_limiting_options + rate_limits = config.get().rate_limits self.global_rate_limiter: AsyncLimiter = AsyncLimiter(rate_limits.rate_limit, 1) - self.global_download_slots: Semaphore = asyncio.Semaphore(rate_limits.max_simultaneous_downloads) self.scraper_client: ScraperClient = ScraperClient(self) self.speed_limiter: DownloadSpeedLimiter = DownloadSpeedLimiter(rate_limits.download_speed_limit) self.download_client: DownloadClient = DownloadClient(manager, self) @@ -145,7 +143,7 @@ async def __aexit__(self, *args) -> None: @property def rate_limiting_options(self): - return config.get().rate_limiting_options + return config.get().rate_limits def get_download_slots(self, domain: str) -> int: """Returns the download limit for a domain.""" @@ -267,31 +265,6 @@ def _new_tcp_connector(self) -> aiohttp.TCPConnector: conn._resolver_owner = True return conn - def check_domain_errors(self, domain: str) -> None: - if _crawler_errors[domain] >= env.MAX_CRAWLER_ERRORS: - if crawler := self.manager.scrape_mapper.disable_crawler(domain): - msg = ( - f"{crawler.__class__.__name__} has been disabled after too many errors. " - f"URLs from the following domains will be ignored: {crawler.SCRAPE_MAPPER_KEYS}" - ) - log(msg, 40) - raise TooManyCrawlerErrors - - @contextlib.contextmanager - def request_context(self, domain: str) -> Generator[None]: - self.check_domain_errors(domain) - try: - yield - except DDOSGuardError: - _crawler_errors[domain] += 1 - raise - else: - # we could potentially reset the counter here - # _crawler_errors[domain] = 0 - pass - finally: - pass - async def load_cookie_files(self) -> None: if config.get().browser_cookies.auto_import: assert config.get().browser_cookies.browser diff --git a/cyberdrop_dl/managers/log_manager.py b/cyberdrop_dl/managers/log_manager.py index cb25f149e..de9626930 100644 --- a/cyberdrop_dl/managers/log_manager.py +++ b/cyberdrop_dl/managers/log_manager.py @@ -62,7 +62,7 @@ def write_unsupported(self, url: URL, origin: ScrapeItem | URL | None = None) -> self._write_to_csv(self.config.logs.unsupported_urls, url=url, origin=get_origin(origin)) ) - def write_download_error_log(self, media_item: MediaItem, error_message: str) -> None: + def write_download_error(self, media_item: MediaItem, error_message: str) -> None: _ = self.task_group.create_task( self._write_to_csv( self.config.logs.download_error_urls, From b57b7ecb801d03ceeea9dc5f0a0d147b292f6739 Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Tue, 24 Feb 2026 14:51:30 -0500 Subject: [PATCH 20/23] refactor: update storage --- cyberdrop_dl/managers/storage_manager.py | 253 -------------------- cyberdrop_dl/storage.py | 287 +++++++++++++++++++++++ tests/conftest.py | 2 +- tests/test_storage.py | 47 ++-- 4 files changed, 321 insertions(+), 268 deletions(-) delete mode 100644 cyberdrop_dl/managers/storage_manager.py create mode 100644 cyberdrop_dl/storage.py diff --git a/cyberdrop_dl/managers/storage_manager.py b/cyberdrop_dl/managers/storage_manager.py deleted file mode 100644 index d51d05805..000000000 --- a/cyberdrop_dl/managers/storage_manager.py +++ /dev/null @@ -1,253 +0,0 @@ -from __future__ import annotations - -import asyncio -import functools -import itertools -from collections import defaultdict -from dataclasses import asdict, dataclass, field -from pathlib import Path -from typing import TYPE_CHECKING, Final, NamedTuple - -import psutil -from pydantic import ByteSize - -from cyberdrop_dl import config -from cyberdrop_dl.exceptions import InsufficientFreeSpaceError -from cyberdrop_dl.utils.logger import log, log_debug - -if TYPE_CHECKING: - from collections.abc import Generator - - from psutil._ntuples import sdiskpart - - from cyberdrop_dl.data_structures.url_objects import MediaItem - from cyberdrop_dl.managers import Manager - - -@dataclass(frozen=True, slots=True, order=True) -class DiskPartition: - mountpoint: Path - device: Path = field(compare=False) - fstype: str = field(compare=False) - opts: str = field(compare=False) - - @staticmethod - def from_psutil(diskpart: sdiskpart) -> DiskPartition: - # Resolve converts any mapped drive to UNC paths (windows) - return DiskPartition( - Path(diskpart.mountpoint).resolve(), - Path(diskpart.device).resolve(), - diskpart.fstype, - diskpart.opts, - ) - - -class MountStats(NamedTuple): - partition: DiskPartition - free_space: ByteSize - - def __str__(self) -> str: - free_space = self.free_space.human_readable(decimal=True) - stats_as_dict = asdict(self.partition) | {"free_space": free_space} - return ", ".join(f"'{k}': '{v}'" for k, v in stats_as_dict.items()) - - -_CHECK_PERIOD: Final = 2 # how often the check_free_space_loop will run (in seconds) -_LOG_PERIOD: Final = 10 # log storage details every loops, AKA log every 20 (2x10) seconds, - - -class StorageManager: - """Runs an infinite loop to keep an updated value of the available space on all storage devices.""" - - def __init__(self, manager: Manager): - self.manager: Manager = manager - self.total_data_written: int = 0 - self._used_mounts: set[Path] = set() - self._free_space: dict[Path, int] = {} - self._mount_addition_locks: dict[Path, asyncio.Lock] = defaultdict(asyncio.Lock) - self._updated: asyncio.Event = asyncio.Event() - self._partitions = list(_get_disk_partitions()) - self._loop = asyncio.create_task(self._check_free_space_loop()) - self._unavailable_mounts: set[Path] = set() - - @property - def mounts(self) -> tuple[Path, ...]: - return tuple(p.mountpoint for p in self._partitions) - - @property - def _simplified_stats(self) -> str: - stats_as_str = "\n".join(f" {mount_stats!s}" for mount_stats in self._mount_stats()) - return f"Storage status:\n {stats_as_str}" - - def _mount_stats(self) -> Generator[MountStats]: - """Returns information of every used mount + its free space.""" - - for partition in self._partitions: - free_space = self._free_space.get(partition.mountpoint) - if free_space is not None: - yield MountStats(partition, ByteSize(free_space)) - - async def check_free_space(self, media_item: MediaItem) -> None: - """Checks if there is enough free space to download this item.""" - - if not await self._has_sufficient_space(media_item.download_folder): - raise InsufficientFreeSpaceError(origin=media_item) - - async def reset(self) -> None: - # This is causing lockups - # await self._updated.wait() # Make sure a query is not running right now - self.total_data_written = 0 - self._used_mounts = set() - self._free_space = {} - - async def close(self) -> None: - await self.reset() - try: - self._loop.cancel() - await self._loop - except asyncio.CancelledError: - pass - - async def _check_nt_network_drive(self, folder: Path) -> None: - """Checks is the drive of this folder is a Windows network drive (UNC or unknown mapped drive) and exists.""" - # See: https://github.com/jbsparrow/CyberDropDownloader/issues/860 - if not psutil.WINDOWS: - return - - # We can discard mapped drives because they would have been converted to UNC path at startup - # calling resolve on a mapped network drive returns its UNC path - # it would only still be a mapped drive is the network address is not available - is_mapped_drive = ":" in folder.drive and len(folder.drive) == 2 - is_unc_path = folder.drive.startswith("\\\\") - if is_mapped_drive or not is_unc_path: - return - - folder_drive = _drive_as_path(folder.drive) - async with self._mount_addition_locks[folder_drive]: - if folder_drive in itertools.chain(self._unavailable_mounts, self.mounts): - return - - msg = f"Checking new possible network_drive: '{folder_drive}' for folder '{folder}'" - log_debug(msg) - - try: - is_dir = await asyncio.to_thread(folder_drive.is_dir) - except OSError: - is_dir = False - - if is_dir: - net_drive = DiskPartition(folder_drive, folder_drive, "network_drive", "") - self._partitions.append(net_drive) - - else: - self._unavailable_mounts.add(folder_drive) - - async def _has_sufficient_space(self, folder: Path) -> bool: - """Checks if there is enough free space to download to this folder. - - `folder` must be an absolute path""" - - await self._check_nt_network_drive(folder) - mount = _get_mount_point(folder, self.mounts) - if not mount: - return False - - async with self._mount_addition_locks[mount]: - if mount not in self._free_space: - # Manually query this mount now. Next time it will be part of the loop - - self._free_space[mount] = await self._get_free_space(mount) - self._used_mounts.add(mount) - log(f"A new mountpoint ('{mount!s}') will be used for '{folder}'") - log(self._simplified_stats) - - free_space = self._free_space[mount] - if free_space == -1: - return True - return free_space > config.get().general.required_free_space - - async def _get_free_space(self, mount: Path) -> int: - exc_info = None - free_space = 0 - - try: - result = await asyncio.to_thread(psutil.disk_usage, str(mount)) - free_space = result.free - except OSError as e: - if "operation not supported" in str(e).casefold(): - exc_info = e - else: - raise - - if exc_info or (free_space == 0 and self._is_fuse_fs(mount)): - msg = f"Unable to get free space from mount point ('{mount!s}')'. Skipping free space check" - log(msg, 40, exc_info=exc_info) - free_space = -1 - - return free_space - - def _get_partition(self, mount: Path) -> DiskPartition | None: - for partition in self._partitions: - if mount.is_relative_to(partition.mountpoint): - return partition - - def _is_fuse_fs(self, mount: Path) -> bool: - if partition := self._get_partition(mount): - return "fuse" in partition.fstype - return False - - async def _check_free_space_loop(self) -> None: - """Infinite loop to get free space of all used mounts and update internal dict""" - - last_check = -1 - while True: - self._updated.clear() - last_check += 1 - if self._used_mounts: - used_mounts = sorted(mount for mount in self._used_mounts if self._free_space[mount] != -1) - tasks = [self._get_free_space(mount) for mount in used_mounts] - results = await asyncio.gather(*tasks) - for mount, free_space in zip(used_mounts, results, strict=True): - self._free_space[mount] = free_space - if last_check % _LOG_PERIOD == 0: - log_debug(self._simplified_stats) - - self._updated.set() - await asyncio.sleep(_CHECK_PERIOD) - - -@functools.lru_cache -def _get_mount_point(folder: Path, all_mounts: tuple[Path, ...]) -> Path | None: - # Cached for performance. - # It's not an expensive operation nor IO blocking, but it's very common for multiple files to share the same download folder - # ex: HLS downloads could have over a thousand segments. All of them will go to the same folder - assert folder.is_absolute() - possible_mountpoints = (mount for mount in all_mounts if folder.is_relative_to(mount)) - - # Get the closest mountpoint to `folder` - # mount_a = /home/user/ -> points to an internal SSD - # mount_b = /home/user/USB -> points to an external USB drive - # If `folder`` is `/home/user/USB/videos`, the correct mountpoint is mount_b - if mount_point := max(possible_mountpoints, key=lambda path: len(path.parts), default=None): - return mount_point - - # Mount point for this path does not exists - # This will only happen on Windows, ex: an USB drive (`D:`) that is not currently available (AKA disconnected) - # On Unix there's always at least 1 mountpoint, root (`/`) - msg = f"No available mountpoint found for '{folder}'" - msg += f"\n -> drive = '{_drive_as_path(folder.drive)}' , last_parent = '{folder.parents[-1]}'" - log(msg, 40) - - -def _drive_as_path(drive: str) -> Path: - is_mapped_drive = ":" in drive and len(drive) == 2 - return Path(f"{drive}/" if is_mapped_drive else drive) - - -def _get_disk_partitions() -> Generator[DiskPartition]: - for p in psutil.disk_partitions(all=True): - try: - yield DiskPartition.from_psutil(p) - except OSError as e: - msg = f"Unable to get information about {p.mountpoint}. All files with that mountpoint as target will be skipped: {e!r}" - log(msg, 40) diff --git a/cyberdrop_dl/storage.py b/cyberdrop_dl/storage.py new file mode 100644 index 000000000..be789e7fd --- /dev/null +++ b/cyberdrop_dl/storage.py @@ -0,0 +1,287 @@ +from __future__ import annotations + +import asyncio +import dataclasses +import functools +import logging +from collections import defaultdict +from contextvars import ContextVar +from pathlib import Path +from typing import TYPE_CHECKING, Final, Self + +import psutil +from pydantic import ByteSize + +from cyberdrop_dl.exceptions import InsufficientFreeSpaceError + +if TYPE_CHECKING: + from collections.abc import Awaitable, Callable, Generator + + from psutil._ntuples import sdiskpart + + from cyberdrop_dl.data_structures.url_objects import MediaItem + + +logger = logging.getLogger(__name__) + + +@dataclasses.dataclass(frozen=True, slots=True, order=True) +class DiskPartition: + mountpoint: Path + device: Path = dataclasses.field(compare=False) + fstype: str = dataclasses.field(compare=False) + opts: str = dataclasses.field(compare=False) + + @staticmethod + def from_psutil(diskpart: sdiskpart) -> DiskPartition: + # Resolve converts any mapped drive to UNC paths (windows) + return DiskPartition( + Path(diskpart.mountpoint).resolve(), + Path(diskpart.device).resolve(), + diskpart.fstype, + diskpart.opts, + ) + + +@dataclasses.dataclass(frozen=True, slots=True, order=True) +class DiskPartitionStats: + partition: DiskPartition + free_space: ByteSize + + def __str__(self) -> str: + free_space = self.free_space.human_readable(decimal=True) + stats_as_dict = dataclasses.asdict(self.partition) | {"free_space": free_space} + return ", ".join(f"'{k}': '{v}'" for k, v in stats_as_dict.items()) + + +_storage: ContextVar[StorageChecker] = ContextVar("_storage") +_PARTITIONS: list[DiskPartition] = [] +_UNAVAILABLE: set[Path] = set() +_LOCKS: dict[Path, asyncio.Lock] = defaultdict(asyncio.Lock) +_CHECK_PERIOD: Final = 2 # how often the check_free_space_loop will run (in seconds) +_LOG_PERIOD: Final = 10 # log storage details every loops, AKA log every 20 (2x10) seconds, + + +@dataclasses.dataclass(slots=True) +class StorageChecker: + """Runs an infinite loop to keep an updated value of the available space on all storage devices.""" + + required_free_space: int + _free_space_map: dict[Path, int] = dataclasses.field(init=False, default_factory=dict) + _loop: asyncio.Task[None] | None = None + + def __str__(self) -> str: + info = "\n".join(f" {stats!s}" for stats in self._partition_stats()) + return f"Storage status:\n {info}" + + def _partition_stats(self) -> Generator[DiskPartitionStats]: + for partition in partitions(): + free_space = self._free_space_map.get(partition.mountpoint) + if free_space is not None: + yield DiskPartitionStats(partition, ByteSize(free_space)) + + async def _has_sufficient_space(self, folder: Path) -> bool: + await _check_nt_network_drive(folder) + mount = _get_mount_point(folder) + if not mount: + return False + + if mount not in self._free_space_map: + async with _LOCKS[mount]: + if mount not in self._free_space_map: + # Manually query this mount now. Next time it will be part of the loop + + self._free_space_map[mount] = await get_free_space(mount) + logger.info(f"A new mountpoint ('{mount!s}') will be used for '{folder}'") + logger.info(self) + + if self._loop is None: + self._loop = asyncio.create_task(self._check_free_space_loop()) + + free_space = self._free_space_map[mount] + return free_space == -1 or free_space > self.required_free_space + + async def _check_free_space_loop(self) -> None: + """Infinite loop to get free space of all used mounts and update internal dict""" + + last_check = -1 + assert len(self._free_space_map) >= 1 + while True: + last_check += 1 + mountpoints = sorted(mount for mount, free_space in self._free_space_map.items() if free_space != -1) + if mountpoints: + results = await asyncio.gather(*(get_free_space(mount) for mount in mountpoints)) + self._free_space_map.update(zip(mountpoints, results, strict=True)) + + if last_check % _LOG_PERIOD == 0: + logger.debug(self) + + await asyncio.sleep(_CHECK_PERIOD) + + async def check_free_space(self, media_item: MediaItem) -> None: + """Checks if there is enough free space to download this item.""" + + if not await self._has_sufficient_space(media_item.download_folder): + raise InsufficientFreeSpaceError(origin=media_item) + + async def close(self) -> None: + self._free_space_map.clear() + if self._loop is None: + return + try: + _ = self._loop.cancel() + await self._loop + except asyncio.CancelledError: + pass + + async def __aenter__(self) -> Self: + _ = _storage.set(self) + return self + + async def __aexit__(self, *_) -> None: + await self.close() + + +@functools.lru_cache +def _get_mount_point(folder: Path) -> Path | None: + # Cached for performance. + # It's not an expensive operation nor IO blocking, but it's very common for multiple files to share the same download folder + # ex: HLS downloads could have over a thousand segments. All of them will go to the same folder + assert folder.is_absolute() + possible_mountpoints = (mount for mount in mountpoints() if folder.is_relative_to(mount)) + + # Get the closest mountpoint to `folder` + # mount_a = /home/user/ -> points to an internal SSD + # mount_b = /home/user/USB -> points to an external USB drive + # If `folder`` is `/home/user/USB/videos`, the correct mountpoint is mount_b + if mount_point := max(possible_mountpoints, key=lambda path: len(path.parts), default=None): + return mount_point + + # Mount point for this path does not exists + # This will only happen on Windows, ex: an USB drive (`D:`) that is not currently available (AKA disconnected) + # On Unix there's always at least 1 mountpoint, root (`/`) + msg = f"No available mountpoint found for '{folder}'" + msg += f"\n -> drive = '{_drive_as_path(folder.drive)}' , last_parent = '{folder.parents[-1]}'" + logger.error(msg) + + +def _drive_as_path(drive: str) -> Path: + is_mapped_drive = ":" in drive and len(drive) == 2 + return Path(f"{drive}/" if is_mapped_drive else drive) + + +def _get_disk_partitions() -> Generator[DiskPartition]: + for p in psutil.disk_partitions(all=True): + try: + yield DiskPartition.from_psutil(p) + except OSError as e: + logger.error( + f"Unable to get information about {p.mountpoint}. All files with that mountpoint as target will be skipped: {e!r}" + ) + + +def find_partition(path: Path) -> DiskPartition | None: + for partition in partitions(): + if path.is_relative_to(partition.mountpoint): + return partition + + +def is_fuse_fs(path: Path) -> bool: + if partition := find_partition(path): + return "fuse" in partition.fstype + return False + + +async def _check_nt_network_drive(folder: Path) -> None: + """Checks is the drive of this folder is a Windows network drive (UNC or unknown mapped drive) and exists.""" + # See: https://github.com/jbsparrow/CyberDropDownloader/issues/860 + if not psutil.WINDOWS: + return + + # We can discard mapped drives because they would have been converted to UNC path at startup + # calling resolve on a mapped network drive returns its UNC path + # it would only still be a mapped drive is the network address is not available + is_mapped_drive = ":" in folder.drive and len(folder.drive) == 2 + is_unc_path = folder.drive.startswith("\\\\") + if is_mapped_drive or not is_unc_path: + return + + folder_drive = _drive_as_path(folder.drive) + + if folder_drive in _UNAVAILABLE: + return + + mounts = mountpoints() + if folder_drive in mounts: + return + + async with _LOCKS[folder_drive]: + if folder_drive in _UNAVAILABLE or folder_drive in mounts: + return + + logger.debug(f"Checking new possible network_drive: '{folder_drive}' for folder '{folder}'") + + try: + is_dir = await asyncio.to_thread(folder_drive.is_dir) + except OSError: + is_dir = False + + if is_dir: + _PARTITIONS.append(DiskPartition(folder_drive, folder_drive, "network_drive", "")) + _get_mount_point.cache_clear() + + else: + _UNAVAILABLE.add(folder_drive) + + +async def get_free_space(path: Path) -> int: + unsupported = None + free_space = 0 + + try: + result = await asyncio.to_thread(psutil.disk_usage, str(path)) + free_space = result.free + except OSError as e: + if "operation not supported" not in str(e).casefold(): + raise + + unsupported = e + + if unsupported or (free_space == 0 and is_fuse_fs(path)): + logger.error( + f"Unable to get free space from mount point ('{path!s}')'. Skipping free space check", + exc_info=unsupported, + ) + free_space = -1 + + return free_space + + +def create_free_space_checker(media_item: MediaItem, *, frecuency: int = 5) -> Callable[[], Awaitable[None]]: + current_chunk = 0 + check = _storage.get().check_free_space + + async def check_every() -> None: + nonlocal current_chunk + if current_chunk % frecuency == 0: + await check(media_item) + current_chunk += 1 + + return check_every + + +def partitions() -> tuple[DiskPartition, ...]: + if not _PARTITIONS: + _PARTITIONS.extend(_get_disk_partitions()) + return tuple(_PARTITIONS) + + +def mountpoints() -> tuple[Path, ...]: + return tuple(p.mountpoint for p in partitions()) + + +def clear_cache() -> None: + _PARTITIONS.clear() + _UNAVAILABLE.clear() + _LOCKS.clear() + _get_mount_point.cache_clear() diff --git a/tests/conftest.py b/tests/conftest.py index 7a73f6e1c..7c65fe6a8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,8 +4,8 @@ from typing import TYPE_CHECKING import pytest -from cyberdrop_dl.scraper import scrape_mapper +from cyberdrop_dl import scrape_mapper from cyberdrop_dl.managers import Manager if TYPE_CHECKING: diff --git a/tests/test_storage.py b/tests/test_storage.py index 885ba8799..f1d74d4e3 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -1,45 +1,64 @@ +from __future__ import annotations + import asyncio import dataclasses -from collections.abc import AsyncGenerator from pathlib import Path +from typing import TYPE_CHECKING from unittest import mock import pytest -from cyberdrop_dl.managers import Manager -from cyberdrop_dl.managers.storage_manager import StorageManager +from cyberdrop_dl import storage +from cyberdrop_dl.storage import StorageChecker + +if TYPE_CHECKING: + from collections.abc import AsyncGenerator @pytest.fixture -async def storage(running_manager: Manager) -> AsyncGenerator[StorageManager]: - yield StorageManager(running_manager) +async def storag() -> AsyncGenerator[StorageChecker]: + async with StorageChecker(required_free_space=512_000_000) as m: + yield m -async def test_unsupported_fs_should_not_return_zero(storage: StorageManager) -> None: +async def test_unsupported_fs_should_not_return_zero() -> None: cwd = await asyncio.to_thread(Path().resolve) - free_space = await storage._get_free_space(cwd) + free_space = await storage.get_free_space(cwd) assert free_space > 0 with mock.patch("psutil.disk_usage", side_effect=OSError(None, "operation not supported")): - free_space = await storage._get_free_space(cwd) + free_space = await storage.get_free_space(cwd) assert free_space == -1 with mock.patch("psutil.disk_usage", side_effect=OSError(None, "another error")): with pytest.raises(OSError): - await storage._get_free_space(cwd) + _ = await storage.get_free_space(cwd) -async def test_fuse_filesystem_should_not_return_zero(storage: StorageManager) -> None: +async def test_fuse_filesystem_should_not_return_zero() -> None: cwd = await asyncio.to_thread(Path().resolve) - partition = storage._get_partition(cwd) + partition = storage.find_partition(cwd) assert partition - storage._partitions = [dataclasses.replace(partition, fstype="fuse")] + assert not storage.is_fuse_fs(cwd) + storage._PARTITIONS = [dataclasses.replace(partition, fstype="fuse")] # pyright: ignore[reportPrivateUsage] + assert storage.is_fuse_fs(cwd) - free_space = await storage._get_free_space(cwd) + free_space = await storage.get_free_space(cwd) assert free_space > 0 class NullUsage: free = 0 with mock.patch("psutil.disk_usage", return_value=NullUsage()): - free_space = await storage._get_free_space(cwd) + free_space = await storage.get_free_space(cwd) assert free_space == -1 + + +def test_storage_only_work_with_abs_paths() -> None: + cwd = Path() + assert storage.find_partition(cwd) is None + assert storage.find_partition(cwd.resolve()) + + with pytest.raises(AssertionError): + storage._get_mount_point(cwd) + + assert storage._get_mount_point(cwd.resolve()) From 3f393e8e69849e9b79f026f92152717befa3af6e Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Tue, 24 Feb 2026 14:52:18 -0500 Subject: [PATCH 21/23] refactor: update downloader --- cyberdrop_dl/clients/download_client.py | 213 ++++++++++---------- cyberdrop_dl/clients/hash_client.py | 4 +- cyberdrop_dl/clients/jdownloader.py | 6 +- cyberdrop_dl/clients/scraper_client.py | 4 +- cyberdrop_dl/config/settings.py | 85 +++++++- cyberdrop_dl/crawlers/crawler.py | 4 +- cyberdrop_dl/data_structures/url_objects.py | 6 +- cyberdrop_dl/director.py | 4 +- cyberdrop_dl/downloader/downloader.py | 12 +- cyberdrop_dl/downloader/mega_nz.py | 6 +- cyberdrop_dl/managers/__init__.py | 144 +------------ cyberdrop_dl/managers/_manager.py | 143 +++++++++++++ cyberdrop_dl/managers/client_manager.py | 71 ++----- cyberdrop_dl/scrape_mapper.py | 2 +- cyberdrop_dl/utils/markdown.py | 2 +- cyberdrop_dl/utils/utilities.py | 2 +- tests/test_database.py | 4 +- tests/test_startup.py | 2 - 18 files changed, 364 insertions(+), 350 deletions(-) create mode 100644 cyberdrop_dl/managers/_manager.py diff --git a/cyberdrop_dl/clients/download_client.py b/cyberdrop_dl/clients/download_client.py index e40ae8177..a9a8f568d 100644 --- a/cyberdrop_dl/clients/download_client.py +++ b/cyberdrop_dl/clients/download_client.py @@ -4,19 +4,21 @@ import contextlib import logging import os +import shutil import time from http import HTTPStatus from pathlib import Path from typing import TYPE_CHECKING, Any import aiofiles +import aiohttp +from aiolimiter import AsyncLimiter -from cyberdrop_dl import config, constants +from cyberdrop_dl import config, constants, storage from cyberdrop_dl.clients.response import AbstractResponse +from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL, MediaItem from cyberdrop_dl.exceptions import DownloadError, InvalidContentTypeError, SlowDownloadError from cyberdrop_dl.utils import aio, dates -from cyberdrop_dl.utils.logger import log -from cyberdrop_dl.utils.utilities import get_size_or_none if TYPE_CHECKING: from collections.abc import AsyncGenerator, Callable, Coroutine, Mapping @@ -30,32 +32,75 @@ from cyberdrop_dl.progress._common import ProgressHook +class DownloadSpeedLimiter(AsyncLimiter): + __slots__ = () + + def __init__(self, max_rate: int) -> None: + super().__init__(max_rate, time_period=1) + + async def acquire(self, amount: float = 1) -> None: + if self.max_rate <= 0: + return + await super().acquire(amount) + + _CONTENT_TYPES_OVERRIDES: dict[str, str] = {"text/vnd.trolltech.linguist": "video/MP2T"} _SLOW_DOWNLOAD_PERIOD: int = 10 # seconds -_FREE_SPACE_CHECK_PERIOD: int = 5 # Check every 5 chunks _USE_IMPERSONATION: set[str] = {"vsco", "celebforum"} logger = logging.getLogger(__name__) -class DownloadClient: +class StreamDownloader: """Low level class to that performs the actual download + database updates""" - def __init__(self, manager: Manager, http_client: HttpClient) -> None: + def __init__(self, manager: Manager, http_client: HttpClient, config: config.Config) -> None: self.manager = manager self.http_client = http_client - self.download_speed_threshold = config.get().runtime_options.slow_download_speed + self.config = config + self._slow_download_threshold = config.runtime.slow_download_speed self._supports_ranges: bool = True + self.chunk_size: int = 1024 * 1024 * 10 # 10MB + if config.rate_limits.download_speed_limit: + self.chunk_size = min(self.chunk_size, config.rate_limits.download_speed_limit) + + self._speed_limiter: DownloadSpeedLimiter = DownloadSpeedLimiter(config.rate_limits.download_speed_limit) + + async def download(self, media_item: MediaItem) -> bool: + """Starts a file.""" + if self.config.download_options.skip_download_mark_completed and not media_item.is_segment: + logger.info(f"Download skipped {media_item.url} due to mark completed option", 10) + self.manager.progress_manager.files.add_skipped() + await self.mark_completed(media_item.domain, media_item) + return False + + downloaded = await self._download(media_item) + + if downloaded: + _ = await asyncio.to_thread(shutil.move, media_item.partial_file, media_item.complete_file) + if not media_item.is_segment: + has_valid_duration = await self.http_client.check_file_duration(media_item) + await self.manager.db_manager.history_table.add_filesize(media_item.domain, media_item) + if not has_valid_duration: + await asyncio.to_thread(media_item.complete_file.unlink) + logger.warning(f"Download deleted {media_item.url} due to runtime restrictions") + await self.mark_incomplete(media_item) + self.manager.progress_manager.files.add_skipped() + return False + + await self._finalize_download(media_item) + + return downloaded async def _download(self, media_item: MediaItem) -> bool: resume_point = 0 - if self._supports_ranges and (size := await asyncio.to_thread(get_size_or_none, media_item.partial_file)): + if self._supports_ranges and (size := await aio.get_size(media_item.partial_file)): resume_point = size media_item.headers["Range"] = f"bytes={size}-" - await asyncio.sleep(config.get().rate_limits.total_delay) - download_url = media_item.debrid_link or media_item.url + await asyncio.sleep(self.config.rate_limits.total_delay) + download_url = media_item.debrid_url or media_item.url async with self.__request_context(download_url, media_item.domain, media_item.headers) as resp: return await self._process_response(media_item, resume_point, resp) @@ -71,7 +116,7 @@ async def _process_response( _ = await self.http_client.check_http_status(resp, download=True) if not media_item.is_segment: - _ = _get_content_type(media_item.ext, resp.headers) + _ = _check_content_type(media_item.ext, resp.headers) media_item.filesize = int(resp.headers.get("Content-Length", "0")) or None if resp.status != HTTPStatus.PARTIAL_CONTENT: @@ -82,16 +127,16 @@ async def _process_response( and not media_item.timestamp and (last_modified := _get_last_modified(resp.headers)) ): - msg = f"Unable to parse upload date for {media_item.url}, using `Last-Modified` header as file datetime" - log(msg, 30) + logger.warning( + f"Unable to parse upload date for {media_item.url}, using `Last-Modified` header as file datetime" + ) media_item.timestamp = last_modified - size = (media_item.filesize + resume_point) if media_item.filesize is not None else None - - if not media_item.is_segment: - self.manager.progress_manager.downloads.new_hook(media_item.filename, size) + hook = self.manager.progress_manager.downloads.new_hook(media_item.filename, media_item.filesize) + if resume_point: + hook.advance(resume_point) - await self._append_content(media_item, self._get_resp_reader(resp)) + await self._read_stream(media_item, self._get_resp_reader(resp), hook) return True def _get_resp_reader( @@ -106,36 +151,47 @@ async def __request_context( self, url: AbsoluteHttpURL, domain: str, headers: dict[str, str] ) -> AsyncGenerator[AbstractResponse | aiohttp.ClientResponse]: if domain in _USE_IMPERSONATION: - resp = await self.http_client._curl_session.get(str(url), stream=True, headers=headers) + resp = await self.http_client.curl_session.get(str(url), stream=True, headers=headers) try: yield AbstractResponse.from_resp(resp) finally: await resp.aclose() return - async with self.http_client._download_session.get(url, headers=headers) as resp: + async with self.http_client.dl_session.get(url, headers=headers) as resp: yield resp - async def _append_content(self, media_item: MediaItem, content: aiohttp.StreamReader | AbstractResponse) -> None: + async def _read_stream( + self, + media_item: MediaItem, + content: aiohttp.StreamReader | AbstractResponse, + progress_hook: ProgressHook, + ) -> None: """Appends content to a file.""" - check_free_space = self.make_free_space_checker(media_item) + check_free_space = storage.create_free_space_checker(media_item) + await check_free_space() await self._pre_download_check(media_item) - with self.manager.progress_manager.downloads.current_hook as hook: - check_download_speed = self.make_speed_checker(hook) + empty = True + with progress_hook: + check_speed = self._create_speed_checker(progress_hook) async with aiofiles.open(media_item.partial_file, mode="ab") as f: - async for chunk in content.iter_chunked(self.http_client.speed_limiter.chunk_size): + async for chunk in content.iter_chunked(self.chunk_size): await check_free_space() - chunk_size = len(chunk) - await self.http_client.speed_limiter.acquire(chunk_size) - await f.write(chunk) - hook.advance(chunk_size) - check_download_speed() - - await self._post_download_check(media_item) + n_bytes = len(chunk) + await self._speed_limiter.acquire(n_bytes) + _ = await f.write(chunk) + if empty: + empty = not bool(n_bytes) + progress_hook.advance(n_bytes) + check_speed() + + if empty: + await aio.unlink(media_item.partial_file, missing_ok=True) + raise DownloadError(HTTPStatus.INTERNAL_SERVER_ERROR, "File is empty") def _pre_download_check(self, media_item: MediaItem) -> Coroutine[Any, Any, None]: def prepare() -> None: @@ -145,31 +201,15 @@ def prepare() -> None: return asyncio.to_thread(prepare) - async def _post_download_check(self, media_item: MediaItem, *_) -> None: - if not await aio.get_size(media_item.partial_file): - await aio.unlink(media_item.partial_file, missing_ok=True) - raise DownloadError(HTTPStatus.INTERNAL_SERVER_ERROR, message="File is empty") - - def make_free_space_checker(self, media_item: MediaItem) -> Callable[[], Coroutine[Any, Any, None]]: - current_chunk = 0 - - async def check_free_space() -> None: - nonlocal current_chunk - if current_chunk % _FREE_SPACE_CHECK_PERIOD == 0: - await self.manager.storage_manager.check_free_space(media_item) - current_chunk += 1 - - return check_free_space - - def make_speed_checker(self, hook: ProgressHook) -> Callable[[], None]: + def _create_speed_checker(self, hook: ProgressHook) -> Callable[[], None]: last_slow_speed_read = None def check_download_speed() -> None: nonlocal last_slow_speed_read - if not self.download_speed_threshold: + if not self._slow_download_threshold: return - if hook.speed() > self.download_speed_threshold: + if hook.speed() > self._slow_download_threshold: last_slow_speed_read = None elif not last_slow_speed_read: last_slow_speed_read = time.perf_counter() @@ -178,33 +218,6 @@ def check_download_speed() -> None: return check_download_speed - async def download_file(self, media_item: MediaItem) -> bool: - """Starts a file.""" - if config.get().download_options.skip_download_mark_completed and not media_item.is_segment: - log(f"Download skipped {media_item.url} due to mark completed option", 10) - self.manager.progress_manager.files.add_skipped() - await self.mark_completed(media_item.domain, media_item) - return False - - downloaded = await self._download(media_item) - - if downloaded: - _ = await asyncio.to_thread(media_item.partial_file.rename, media_item.complete_file) - if not media_item.is_segment: - has_valid_duration = await self.http_client.check_file_duration(media_item) - await self.manager.db_manager.history_table.add_duration(media_item.domain, media_item) - await self.manager.db_manager.history_table.add_filesize(media_item.domain, media_item) - if not has_valid_duration: - await asyncio.to_thread(media_item.complete_file.unlink) - logger.warning(f"Download deleted {media_item.url} due to runtime restrictions") - await self.mark_incomplete(media_item) - self.manager.progress_manager.files.add_skipped() - return False - - await self._finalize_download(media_item) - - return downloaded - async def _finalize_download(self, media_item: MediaItem) -> None: await asyncio.to_thread(Path.chmod, media_item.complete_file, 0o666) if media_item.is_segment: @@ -215,7 +228,7 @@ async def _finalize_download(self, media_item: MediaItem) -> None: self.manager.add_completed(media_item) await self.mark_completed(media_item.domain, media_item) await _set_file_datetime(media_item, media_item.complete_file) - log(f"Download finished: {media_item.url}") + logger.info(f"Download finished: {media_item.url}") """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" @@ -232,56 +245,32 @@ async def mark_completed(self, domain: str, media_item: MediaItem) -> None: def _check_filesize_limits(media: MediaItem) -> bool: """Checks if the file size is within the limits.""" - file_size_limits = config.get().file_size_limits - max_video_filesize = file_size_limits.maximum_video_size or float("inf") - min_video_filesize = file_size_limits.minimum_video_size - max_image_filesize = file_size_limits.maximum_image_size or float("inf") - min_image_filesize = file_size_limits.minimum_image_size - max_other_filesize = file_size_limits.maximum_other_size or float("inf") - min_other_filesize = file_size_limits.minimum_other_size - + limits = config.get().file_size_limits.ranges assert media.filesize is not None if media.ext in constants.FileFormats.IMAGE: - proceed = min_image_filesize < media.filesize < max_image_filesize - elif media.ext in constants.FileFormats.VIDEO: - proceed = min_video_filesize < media.filesize < max_video_filesize - else: - proceed = min_other_filesize < media.filesize < max_other_filesize + return media.filesize in limits.image + if media.ext in constants.FileFormats.VIDEO: + return media.filesize in limits.video + return media.filesize in limits.other - return proceed - -def _get_content_type(ext: str, headers: Mapping[str, str]) -> str | None: +def _check_content_type(ext: str, headers: Mapping[str, str]) -> None: content_type: str = headers.get("Content-Type", "") - content_length = headers.get("Content-Length") - if not content_type and not content_length: - msg = "No content type in response headers" - raise InvalidContentTypeError(message=msg) - if not content_type: - return None + return override_key = next((name for name in _CONTENT_TYPES_OVERRIDES if name in content_type), "") - override: str | None = _CONTENT_TYPES_OVERRIDES.get(override_key) - content_type = override or content_type - content_type = content_type.lower() - - if _is_html_or_text(content_type) and ext.lower() not in constants.FileFormats.TEXT: + content_type = (_CONTENT_TYPES_OVERRIDES.get(override_key) or content_type).lower() + if ("html" in content_type or "text" in content_type) and ext.lower() not in constants.FileFormats.TEXT: msg = f"Received '{content_type}', was expecting other" raise InvalidContentTypeError(message=msg) - return content_type - def _get_last_modified(headers: Mapping[str, str]) -> int | None: if date_str := headers.get("Last-Modified"): return dates.parse_http(date_str) -def _is_html_or_text(content_type: str) -> bool: - return any(s in content_type for s in ("html", "text")) - - async def _set_file_datetime(media_item: MediaItem, complete_file: Path) -> None: if media_item.is_segment: return diff --git a/cyberdrop_dl/clients/hash_client.py b/cyberdrop_dl/clients/hash_client.py index e4f477dee..3ab2be6e5 100644 --- a/cyberdrop_dl/clients/hash_client.py +++ b/cyberdrop_dl/clients/hash_client.py @@ -9,7 +9,6 @@ from cyberdrop_dl import config, constants from cyberdrop_dl.constants import Hashing -from cyberdrop_dl.ui.prompts.basic_prompts import enter_to_continue from cyberdrop_dl.utils.logger import log from cyberdrop_dl.utils.utilities import get_size_or_none @@ -23,7 +22,6 @@ def hash_directory_scanner(manager: Manager, path: Path) -> None: asyncio.run(_hash_directory_scanner_helper(manager, path)) - enter_to_continue() async def _hash_directory_scanner_helper(manager: Manager, path: Path) -> None: @@ -160,7 +158,7 @@ async def cleanup_dupes_after_download(self) -> None: return if not config.get().dupe_cleanup_options.auto_dedupe: return - if config.get().runtime_options.ignore_history: + if config.get().runtime.ignore_history: return with self.manager.live_manager.get_hash_live(stop=True): file_hashes_dict = await self.get_file_hashes_dict() diff --git a/cyberdrop_dl/clients/jdownloader.py b/cyberdrop_dl/clients/jdownloader.py index 4b01513bd..47e774a7e 100644 --- a/cyberdrop_dl/clients/jdownloader.py +++ b/cyberdrop_dl/clients/jdownloader.py @@ -32,14 +32,14 @@ class JDownloaderConfig: @staticmethod def from_config(config: Config) -> JDownloaderConfig: - download_dir = config.runtime_options.jdownloader_download_dir or config.files.download_folder + download_dir = config.runtime.jdownloader_download_dir or config.files.download_folder return JDownloaderConfig( - enabled=config.runtime_options.send_unsupported_to_jdownloader, + enabled=config.runtime.send_unsupported_to_jdownloader, device=config.auth.jdownloader.device, username=config.auth.jdownloader.username, password=config.auth.jdownloader.password, download_dir=download_dir.resolve(), - autostart=config.runtime_options.jdownloader_autostart, + autostart=config.runtime.jdownloader_autostart, ) diff --git a/cyberdrop_dl/clients/scraper_client.py b/cyberdrop_dl/clients/scraper_client.py index 4137dc2f4..f71725943 100644 --- a/cyberdrop_dl/clients/scraper_client.py +++ b/cyberdrop_dl/clients/scraper_client.py @@ -88,7 +88,7 @@ def __sync_session_cookies(self, url: AbsoluteHttpURL) -> None: The reverse (sync `aiohttp` -> `curl`) is not needed at the moment, so it is skipped """ now = time.time() - for cookie in self.client_manager._curl_session.cookies.jar: + for cookie in self.client_manager.curl_session.cookies.jar: simple_cookie = make_simple_cookie(cookie, now) self.client_manager.cookies.update_cookies(simple_cookie, url) @@ -107,7 +107,7 @@ async def __request_context( if impersonate is True: impersonate = "chrome" request_params["impersonate"] = impersonate - curl_resp = await self.client_manager._curl_session.request(method, str(url), stream=True, **request_params) + curl_resp = await self.client_manager.curl_session.request(method, str(url), stream=True, **request_params) try: yield AbstractResponse.from_resp(curl_resp) self.__sync_session_cookies(url) diff --git a/cyberdrop_dl/config/settings.py b/cyberdrop_dl/config/settings.py index e3d40d41e..ed38e96ef 100755 --- a/cyberdrop_dl/config/settings.py +++ b/cyberdrop_dl/config/settings.py @@ -1,4 +1,5 @@ # ruff: noqa: RUF012 +import dataclasses import random import re from datetime import date, datetime, timedelta @@ -15,6 +16,7 @@ NonNegativeInt, PositiveFloat, PositiveInt, + computed_field, field_serializer, field_validator, ) @@ -148,21 +150,82 @@ def delete_old_logs_and_folders(self, now: datetime | None = None) -> None: _ = purge_dir_tree(self.log_folder) +@dataclasses.dataclass(slots=True) +class Range: + min: float + max: float + + def __post_init__(self) -> None: + if not self.max: + self.max = float("inf") + + def __contains__(self, value: float, /) -> bool: + if not (self.min and self.max): + return True + return self.min <= value <= self.max + + +@dataclasses.dataclass(slots=True, frozen=True) +class FileSizeRanges: + video: Range + image: Range + other: Range + + class FileSizeLimits(SettingsGroup): - maximum_image_size: ByteSizeSerilized = ByteSize(0) - maximum_other_size: ByteSizeSerilized = ByteSize(0) - maximum_video_size: ByteSizeSerilized = ByteSize(0) - minimum_image_size: ByteSizeSerilized = ByteSize(0) - minimum_other_size: ByteSizeSerilized = ByteSize(0) - minimum_video_size: ByteSizeSerilized = ByteSize(0) + max_image_size: ByteSizeSerilized = ByteSize(0) + max_other_size: ByteSizeSerilized = ByteSize(0) + max_video_size: ByteSizeSerilized = ByteSize(0) + min_image_size: ByteSizeSerilized = ByteSize(0) + min_other_size: ByteSizeSerilized = ByteSize(0) + min_video_size: ByteSizeSerilized = ByteSize(0) + + @computed_field + @property + def ranges(self) -> FileSizeRanges: + return FileSizeRanges( + video=Range( + self.min_video_size, + self.max_video_size, + ), + image=Range( + self.min_image_size, + self.max_image_size, + ), + other=Range( + self.min_other_size, + self.max_other_size, + ), + ) + + +@dataclasses.dataclass(slots=True, frozen=True) +class MediaDurationRanges: + video: Range + audio: Range class MediaDurationLimits(SettingsGroup): - maximum_video_duration: timedelta = timedelta(seconds=0) - maximum_audio_duration: timedelta = timedelta(seconds=0) - minimum_video_duration: timedelta = timedelta(seconds=0) - minimum_audio_duration: timedelta = timedelta(seconds=0) + max_video_duration: timedelta = timedelta(seconds=0) + max_audio_duration: timedelta = timedelta(seconds=0) + min_video_duration: timedelta = timedelta(seconds=0) + min_audio_duration: timedelta = timedelta(seconds=0) + @computed_field + @property + def ranges(self) -> MediaDurationRanges: + return MediaDurationRanges( + video=Range( + self.min_video_duration.total_seconds(), + self.max_video_duration.total_seconds(), + ), + audio=Range( + self.min_audio_duration.total_seconds(), + self.max_audio_duration.total_seconds(), + ), + ) + + @cached_property @field_validator("*", mode="before") @staticmethod def parse_runtime_duration(input_date: timedelta | str | int | None) -> timedelta | str: @@ -396,6 +459,6 @@ class ConfigSettings(Settings): logs: Logs = Logs() media_duration_limits: MediaDurationLimits = MediaDurationLimits() rate_limits: RateLimiting = RateLimiting() - runtime_options: Runtime = Runtime() + runtime: Runtime = Runtime() sorting: Sorting = Sorting() ui_options: UIOptions = UIOptions() diff --git a/cyberdrop_dl/crawlers/crawler.py b/cyberdrop_dl/crawlers/crawler.py index 2122a97ca..89ca2c0d3 100644 --- a/cyberdrop_dl/crawlers/crawler.py +++ b/cyberdrop_dl/crawlers/crawler.py @@ -268,7 +268,7 @@ def allow_no_extension(self) -> bool: @property def deep_scrape(self) -> bool: - return config.get().runtime_options.deep_scrape + return config.get().runtime.deep_scrape def _init_downloader(self) -> Downloader: self.downloader = dl = Downloader(self.manager, self.DOMAIN) @@ -434,7 +434,7 @@ async def handle_file( original_filename=original_filename, ext=ext, ) - media_item.debrid_link = debrid_link + media_item.debrid_url = debrid_link media_item.headers = self._get_download_headers(media_item.referer) if metadata is not None: media_item.metadata = metadata diff --git a/cyberdrop_dl/data_structures/url_objects.py b/cyberdrop_dl/data_structures/url_objects.py index 8dc9c3834..bd607fec4 100644 --- a/cyberdrop_dl/data_structures/url_objects.py +++ b/cyberdrop_dl/data_structures/url_objects.py @@ -127,7 +127,7 @@ class MediaItem: filesize: int | None = None ext: str db_path: str - debrid_link: AbsoluteHttpURL | None = None + debrid_url: AbsoluteHttpURL | None = None duration: float | None = None is_segment: bool = False album_id: str | None = None @@ -152,6 +152,10 @@ def __post_init__(self) -> None: self.complete_file = self.download_folder / self.filename + @property + def real_url(self) -> AbsoluteHttpURL: + return self.debrid_url or self.url + @property def partial_file(self) -> Path: return self.complete_file.with_suffix(self.complete_file.suffix + ".part") diff --git a/cyberdrop_dl/director.py b/cyberdrop_dl/director.py index 9c582cbf4..c25b37f8c 100644 --- a/cyberdrop_dl/director.py +++ b/cyberdrop_dl/director.py @@ -128,7 +128,7 @@ def _setup_debug_logger(manager: Manager) -> Path | None: debug_logger = logging.getLogger("cyberdrop_dl_debug") log_level = 10 settings_data = config.get() - settings_data.runtime_options.log_level = log_level + settings_data.runtime.log_level = log_level debug_logger.setLevel(log_level) debug_log_file_path = Path(__file__).parents[1] / "cyberdrop_dl_debug.log" if env.DEBUG_LOG_FOLDER: @@ -156,7 +156,7 @@ def _setup_debug_logger(manager: Manager) -> Path | None: def _setup_main_logger(manager: Manager) -> None: logger = logging.getLogger("cyberdrop_dl") file_io = config.get().logs.main_log.open("w", encoding="utf8") - log_level = config.get().runtime_options.log_level + log_level = config.get().runtime.log_level logger.setLevel(log_level) logger.addHandler(constants.console_handler) diff --git a/cyberdrop_dl/downloader/downloader.py b/cyberdrop_dl/downloader/downloader.py index 5ccef2bc7..e85d6b29b 100644 --- a/cyberdrop_dl/downloader/downloader.py +++ b/cyberdrop_dl/downloader/downloader.py @@ -28,7 +28,7 @@ from collections.abc import Callable, Coroutine from cyberdrop_dl import config - from cyberdrop_dl.clients.download_client import DownloadClient + from cyberdrop_dl.clients.download_client import StreamDownloader from cyberdrop_dl.config import Config from cyberdrop_dl.data_structures.url_objects import MediaItem from cyberdrop_dl.managers import Manager @@ -74,12 +74,12 @@ def __init__( self, config: config.Config, manager: Manager, - client: DownloadClient, + client: StreamDownloader, slots: int, ) -> None: self.manager: Manager = manager self.config: Config = config - self.client: DownloadClient = client + self.client: StreamDownloader = client self.processed_items: set[str] = set() self.waiting_items: int = 0 self._current_attempt_filesize: dict[str, int] = {} @@ -116,7 +116,7 @@ async def _limiter(self, media_item: MediaItem): self.waiting_items += 1 await self.client.mark_incomplete(media_item) - server = (media_item.debrid_link or media_item.url).host + server = (media_item.debrid_url or media_item.url).host async with ( self._server_lock(media_item.domain, server), @@ -133,7 +133,7 @@ async def _limiter(self, media_item: MediaItem): logger.debug(f"Lock for {media_item.filename!r} released") async def run(self, media_item: MediaItem) -> bool: - if media_item.url.path in self.processed_items and not self.config.runtime_options.ignore_history: + if media_item.url.path in self.processed_items and not self.config.runtime.ignore_history: return False async with self._limiter(media_item): @@ -164,7 +164,7 @@ async def _download(self, media_item: MediaItem) -> bool | None: ) await self._check_file_can_download(media_item) - return await self.client.download_file(media_item) + return await self.client.download(media_item) except SkipDownloadError as e: if not media_item.is_segment: diff --git a/cyberdrop_dl/downloader/mega_nz.py b/cyberdrop_dl/downloader/mega_nz.py index d6c4de028..0f5664893 100644 --- a/cyberdrop_dl/downloader/mega_nz.py +++ b/cyberdrop_dl/downloader/mega_nz.py @@ -6,7 +6,7 @@ import aiofiles from mega.chunker import MegaChunker, get_chunks -from cyberdrop_dl.clients.download_client import DownloadClient +from cyberdrop_dl.clients.download_client import StreamDownloader from cyberdrop_dl.downloader.downloader import Downloader if TYPE_CHECKING: @@ -20,7 +20,7 @@ from cyberdrop_dl.managers import Manager -class MegaDownloadClient(DownloadClient): +class MegaDownloadClient(StreamDownloader): def __init__(self, manager: Manager) -> None: super().__init__(manager, manager.client_manager) self._decrypt_mapping: dict[URL, tuple[Crypto, int]] = {} @@ -31,7 +31,7 @@ async def _append_content(self, media_item: MediaItem, content: aiohttp.StreamRe assert media_item.task_id is not None check_free_space = self.make_free_space_checker(media_item) - check_download_speed = self.make_speed_checker(media_item) + check_download_speed = self._create_speed_checker(media_item) await check_free_space() await self._pre_download_check(media_item) diff --git a/cyberdrop_dl/managers/__init__.py b/cyberdrop_dl/managers/__init__.py index dd170e72a..9eded0be1 100644 --- a/cyberdrop_dl/managers/__init__.py +++ b/cyberdrop_dl/managers/__init__.py @@ -1,143 +1,3 @@ -from __future__ import annotations +from ._manager import Manager -import asyncio -import json -import logging -from dataclasses import field -from time import perf_counter -from typing import TYPE_CHECKING, NamedTuple - -from cyberdrop_dl import __version__, appdata, config, constants -from cyberdrop_dl.database import Database -from cyberdrop_dl.managers.client_manager import HttpClient -from cyberdrop_dl.managers.hash_manager import HashManager -from cyberdrop_dl.managers.live_manager import LiveManager -from cyberdrop_dl.managers.log_manager import LogManager -from cyberdrop_dl.managers.storage_manager import StorageManager -from cyberdrop_dl.progress import ProgressManager -from cyberdrop_dl.utils import ffmpeg -from cyberdrop_dl.utils.logger import LogHandler, QueuedLogger -from cyberdrop_dl.utils.utilities import close_if_defined, get_system_information - -if TYPE_CHECKING: - from asyncio import TaskGroup - from pathlib import Path - - from cyberdrop_dl.data_structures.url_objects import MediaItem - from cyberdrop_dl.scrape_mapper import ScrapeMapper - - -class AsyncioEvents(NamedTuple): - SHUTTING_DOWN: asyncio.Event - RUNNING: asyncio.Event - - -logger = logging.getLogger(__name__) - - -class Manager: - def __init__(self) -> None: - self.hash_manager: HashManager = field(init=False) - self.db_manager: Database = field(init=False) - self.client_manager: HttpClient = field(init=False) - self.storage_manager: StorageManager = field(init=False) - - self.progress_manager: ProgressManager = ProgressManager(self, portrait=False) - self.live_manager: LiveManager = field(init=False) - - self.task_group: TaskGroup = asyncio.TaskGroup() - self.scrape_mapper: ScrapeMapper = field(init=False) - - self.start_time: float = perf_counter() - self.loggers: dict[str, QueuedLogger] = {} - self.states: AsyncioEvents - - constants.console_handler = LogHandler(level=constants.CONSOLE_LEVEL) - - self.logs: LogManager = LogManager(config.get(), self.task_group) - log_app_state() - self._completed_downloads: set[MediaItem] = set() - self._completed_downloads_paths: set[Path] = set() - self._prev_downloads: set[MediaItem] = set() - self._prev_downloads_paths: set[Path] = set() - - def add_completed(self, media_item: MediaItem) -> None: - if media_item.is_segment: - return - self._completed_downloads.add(media_item) - self._completed_downloads_paths.add(media_item.complete_file) - - def add_prev(self, media_item: MediaItem) -> None: - self._prev_downloads.add(media_item) - self._prev_downloads_paths.add(media_item.complete_file) - - @property - def completed_downloads(self) -> set[MediaItem]: - return self._completed_downloads - - @property - def prev_downloads(self) -> set[MediaItem]: - return self._prev_downloads - - """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - - async def async_startup(self) -> None: - """Async startup process for the manager.""" - self.states = AsyncioEvents(asyncio.Event(), asyncio.Event()) - self.client_manager = HttpClient(self) - await self.client_manager.startup() - self.storage_manager = StorageManager(self) - - await self.async_db_hash_startup() - - constants.MAX_NAME_LENGTHS["FILE"] = config.get().general.max_file_name_length - constants.MAX_NAME_LENGTHS["FOLDER"] = config.get().general.max_folder_name_length - - async def async_db_hash_startup(self) -> None: - self.db_manager = Database( - appdata.get().db_file, - config.get().runtime_options.ignore_history, - ) - await self.db_manager.startup() - self.hash_manager = HashManager(self) - self.live_manager = LiveManager(self) - - async def async_db_close(self) -> None: - "Partial shutdown for managers used for hash directory scanner" - self.db_manager = await close_if_defined(self.db_manager) - self.hash_manager = constants.NOT_DEFINED - - async def close(self) -> None: - """Closes the manager.""" - self.states.RUNNING.clear() - - await self.async_db_close() - - self.client_manager = await close_if_defined(self.client_manager) - self.storage_manager = await close_if_defined(self.storage_manager) - - while self.loggers: - _, queued_logger = self.loggers.popitem() - queued_logger.stop() - - -def log_app_state() -> None: - auth = {} - - config_ = config.get() - app_data = appdata.get() - for site, auth_entries in config_.auth.model_dump().items(): # pyright: ignore[reportAny] - auth[site] = all(auth_entries.values()) # pyright: ignore[reportAny] - - # f"Using Input File: {self.path_manager.input_file}", - stats = dict( # noqa: C408 - version=__version__, - system=get_system_information(), - ffmpeg=ffmpeg.get_ffmpeg_version(), - ffprobe=ffmpeg.get_ffprobe_version(), - database=app_data.db_file, - config_file=config_.source, - auth=auth, - config=config_.model_dump_json(indent=2, exclude={"auth"}), - ) - logger.debug(json.dumps(stats, indent=2, ensure_ascii=False)) +__all__ = ["Manager"] diff --git a/cyberdrop_dl/managers/_manager.py b/cyberdrop_dl/managers/_manager.py new file mode 100644 index 000000000..04fdf019c --- /dev/null +++ b/cyberdrop_dl/managers/_manager.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +import asyncio +import json +import logging +from dataclasses import field +from time import perf_counter +from typing import TYPE_CHECKING, NamedTuple + +from cyberdrop_dl import __version__, appdata, config, constants +from cyberdrop_dl.database import Database +from cyberdrop_dl.managers.client_manager import HttpClient +from cyberdrop_dl.managers.hash_manager import HashManager +from cyberdrop_dl.managers.live_manager import LiveManager +from cyberdrop_dl.managers.log_manager import LogManager +from cyberdrop_dl.progress import ProgressManager +from cyberdrop_dl.storage import StorageChecker +from cyberdrop_dl.utils import ffmpeg +from cyberdrop_dl.utils.logger import LogHandler, QueuedLogger +from cyberdrop_dl.utils.utilities import close_if_defined, get_system_information + +if TYPE_CHECKING: + from asyncio import TaskGroup + from pathlib import Path + + from cyberdrop_dl.data_structures.url_objects import MediaItem + from cyberdrop_dl.scrape_mapper import ScrapeMapper + + +class AsyncioEvents(NamedTuple): + SHUTTING_DOWN: asyncio.Event + RUNNING: asyncio.Event + + +logger = logging.getLogger(__name__) + + +class Manager: + def __init__(self) -> None: + self.hash_manager: HashManager = field(init=False) + self.db_manager: Database = field(init=False) + self.client_manager: HttpClient = field(init=False) + self.storage_manager: StorageChecker = field(init=False) + + self.progress_manager: ProgressManager = ProgressManager(self, portrait=False) + self.live_manager: LiveManager = field(init=False) + + self.task_group: TaskGroup = asyncio.TaskGroup() + self.scrape_mapper: ScrapeMapper = field(init=False) + + self.start_time: float = perf_counter() + self.loggers: dict[str, QueuedLogger] = {} + self.states: AsyncioEvents + + constants.console_handler = LogHandler(level=constants.CONSOLE_LEVEL) + + self.logs: LogManager = LogManager(config.get(), self.task_group) + log_app_state() + self._completed_downloads: set[MediaItem] = set() + self._completed_downloads_paths: set[Path] = set() + self._prev_downloads: set[MediaItem] = set() + self._prev_downloads_paths: set[Path] = set() + + def add_completed(self, media_item: MediaItem) -> None: + if media_item.is_segment: + return + self._completed_downloads.add(media_item) + self._completed_downloads_paths.add(media_item.complete_file) + + def add_prev(self, media_item: MediaItem) -> None: + self._prev_downloads.add(media_item) + self._prev_downloads_paths.add(media_item.complete_file) + + @property + def completed_downloads(self) -> set[MediaItem]: + return self._completed_downloads + + @property + def prev_downloads(self) -> set[MediaItem]: + return self._prev_downloads + + """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" + + async def async_startup(self) -> None: + """Async startup process for the manager.""" + self.states = AsyncioEvents(asyncio.Event(), asyncio.Event()) + self.client_manager = HttpClient(self) + await self.client_manager.startup() + self.storage_manager = StorageChecker(self) + + await self.async_db_hash_startup() + + constants.MAX_NAME_LENGTHS["FILE"] = config.get().general.max_file_name_length + constants.MAX_NAME_LENGTHS["FOLDER"] = config.get().general.max_folder_name_length + + async def async_db_hash_startup(self) -> None: + self.db_manager = Database( + appdata.get().db_file, + config.get().runtime.ignore_history, + ) + await self.db_manager.startup() + self.hash_manager = HashManager(self) + self.live_manager = LiveManager(self) + + async def async_db_close(self) -> None: + "Partial shutdown for managers used for hash directory scanner" + self.db_manager = await close_if_defined(self.db_manager) + self.hash_manager = constants.NOT_DEFINED + + async def close(self) -> None: + """Closes the manager.""" + self.states.RUNNING.clear() + + await self.async_db_close() + + self.client_manager = await close_if_defined(self.client_manager) + self.storage_manager = await close_if_defined(self.storage_manager) + + while self.loggers: + _, queued_logger = self.loggers.popitem() + queued_logger.stop() + + +def log_app_state() -> None: + auth = {} + + config_ = config.get() + app_data = appdata.get() + for site, auth_entries in config_.auth.model_dump().items(): # pyright: ignore[reportAny] + auth[site] = all(auth_entries.values()) # pyright: ignore[reportAny] + + # f"Using Input File: {self.path_manager.input_file}", + stats = dict( # noqa: C408 + version=__version__, + system=get_system_information(), + ffmpeg=ffmpeg.get_ffmpeg_version(), + ffprobe=ffmpeg.get_ffprobe_version(), + database=app_data.db_file, + config_file=config_.source, + auth=auth, + config=config_.model_dump_json(indent=2, exclude={"auth"}), + ) + logger.debug(json.dumps(stats, indent=2, ensure_ascii=False)) diff --git a/cyberdrop_dl/managers/client_manager.py b/cyberdrop_dl/managers/client_manager.py index 95f206146..ba16fb4b3 100644 --- a/cyberdrop_dl/managers/client_manager.py +++ b/cyberdrop_dl/managers/client_manager.py @@ -15,14 +15,13 @@ from aiolimiter import AsyncLimiter from cyberdrop_dl import appdata, config, constants, ddos_guard, env -from cyberdrop_dl.clients.download_client import DownloadClient +from cyberdrop_dl.clients.download_client import StreamDownloader from cyberdrop_dl.clients.flaresolverr import FlareSolverr from cyberdrop_dl.clients.response import AbstractResponse from cyberdrop_dl.clients.scraper_client import ScraperClient from cyberdrop_dl.cookies import get_cookies_from_browsers, read_netscape_files from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL, MediaItem from cyberdrop_dl.exceptions import DownloadError, ScrapeError -from cyberdrop_dl.managers import Manager from cyberdrop_dl.utils.ffmpeg import probe from cyberdrop_dl.utils.logger import log, log_debug, log_spacer @@ -60,26 +59,6 @@ _null_context = contextlib.nullcontext() -class DownloadSpeedLimiter(AsyncLimiter): - __slots__ = ("chunk_size",) - - def __init__(self, speed_limit: int) -> None: - self.chunk_size: int = 1024 * 1024 * 10 # 10MB - if speed_limit: - self.chunk_size = min(self.chunk_size, speed_limit) - super().__init__(speed_limit, 1) - - async def acquire(self, amount: float | None = None) -> None: - if self.max_rate <= 0: - return - if not amount: - amount = self.chunk_size - await super().acquire(amount) - - def __repr__(self): - return f"{type(self).__name__}(speed_limit={self.max_rate!r}, chunk_size={self.chunk_size!r})" - - def _create_ssl(): ssl_context = config.get().general.ssl_context @@ -110,22 +89,21 @@ def __init__(self, manager: Manager) -> None: self.global_rate_limiter: AsyncLimiter = AsyncLimiter(rate_limits.rate_limit, 1) self.scraper_client: ScraperClient = ScraperClient(self) - self.speed_limiter: DownloadSpeedLimiter = DownloadSpeedLimiter(rate_limits.download_speed_limit) - self.download_client: DownloadClient = DownloadClient(manager, self) + self.download_client: StreamDownloader = StreamDownloader(manager, self) self.flaresolverr: FlareSolverr = FlareSolverr(manager) self._session: aiohttp.ClientSession - self._download_session: aiohttp.ClientSession - self._curl_session: AsyncSession[CurlResponse] + self.dl_session: aiohttp.ClientSession + self.curl_session: AsyncSession[CurlResponse] self._json_response_checks: dict[str, Callable[[Any], None]] = {} def _startup(self) -> None: self._session = self.new_scrape_session() - self._download_session = self.new_download_session() + self.dl_session = self.new_download_session() if _curl_import_error is not None: return - self._curl_session = self.new_curl_cffi_session() + self.curl_session = self.new_curl_cffi_session() async def __aenter__(self) -> Self: self._startup() @@ -133,11 +111,11 @@ async def __aenter__(self) -> Self: async def __aexit__(self, *args) -> None: await self._session.close() - await self._download_session.close() + await self.dl_session.close() if _curl_import_error is not None: return try: - await self._curl_session.close() + await self.curl_session.close() except Exception: pass @@ -348,29 +326,13 @@ async def check_file_duration(self, media_item: MediaItem) -> bool: if not (is_video or is_audio): return True - duration_limits = config.get().media_duration_limits - min_video_duration: float = duration_limits.minimum_video_duration.total_seconds() - max_video_duration: float = duration_limits.maximum_video_duration.total_seconds() - min_audio_duration: float = duration_limits.minimum_audio_duration.total_seconds() - max_audio_duration: float = duration_limits.maximum_audio_duration.total_seconds() - video_duration_limits = min_video_duration, max_video_duration - audio_duration_limits = min_audio_duration, max_audio_duration - - if is_video and not any(video_duration_limits): - return True - if is_audio and not any(audio_duration_limits): - return True + duration_limits = config.get().media_duration_limits.ranges async def get_duration() -> float | None: - if media_item.duration: - return media_item.duration - if media_item.downloaded: properties = await probe(media_item.complete_file) - else: - headers = self.download_client._get_download_headers(media_item.domain, media_item.referer) - properties = await probe(media_item.url, headers=headers) + properties = await probe(media_item.url, headers=media_item.headers) if properties.format.duration: return properties.format.duration @@ -379,21 +341,18 @@ async def get_duration() -> float | None: if is_audio and properties.audio: return properties.audio.duration - duration: float | None = await get_duration() - media_item.duration = duration + if media_item.duration is None: + media_item.duration = await get_duration() - if duration is None: + if media_item.duration is None: return True await self.manager.db_manager.history_table.add_duration(media_item.domain, media_item) if is_video: - max_video_duration = max_video_duration or float("inf") - - return min_video_duration <= duration <= max_video_duration + return media_item.duration in duration_limits.video - max_audio_duration = max_audio_duration or float("inf") - return min_audio_duration <= duration <= max_audio_duration + return media_item.duration in duration_limits.audio async def close(self) -> None: await self.flaresolverr.close() diff --git a/cyberdrop_dl/scrape_mapper.py b/cyberdrop_dl/scrape_mapper.py index 6a1a68408..ec3329721 100644 --- a/cyberdrop_dl/scrape_mapper.py +++ b/cyberdrop_dl/scrape_mapper.py @@ -61,7 +61,7 @@ def __init__(self, manager: Manager) -> None: self.existing_crawlers: dict[str, Crawler] = {} self.direct_crawler = DirectHttpFile(self.manager) self.jdownloader = JDownloader.new(config.get()) - self.jdownloader_whitelist = config.get().runtime_options.jdownloader_whitelist + self.jdownloader_whitelist = config.get().runtime.jdownloader_whitelist self.using_input_file = False self.groups = set() self.count = 0 diff --git a/cyberdrop_dl/utils/markdown.py b/cyberdrop_dl/utils/markdown.py index 7abb69603..c508806ad 100644 --- a/cyberdrop_dl/utils/markdown.py +++ b/cyberdrop_dl/utils/markdown.py @@ -47,7 +47,7 @@ def _make_html_rows() -> list[MarkdownRowDict]: def _get_crawlers_info_cols_and_rows() -> tuple[list[str], Generator[tuple[str, ...]]]: - from cyberdrop_dl.scraper.scrape_mapper import get_unique_crawlers + from cyberdrop_dl.scrape_mapper import get_unique_crawlers def make_colunm(field_name: str) -> str: return field_name.replace("_", " ").title().replace("Url", "URL") diff --git a/cyberdrop_dl/utils/utilities.py b/cyberdrop_dl/utils/utilities.py index 0788697bc..314f12cc1 100644 --- a/cyberdrop_dl/utils/utilities.py +++ b/cyberdrop_dl/utils/utilities.py @@ -332,7 +332,7 @@ def purge_dir_tree(dirname: Path | str) -> bool: def check_partials_and_empty_folders(manager: Manager) -> None: """Checks for partial downloads, deletes partial files and empty folders.""" - settings = config.get().runtime_options + settings = config.get().runtime if settings.delete_partial_files: delete_partial_files(manager) if not settings.skip_check_for_partial_files: diff --git a/tests/test_database.py b/tests/test_database.py index 9610970a5..55eded9f8 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -5,10 +5,10 @@ from typing import TYPE_CHECKING, cast import pytest +from cyberdrop_dl.scraper import scrape_mapper from cyberdrop_dl.data_structures.url_objects import AbsoluteHttpURL, ScrapeItem -from cyberdrop_dl.scraper import scrape_mapper -from cyberdrop_dl.scraper.scrape_mapper import _create_item_from_row +from cyberdrop_dl.scrape_mapper import _create_item_from_row from cyberdrop_dl.utils.utilities import parse_url if TYPE_CHECKING: diff --git a/tests/test_startup.py b/tests/test_startup.py index 30ae20b69..524a54a6f 100644 --- a/tests/test_startup.py +++ b/tests/test_startup.py @@ -1,7 +1,6 @@ from pathlib import Path import pytest -from cyberdrop_dl.ui.program_ui import ProgramUI from cyberdrop_dl.main import run @@ -13,7 +12,6 @@ def test_startup(tmp_cwd: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest. def main_ui(*_) -> None: print(msg) - monkeypatch.setattr(ProgramUI, "__init__", main_ui) run(()) captured = capsys.readouterr() output = captured.out From 6b483d150b8f853bc6f84fd77b32fc8996d75bf0 Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Tue, 24 Feb 2026 16:17:09 -0500 Subject: [PATCH 22/23] fix: find partition --- cyberdrop_dl/config/settings.py | 8 ++------ cyberdrop_dl/progress/_common.py | 16 ++++++---------- cyberdrop_dl/storage.py | 29 +++++++++++++++-------------- tests/test_storage.py | 26 ++++++++++++++++++++++---- 4 files changed, 45 insertions(+), 34 deletions(-) diff --git a/cyberdrop_dl/config/settings.py b/cyberdrop_dl/config/settings.py index ed38e96ef..c734db738 100755 --- a/cyberdrop_dl/config/settings.py +++ b/cyberdrop_dl/config/settings.py @@ -16,7 +16,6 @@ NonNegativeInt, PositiveFloat, PositiveInt, - computed_field, field_serializer, field_validator, ) @@ -180,8 +179,7 @@ class FileSizeLimits(SettingsGroup): min_other_size: ByteSizeSerilized = ByteSize(0) min_video_size: ByteSizeSerilized = ByteSize(0) - @computed_field - @property + @cached_property def ranges(self) -> FileSizeRanges: return FileSizeRanges( video=Range( @@ -211,8 +209,7 @@ class MediaDurationLimits(SettingsGroup): min_video_duration: timedelta = timedelta(seconds=0) min_audio_duration: timedelta = timedelta(seconds=0) - @computed_field - @property + @cached_property def ranges(self) -> MediaDurationRanges: return MediaDurationRanges( video=Range( @@ -225,7 +222,6 @@ def ranges(self) -> MediaDurationRanges: ), ) - @cached_property @field_validator("*", mode="before") @staticmethod def parse_runtime_duration(input_date: timedelta | str | int | None) -> timedelta | str: diff --git a/cyberdrop_dl/progress/_common.py b/cyberdrop_dl/progress/_common.py index a91dd6e6e..6af184b6d 100644 --- a/cyberdrop_dl/progress/_common.py +++ b/cyberdrop_dl/progress/_common.py @@ -1,7 +1,8 @@ from __future__ import annotations import dataclasses -from typing import TYPE_CHECKING, Self +from types import MappingProxyType +from typing import TYPE_CHECKING, ClassVar, Self if TYPE_CHECKING: from collections.abc import Callable @@ -10,16 +11,8 @@ from rich.progress import TaskID -from types import MappingProxyType -from typing import TYPE_CHECKING, ClassVar - from rich.markup import escape -from rich.progress import ( - Progress, - ProgressColumn, - Task, - TaskID, -) +from rich.progress import Progress, ProgressColumn, Task, TaskID def truncate(s: str, length: int = 40, placeholder: str = "...") -> str: @@ -44,6 +37,9 @@ def __enter__(self) -> Self: def __exit__(self, *_) -> None: self.done() + def as_segment(self) -> ProgressHook: + return ProgressHook(self.advance, lambda: None, self.speed) + class ProgressProxy: _columns: ClassVar[tuple[ProgressColumn | str, ...]] diff --git a/cyberdrop_dl/storage.py b/cyberdrop_dl/storage.py index be789e7fd..5d1823e65 100644 --- a/cyberdrop_dl/storage.py +++ b/cyberdrop_dl/storage.py @@ -147,15 +147,8 @@ def _get_mount_point(folder: Path) -> Path | None: # Cached for performance. # It's not an expensive operation nor IO blocking, but it's very common for multiple files to share the same download folder # ex: HLS downloads could have over a thousand segments. All of them will go to the same folder - assert folder.is_absolute() - possible_mountpoints = (mount for mount in mountpoints() if folder.is_relative_to(mount)) - - # Get the closest mountpoint to `folder` - # mount_a = /home/user/ -> points to an internal SSD - # mount_b = /home/user/USB -> points to an external USB drive - # If `folder`` is `/home/user/USB/videos`, the correct mountpoint is mount_b - if mount_point := max(possible_mountpoints, key=lambda path: len(path.parts), default=None): - return mount_point + if partition := find_partition(folder): + return partition.mountpoint # Mount point for this path does not exists # This will only happen on Windows, ex: an USB drive (`D:`) that is not currently available (AKA disconnected) @@ -181,9 +174,17 @@ def _get_disk_partitions() -> Generator[DiskPartition]: def find_partition(path: Path) -> DiskPartition | None: - for partition in partitions(): - if path.is_relative_to(partition.mountpoint): - return partition + if not path.is_absolute(): + raise ValueError(f"{path!r} is not absolute") + + possible_partitions = (p for p in partitions() if path.is_relative_to(p.mountpoint)) + + # Get the closest mountpoint to `folder` + # mount_a = /home/user/ -> points to an internal SSD + # mount_b = /home/user/USB -> points to an external USB drive + # If `folder`` is `/home/user/USB/videos`, the correct mountpoint is mount_b + if partition := max(possible_partitions, key=lambda p: len(p.mountpoint.parts), default=None): + return partition def is_fuse_fs(path: Path) -> bool: @@ -261,13 +262,13 @@ def create_free_space_checker(media_item: MediaItem, *, frecuency: int = 5) -> C current_chunk = 0 check = _storage.get().check_free_space - async def check_every() -> None: + async def checker() -> None: nonlocal current_chunk if current_chunk % frecuency == 0: await check(media_item) current_chunk += 1 - return check_every + return checker def partitions() -> tuple[DiskPartition, ...]: diff --git a/tests/test_storage.py b/tests/test_storage.py index f1d74d4e3..56b99095f 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -55,10 +55,28 @@ class NullUsage: def test_storage_only_work_with_abs_paths() -> None: cwd = Path() - assert storage.find_partition(cwd) is None + with pytest.raises(ValueError): + _ = storage.find_partition(cwd) + assert storage.find_partition(cwd.resolve()) - with pytest.raises(AssertionError): - storage._get_mount_point(cwd) - assert storage._get_mount_point(cwd.resolve()) +async def test_find_partition_finds_the_correct_partition() -> None: + def part(path: str) -> storage.DiskPartition: + return storage.DiskPartition(Path(path), Path(path), "", "") + + root, home, usb, external_ssd = partitions = [ + part("/"), + part("/home"), + part("/mnt/USB"), + part("/home/external_SSD"), + ] + + storage._PARTITIONS = partitions # pyright: ignore[reportPrivateUsage] + + assert storage.find_partition(Path("/swap_file")) is root + assert storage.find_partition(Path("/home/user/.bash_rc")) is home + assert storage.find_partition(Path("/home/external_SSD/song.mp3")) is external_ssd + assert storage.find_partition(Path("mnt/USB")) is None + assert storage.find_partition(Path("/mnt/USB")) is usb + assert storage.find_partition(Path("/mnt")) is root From 18d97f99add429178b6e45a11cd0c9d8beb7d883 Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Tue, 24 Feb 2026 17:25:57 -0500 Subject: [PATCH 23/23] refactor: update option names --- cyberdrop_dl/clients/download_client.py | 4 +- cyberdrop_dl/clients/hash_client.py | 12 +++--- cyberdrop_dl/config/settings.py | 22 +++++----- cyberdrop_dl/crawlers/_forum.py | 6 +-- cyberdrop_dl/crawlers/crawler.py | 18 ++++---- cyberdrop_dl/crawlers/kemono.py | 4 +- cyberdrop_dl/downloader/downloader.py | 1 - cyberdrop_dl/managers/client_manager.py | 4 +- cyberdrop_dl/progress/hashing.py | 4 +- cyberdrop_dl/scrape_mapper.py | 6 +-- cyberdrop_dl/storage.py | 57 ++++++++++--------------- cyberdrop_dl/utils/utilities.py | 2 +- tests/test_hashing.py | 4 +- tests/test_storage.py | 16 ++++--- 14 files changed, 73 insertions(+), 87 deletions(-) diff --git a/cyberdrop_dl/clients/download_client.py b/cyberdrop_dl/clients/download_client.py index a9a8f568d..9f65836aa 100644 --- a/cyberdrop_dl/clients/download_client.py +++ b/cyberdrop_dl/clients/download_client.py @@ -69,7 +69,7 @@ def __init__(self, manager: Manager, http_client: HttpClient, config: config.Con async def download(self, media_item: MediaItem) -> bool: """Starts a file.""" - if self.config.download_options.skip_download_mark_completed and not media_item.is_segment: + if self.config.download.skip_download_mark_completed and not media_item.is_segment: logger.info(f"Download skipped {media_item.url} due to mark completed option", 10) self.manager.progress_manager.files.add_skipped() await self.mark_completed(media_item.domain, media_item) @@ -275,7 +275,7 @@ async def _set_file_datetime(media_item: MediaItem, complete_file: Path) -> None if media_item.is_segment: return - if config.get().download_options.disable_file_timestamps: + if config.get().download.disable_file_timestamps: return if not media_item.timestamp: diff --git a/cyberdrop_dl/clients/hash_client.py b/cyberdrop_dl/clients/hash_client.py index 3ab2be6e5..482a6d254 100644 --- a/cyberdrop_dl/clients/hash_client.py +++ b/cyberdrop_dl/clients/hash_client.py @@ -53,7 +53,7 @@ def _deleted_file_suffix(self) -> Literal["Sent to trash", "Permanently deleted" @property def dupe_cleanup_options(self) -> Dedupe: - return config.get().dupe_cleanup_options + return config.get().dedupe async def hash_directory(self, path: Path) -> None: path = Path(path) @@ -77,7 +77,7 @@ async def hash_item(self, media_item: MediaItem) -> None: async def hash_item_during_download(self, media_item: MediaItem) -> None: if media_item.is_segment: return - if config.get().dupe_cleanup_options.hashing != Hashing.IN_PLACE: + if config.get().dedupe.hashing != Hashing.IN_PLACE: return try: @@ -101,9 +101,9 @@ async def update_db_and_retrive_hash( return hash = await self._update_db_and_retrive_hash_helper(file, original_filename, referer, hash_type=self.xxhash) - if config.get().dupe_cleanup_options.add_md5_hash: + if config.get().dedupe.add_md5_hash: await self._update_db_and_retrive_hash_helper(file, original_filename, referer, hash_type=self.md5) - if config.get().dupe_cleanup_options.add_sha256_hash: + if config.get().dedupe.add_sha256_hash: await self._update_db_and_retrive_hash_helper(file, original_filename, referer, hash_type=self.sha256) return hash @@ -154,9 +154,9 @@ async def save_hash_data(self, media_item: MediaItem, hash: str | None) -> None: self.hashes_dict[hash][size].add(absolute_path) async def cleanup_dupes_after_download(self) -> None: - if config.get().dupe_cleanup_options.hashing == Hashing.OFF: + if config.get().dedupe.hashing == Hashing.OFF: return - if not config.get().dupe_cleanup_options.auto_dedupe: + if not config.get().dedupe.auto_dedupe: return if config.get().runtime.ignore_history: return diff --git a/cyberdrop_dl/config/settings.py b/cyberdrop_dl/config/settings.py index c734db738..011fc5cc7 100755 --- a/cyberdrop_dl/config/settings.py +++ b/cyberdrop_dl/config/settings.py @@ -1,10 +1,10 @@ # ruff: noqa: RUF012 import dataclasses +import logging import random import re from datetime import date, datetime, timedelta from functools import cached_property -from logging import DEBUG from pathlib import Path from typing import Literal @@ -60,20 +60,20 @@ def _validate_format(cls, value: str, valid_keys: set[str]) -> None: validate_format_string(value, valid_keys) -class DownloadOptions(FormatValidator, SettingsGroup): +class Downloads(FormatValidator, SettingsGroup): block_download_sub_folders: bool = False disable_file_timestamps: bool = False include_album_id_in_folder_name: bool = False include_thread_id_in_folder_name: bool = False - maximum_number_of_children: ListNonNegativeInt = [] + max_children: ListNonNegativeInt = [] remove_domains_from_folder_names: bool = False remove_generated_id_from_filenames: bool = False scrape_single_forum_post: bool = False separate_posts_format: NonEmptyStr = "{default}" separate_posts: bool = False skip_download_mark_completed: bool = False - maximum_thread_depth: NonNegativeInt = 0 - maximum_thread_folder_depth: NonNegativeInt | None = None + max_thread_depth: NonNegativeInt = 0 + max_thread_folder_depth: NonNegativeInt | None = None @field_validator("separate_posts_format", mode="after") @classmethod @@ -159,8 +159,6 @@ def __post_init__(self) -> None: self.max = float("inf") def __contains__(self, value: float, /) -> bool: - if not (self.min and self.max): - return True return self.min <= value <= self.max @@ -236,7 +234,7 @@ def parse_runtime_duration(input_date: timedelta | str | int | None) -> timedelt return to_timedelta(input_date) -class IgnoreOptions(SettingsGroup): +class Ignore(SettingsGroup): exclude_audio: bool = False exclude_images: bool = False exclude_other: bool = False @@ -270,7 +268,7 @@ class Runtime(SettingsGroup): jdownloader_autostart: bool = False jdownloader_download_dir: PathOrNone = None jdownloader_whitelist: ListNonEmptyStr = [] - log_level: NonNegativeInt = DEBUG + log_level: NonNegativeInt = logging.DEBUG send_unsupported_to_jdownloader: bool = False skip_check_for_empty_folders: bool = False skip_check_for_partial_files: bool = False @@ -445,13 +443,13 @@ class GenericCrawlerInstances(SettingsGroup): class ConfigSettings(Settings): browser_cookies: Cookies = Cookies() - download_options: DownloadOptions = DownloadOptions() - dupe_cleanup_options: Dedupe = Dedupe() + download: Downloads = Downloads() + dedupe: Dedupe = Dedupe() file_size_limits: FileSizeLimits = FileSizeLimits() files: Files = Files() general: General = General() generic_crawlers_instances: GenericCrawlerInstances = GenericCrawlerInstances() - ignore_options: IgnoreOptions = IgnoreOptions() + ignore: Ignore = Ignore() logs: Logs = Logs() media_duration_limits: MediaDurationLimits = MediaDurationLimits() rate_limits: RateLimiting = RateLimiting() diff --git a/cyberdrop_dl/crawlers/_forum.py b/cyberdrop_dl/crawlers/_forum.py index f595de7f2..604d9c8b2 100644 --- a/cyberdrop_dl/crawlers/_forum.py +++ b/cyberdrop_dl/crawlers/_forum.py @@ -209,17 +209,17 @@ async def login(self) -> None: @final @property def scrape_single_forum_post(self) -> bool: - return config.get().download_options.scrape_single_forum_post + return config.get().download.scrape_single_forum_post @final @property def max_thread_depth(self) -> int: - return config.get().download_options.maximum_thread_depth + return config.get().download.max_thread_depth @final @property def max_thread_folder_depth(self): - return config.get().download_options.maximum_thread_folder_depth + return config.get().download.max_thread_folder_depth async def fetch(self, scrape_item: ScrapeItem) -> None: if not self.logged_in and self.login_required is True: diff --git a/cyberdrop_dl/crawlers/crawler.py b/cyberdrop_dl/crawlers/crawler.py index 89ca2c0d3..eb693c430 100644 --- a/cyberdrop_dl/crawlers/crawler.py +++ b/cyberdrop_dl/crawlers/crawler.py @@ -264,7 +264,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: ... @final @property def allow_no_extension(self) -> bool: - return not config.get().ignore_options.exclude_files_with_no_extension + return not config.get().ignore.exclude_files_with_no_extension @property def deep_scrape(self) -> bool: @@ -491,15 +491,15 @@ async def handle_media_item(self, media_item: MediaItem, m3u8: m3u8.RenditionGro async def check_skip_by_config(self, media_item: MediaItem) -> bool: media_host = media_item.url.host - if (hosts := config.get().ignore_options.skip_hosts) and any(host in media_host for host in hosts): + if (hosts := config.get().ignore.skip_hosts) and any(host in media_host for host in hosts): log(f"Download skipped{media_item.url} due to skip_hosts config", 10) return True - if (hosts := config.get().ignore_options.only_hosts) and not any(host in media_host for host in hosts): + if (hosts := config.get().ignore.only_hosts) and not any(host in media_host for host in hosts): log(f"Download skipped{media_item.url} due to only_hosts config", 10) return True - if (regex := config.get().ignore_options.filename_regex_filter) and re.search(regex, media_item.filename): + if (regex := config.get().ignore.filename_regex_filter) and re.search(regex, media_item.filename): log(f"Download skipped{media_item.url} due to filename regex filter config", 10) return True @@ -577,13 +577,13 @@ def create_title(self, title: str, album_id: str | None = None, thread_id: int | title = "Untitled" title = title.strip() - if album_id and config.get().download_options.include_album_id_in_folder_name: + if album_id and config.get().download.include_album_id_in_folder_name: title = f"{title} {album_id}" - if thread_id and config.get().download_options.include_thread_id_in_folder_name: + if thread_id and config.get().download.include_thread_id_in_folder_name: title = f"{title} {thread_id}" - if not config.get().download_options.remove_domains_from_folder_names: + if not config.get().download.remove_domains_from_folder_names: title = f"{title} ({self.FOLDER_DOMAIN})" # Remove double spaces @@ -595,7 +595,7 @@ def create_title(self, title: str, album_id: str | None = None, thread_id: int | @property def separate_posts(self) -> bool: - return config.get().download_options.separate_posts + return config.get().download.separate_posts def create_separate_post_title( self, @@ -606,7 +606,7 @@ def create_separate_post_title( ) -> str: if not self.separate_posts: return "" - title_format = config.get().download_options.separate_posts_format + title_format = config.get().download.separate_posts_format if title_format.strip().casefold() == "{default}": title_format = self.DEFAULT_POST_TITLE_FORMAT if isinstance(date, int): diff --git a/cyberdrop_dl/crawlers/kemono.py b/cyberdrop_dl/crawlers/kemono.py index d8d0dde9d..0b3441c59 100644 --- a/cyberdrop_dl/crawlers/kemono.py +++ b/cyberdrop_dl/crawlers/kemono.py @@ -216,11 +216,11 @@ def session_cookie(self) -> str: @property def ignore_content(self) -> bool: - return config.get().ignore_options.ignore_coomer_post_content + return config.get().ignore.ignore_coomer_post_content @property def ignore_ads(self) -> bool: - return config.get().ignore_options.ignore_coomer_ads + return config.get().ignore.ignore_coomer_ads async def async_startup(self) -> None: if getattr(self, "API_ENTRYPOINT", None): diff --git a/cyberdrop_dl/downloader/downloader.py b/cyberdrop_dl/downloader/downloader.py index e85d6b29b..6ef111570 100644 --- a/cyberdrop_dl/downloader/downloader.py +++ b/cyberdrop_dl/downloader/downloader.py @@ -143,7 +143,6 @@ async def run(self, media_item: MediaItem) -> bool: return bool(await self._download(media_item)) async def _check_file_can_download(self, media_item: MediaItem) -> None: - await self.manager.storage_manager.check_free_space(media_item) if media_item.is_segment: return diff --git a/cyberdrop_dl/managers/client_manager.py b/cyberdrop_dl/managers/client_manager.py index ba16fb4b3..f377107ef 100644 --- a/cyberdrop_dl/managers/client_manager.py +++ b/cyberdrop_dl/managers/client_manager.py @@ -150,7 +150,7 @@ def basic_auth(username: str, password: str) -> str: def check_allowed_filetype(self, media_item: MediaItem) -> bool: """Checks if the file type is allowed to download.""" - ignore_options = config.get().ignore_options + ignore_options = config.get().ignore ext = media_item.ext.lower() if ext in constants.FileFormats.IMAGE and ignore_options.exclude_images: @@ -169,7 +169,7 @@ def check_allowed_date_range(self, media_item: MediaItem) -> bool: return True item_date = datetime.date() - ignore_options = config.get().ignore_options + ignore_options = config.get().ignore if ignore_options.exclude_before and item_date < ignore_options.exclude_before: return False diff --git a/cyberdrop_dl/progress/hashing.py b/cyberdrop_dl/progress/hashing.py index 28ac2f131..8ed0afef3 100644 --- a/cyberdrop_dl/progress/hashing.py +++ b/cyberdrop_dl/progress/hashing.py @@ -22,9 +22,9 @@ def _get_enabled_hashes(): yield "xxh128" - if config.get().dupe_cleanup_options.add_md5_hash: + if config.get().dedupe.add_md5_hash: yield "md5" - if config.get().dupe_cleanup_options.add_sha256_hash: + if config.get().dedupe.add_sha256_hash: yield "sha256" diff --git a/cyberdrop_dl/scrape_mapper.py b/cyberdrop_dl/scrape_mapper.py index ec3329721..84314b997 100644 --- a/cyberdrop_dl/scrape_mapper.py +++ b/cyberdrop_dl/scrape_mapper.py @@ -114,7 +114,7 @@ async def run(self) -> None: async def get_input_items(self, input_file) -> AsyncGenerator[ScrapeItem]: items_generator = self.load_links(input_file) - children_limits = config.get().download_options.maximum_number_of_children + children_limits = config.get().download.max_children async for item in items_generator: item.children_limits = children_limits @@ -254,12 +254,12 @@ def should_scrape(self, scrape_item: ScrapeItem) -> bool: log(f"Skipping {scrape_item.url} as it is a blocked domain", 10) return False - skip_hosts = config.get().ignore_options.skip_hosts + skip_hosts = config.get().ignore.skip_hosts if skip_hosts and is_in_domain_list(scrape_item, skip_hosts): log(f"Skipping URL by skip_hosts config: {scrape_item.url}", 10) return False - only_hosts = config.get().ignore_options.only_hosts + only_hosts = config.get().ignore.only_hosts if only_hosts and not is_in_domain_list(scrape_item, only_hosts): log(f"Skipping URL by only_hosts config: {scrape_item.url}", 10) return False diff --git a/cyberdrop_dl/storage.py b/cyberdrop_dl/storage.py index 5d1823e65..a76a4739c 100644 --- a/cyberdrop_dl/storage.py +++ b/cyberdrop_dl/storage.py @@ -17,8 +17,6 @@ if TYPE_CHECKING: from collections.abc import Awaitable, Callable, Generator - from psutil._ntuples import sdiskpart - from cyberdrop_dl.data_structures.url_objects import MediaItem @@ -32,16 +30,6 @@ class DiskPartition: fstype: str = dataclasses.field(compare=False) opts: str = dataclasses.field(compare=False) - @staticmethod - def from_psutil(diskpart: sdiskpart) -> DiskPartition: - # Resolve converts any mapped drive to UNC paths (windows) - return DiskPartition( - Path(diskpart.mountpoint).resolve(), - Path(diskpart.device).resolve(), - diskpart.fstype, - diskpart.opts, - ) - @dataclasses.dataclass(frozen=True, slots=True, order=True) class DiskPartitionStats: @@ -54,7 +42,7 @@ def __str__(self) -> str: return ", ".join(f"'{k}': '{v}'" for k, v in stats_as_dict.items()) -_storage: ContextVar[StorageChecker] = ContextVar("_storage") +_storage_checker: ContextVar[StorageChecker] = ContextVar("_storage") _PARTITIONS: list[DiskPartition] = [] _UNAVAILABLE: set[Path] = set() _LOCKS: dict[Path, asyncio.Lock] = defaultdict(asyncio.Lock) @@ -68,7 +56,7 @@ class StorageChecker: required_free_space: int _free_space_map: dict[Path, int] = dataclasses.field(init=False, default_factory=dict) - _loop: asyncio.Task[None] | None = None + _loop: asyncio.Task[None] | None = dataclasses.field(init=False, default=None) def __str__(self) -> str: info = "\n".join(f" {stats!s}" for stats in self._partition_stats()) @@ -96,12 +84,12 @@ async def _has_sufficient_space(self, folder: Path) -> bool: logger.info(self) if self._loop is None: - self._loop = asyncio.create_task(self._check_free_space_loop()) + self._loop = asyncio.create_task(self._start_loop()) free_space = self._free_space_map[mount] return free_space == -1 or free_space > self.required_free_space - async def _check_free_space_loop(self) -> None: + async def _start_loop(self) -> None: """Infinite loop to get free space of all used mounts and update internal dict""" last_check = -1 @@ -110,7 +98,7 @@ async def _check_free_space_loop(self) -> None: last_check += 1 mountpoints = sorted(mount for mount, free_space in self._free_space_map.items() if free_space != -1) if mountpoints: - results = await asyncio.gather(*(get_free_space(mount) for mount in mountpoints)) + results = await asyncio.gather(*map(get_free_space, mountpoints)) self._free_space_map.update(zip(mountpoints, results, strict=True)) if last_check % _LOG_PERIOD == 0: @@ -118,13 +106,17 @@ async def _check_free_space_loop(self) -> None: await asyncio.sleep(_CHECK_PERIOD) - async def check_free_space(self, media_item: MediaItem) -> None: + async def check(self, media_item: MediaItem) -> None: """Checks if there is enough free space to download this item.""" if not await self._has_sufficient_space(media_item.download_folder): raise InsufficientFreeSpaceError(origin=media_item) - async def close(self) -> None: + async def __aenter__(self) -> Self: + _ = _storage_checker.set(self) + return self + + async def __aexit__(self, *_) -> None: self._free_space_map.clear() if self._loop is None: return @@ -134,13 +126,6 @@ async def close(self) -> None: except asyncio.CancelledError: pass - async def __aenter__(self) -> Self: - _ = _storage.set(self) - return self - - async def __aexit__(self, *_) -> None: - await self.close() - @functools.lru_cache def _get_mount_point(folder: Path) -> Path | None: @@ -164,12 +149,18 @@ def _drive_as_path(drive: str) -> Path: def _get_disk_partitions() -> Generator[DiskPartition]: - for p in psutil.disk_partitions(all=True): + for diskpart in psutil.disk_partitions(all=True): try: - yield DiskPartition.from_psutil(p) + # Resolve converts any mapped drive to UNC paths (windows) + yield DiskPartition( + Path(diskpart.mountpoint).resolve(), + Path(diskpart.device).resolve(), + diskpart.fstype, + diskpart.opts, + ) except OSError as e: logger.error( - f"Unable to get information about {p.mountpoint}. All files with that mountpoint as target will be skipped: {e!r}" + f"Unable to get information about {diskpart.mountpoint}. All files with that mountpoint as target will be skipped: {e!r}" ) @@ -212,7 +203,7 @@ async def _check_nt_network_drive(folder: Path) -> None: if folder_drive in _UNAVAILABLE: return - mounts = mountpoints() + mounts = tuple(p.mountpoint for p in partitions()) if folder_drive in mounts: return @@ -260,7 +251,7 @@ async def get_free_space(path: Path) -> int: def create_free_space_checker(media_item: MediaItem, *, frecuency: int = 5) -> Callable[[], Awaitable[None]]: current_chunk = 0 - check = _storage.get().check_free_space + check = _storage_checker.get().check async def checker() -> None: nonlocal current_chunk @@ -277,10 +268,6 @@ def partitions() -> tuple[DiskPartition, ...]: return tuple(_PARTITIONS) -def mountpoints() -> tuple[Path, ...]: - return tuple(p.mountpoint for p in partitions()) - - def clear_cache() -> None: _PARTITIONS.clear() _UNAVAILABLE.clear() diff --git a/cyberdrop_dl/utils/utilities.py b/cyberdrop_dl/utils/utilities.py index 314f12cc1..c284e4c69 100644 --- a/cyberdrop_dl/utils/utilities.py +++ b/cyberdrop_dl/utils/utilities.py @@ -264,7 +264,7 @@ def get_download_path(manager: Manager, scrape_item: ScrapeItem, domain: str) -> def remove_file_id(filename: str, ext: str) -> tuple[str, str]: """Removes the additional string some websites adds to the end of every filename.""" original_filename = filename - if not config.get().download_options.remove_generated_id_from_filenames: + if not config.get().download.remove_generated_id_from_filenames: return original_filename, filename filename = filename.rsplit(ext, 1)[0] diff --git a/tests/test_hashing.py b/tests/test_hashing.py index 0bab7bf4e..1cc6a72a0 100644 --- a/tests/test_hashing.py +++ b/tests/test_hashing.py @@ -55,8 +55,8 @@ def test_hash_directory_scanner(manager: Manager, expected_results: set[tuple[st n_files = max(count.values()) algos = count.keys() assert len(expected_results) == len(algos) * n_files - config.get().dupe_cleanup_options.add_md5_hash = "md5" in algos - config.get().dupe_cleanup_options.add_sha256_hash = "sha256" in algos + config.get().dedupe.add_md5_hash = "md5" in algos + config.get().dedupe.add_sha256_hash = "sha256" in algos config.get().files.download_folder.mkdir(parents=True) db_path = appdata.get().db_file diff --git a/tests/test_storage.py b/tests/test_storage.py index 56b99095f..4a5c22ddc 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -62,9 +62,12 @@ def test_storage_only_work_with_abs_paths() -> None: async def test_find_partition_finds_the_correct_partition() -> None: - def part(path: str) -> storage.DiskPartition: + def part(path: str): return storage.DiskPartition(Path(path), Path(path), "", "") + def find(path: str): + return storage.find_partition(Path(path)) + root, home, usb, external_ssd = partitions = [ part("/"), part("/home"), @@ -74,9 +77,8 @@ def part(path: str) -> storage.DiskPartition: storage._PARTITIONS = partitions # pyright: ignore[reportPrivateUsage] - assert storage.find_partition(Path("/swap_file")) is root - assert storage.find_partition(Path("/home/user/.bash_rc")) is home - assert storage.find_partition(Path("/home/external_SSD/song.mp3")) is external_ssd - assert storage.find_partition(Path("mnt/USB")) is None - assert storage.find_partition(Path("/mnt/USB")) is usb - assert storage.find_partition(Path("/mnt")) is root + assert find("/swap_file") is root + assert find("/home/user/.bash_rc") is home + assert find("/home/external_SSD/song.mp3") is external_ssd + assert find("/mnt/USB") is usb + assert find("/mnt") is root