From 0f6ea68a195fa6bee671f8d35e65b54a1e49685e Mon Sep 17 00:00:00 2001 From: valerii Date: Mon, 26 Jan 2026 15:43:27 +0300 Subject: [PATCH 1/4] feat: properly include README and config.yaml.example in package - Add config.yaml.example to email_processor package directory - Configure package-data in pyproject.toml to include config.yaml.example - Update config.py to find config.yaml.example in package using importlib.resources - Keep MANIFEST.in for additional files (README.md, LICENSE) - Files are now accessible in installed package Fixes #32 --- email_processor/cli/commands/config.py | 23 ++++++++--- email_processor/config.yaml.example | 57 ++++++++++++++++++++++++++ pyproject.toml | 4 ++ 3 files changed, 79 insertions(+), 5 deletions(-) create mode 100644 email_processor/config.yaml.example diff --git a/email_processor/cli/commands/config.py b/email_processor/cli/commands/config.py index 9403387..afc740c 100644 --- a/email_processor/cli/commands/config.py +++ b/email_processor/cli/commands/config.py @@ -1,6 +1,7 @@ """Configuration management commands.""" import shutil +from importlib import resources from pathlib import Path from email_processor.cli.ui import CLIUI @@ -11,15 +12,27 @@ def _find_config_example() -> Path: - """Find config.yaml.example file, checking current directory. + """Find config.yaml.example file, checking current directory first, then package. 
Returns: Path to config.yaml.example file """ - # Check current directory (for development and installed package) - # Files from MANIFEST.in are in .dist-info directory - # TODO: Add support for finding config.yaml.example in package when included as package data - return Path(CONFIG_EXAMPLE) + # First, try to find in current directory (for development) + current_dir_path = Path(CONFIG_EXAMPLE) + if current_dir_path.exists(): + return current_dir_path + + # Try to find in package (for installed package) + try: + # Try to access from package data + with resources.path("email_processor", CONFIG_EXAMPLE) as pkg_path: + if pkg_path.exists(): + return Path(pkg_path) + except (ModuleNotFoundError, FileNotFoundError, TypeError): + # Fallback to current directory if not found in package + pass + + return current_dir_path def create_default_config(config_path: str, ui: CLIUI) -> int: diff --git a/email_processor/config.yaml.example b/email_processor/config.yaml.example new file mode 100644 index 0000000..ad15a12 --- /dev/null +++ b/email_processor/config.yaml.example @@ -0,0 +1,57 @@ +# Email Processor Configuration Example +# Copy this file to config.yaml and fill in your settings + +imap: + server: "imap.example.com" + user: "your_email@example.com" + max_retries: 5 + retry_delay: 3 + +# SMTP settings for sending emails +smtp: + server: "smtp.example.com" + port: 587 # or 465 for SSL + use_tls: true # for port 587 + use_ssl: false # for port 465 + user: "your_email@example.com" # SMTP login for authentication (reuse from imap.user or set separately) + from_address: "sender@example.com" # Required: email address to send from (From header) + default_recipient: "recipient@example.com" # default recipient email address (required) + max_email_size: 25 # MB + sent_files_dir: "sent_files" # directory for storing sent file hashes + send_folder: "send_folder" # Optional: default folder to send files from (can be overridden with --send-folder) + # Optional: subject templates + # 
subject_template: "File: {filename}" # template for single file + # subject_template_package: "Package of files - {date}" # template for multiple files + # Available variables: {filename}, {filenames}, {file_count}, {date}, {datetime}, {size}, {total_size} + +processing: + start_days_back: 5 + archive_folder: "INBOX/Processed" + processed_dir: "processed_uids" # Can be absolute or relative path + keep_processed_days: 180 + archive_only_mapped: true + skip_non_allowed_as_processed: true + skip_unmapped_as_processed: true + show_progress: true # Show progress bar during email processing (requires tqdm package) + # Extension filtering (optional): + # allowed_extensions: [".pdf", ".doc", ".docx", ".xls", ".xlsx", ".zip", ".txt"] # Only download these extensions + # blocked_extensions: [".exe", ".bat", ".sh", ".scr", ".vbs", ".js"] # Block these extensions (takes priority over allowed) + +# Logging settings +logging: + level: INFO # Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL + format: console # Output format for console: "console" (readable) or "json" (structured) + format_file: json # Output format for file logs: "console" (readable) or "json" (structured). 
Default: "json" + file: logs # Optional: Directory for log files (format: yyyy-mm-dd.log, rotated daily) + # If not set, logs go to stdout + +allowed_senders: + - "client1@example.com" + - "finance@example.com" + - "boss@example.com" + +topic_mapping: + ".*roadmap.*": "roadmap" + "(reports).*": "reports" + "(invoice).*": "invoices" + ".*": "default" # Last rule is used as default for unmatched emails diff --git a/pyproject.toml b/pyproject.toml index 6bdb0ee..5807f43 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,6 +4,10 @@ build-backend = "setuptools.build_meta" [tool.setuptools] packages = {find = {}} +include-package-data = true + +[tool.setuptools.package-data] +"email_processor" = ["config.yaml.example"] [project] name = "email-processor" From 27d77e9ebcfb3fdb9edbcce2f4499f74c1cf2b3d Mon Sep 17 00:00:00 2001 From: valerii Date: Mon, 26 Jan 2026 15:44:06 +0300 Subject: [PATCH 2/4] feat: include README.md in package data - Add README.md to email_processor package directory - Update package-data to include README.md - README.md is now accessible in installed package Fixes #32 --- email_processor/README.md | 803 ++++++++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 804 insertions(+), 1 deletion(-) create mode 100644 email_processor/README.md diff --git a/email_processor/README.md b/email_processor/README.md new file mode 100644 index 0000000..a986269 --- /dev/null +++ b/email_processor/README.md @@ -0,0 +1,803 @@ +# 📦 Email Attachment Processor +### (YAML + keyring + per-day UID storage + password management + modular architecture) + +[![PyPI](https://img.shields.io/pypi/v/email-processor)](https://pypi.org/project/email-processor/) +[![CI](https://github.com/KHolodilin/python-email-automation-processor/actions/workflows/ci.yml/badge.svg)](https://github.com/KHolodilin/python-email-automation-processor/actions/workflows/ci.yml) +[![Test 
Coverage](https://codecov.io/gh/KHolodilin/python-email-automation-processor/branch/main/graph/badge.svg)](https://codecov.io/gh/KHolodilin/python-email-automation-processor) +[![Python Version](https://img.shields.io/badge/python-3.9%2B-blue.svg)](https://www.python.org/downloads/) +[![License](https://img.shields.io/github/license/KHolodilin/python-email-automation-processor)](LICENSE) +[![Stars](https://img.shields.io/github/stars/KHolodilin/python-email-automation-processor)](https://github.com/KHolodilin/python-email-automation-processor/stargazers) +[![Code style: Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) + +Email Processor is a reliable, idempotent, and secure tool for automatic email processing: +- **IMAP**: downloads attachments, organizes them into folders based on subject, archives processed emails +- **SMTP**: sends files via email with automatic tracking of sent files +- stores processed email UIDs in separate files by date +- uses keyring for secure password storage +- **command structure with subcommands support** +- **standardized exit codes** (`email_processor.exit_codes.ExitCode`) for scripting and automation +- **progress bar** for long-running operations +- **file extension filtering** (whitelist/blacklist) +- **disk space checking** before downloads +- **structured logging** with file output +- **dry-run mode** for testing +--- + +# 🚀 Key Features + +### 🔐 Secure IMAP Password Management +- Password is not stored in code or YAML +- Saved in system storage (**Windows Credential Manager**, **macOS Keychain**, **Linux SecretService**) +- **Passwords are encrypted** before storing in keyring using system-based key derivation +- Encryption key is generated from system characteristics (MAC address, hostname, user ID) - never stored +- On first run, the script will prompt for password and offer to save it +- Backward compatible: 
automatically migrates unencrypted passwords on next save + +### ⚙️ Configuration via `config.yaml` +- **IMAP**: Download folder management, subject-based sorting rules (`topic_mapping`), allowed sender management, archive settings +- **SMTP**: Server settings, default recipient, email size limits, subject templates +- Behavior options ("process / skip / archive") +- File extension filtering (whitelist/blacklist) +- Progress bar control +- Structured logging configuration + +### ⚡ Fast Two-Phase IMAP Fetch +1. Fast header fetch: `FROM SUBJECT DATE UID` +2. Full email (`RFC822`) is loaded **only if it matches the logic** + +### 📁 Optimized Processed Email Storage +Each email's UID is saved in: + +``` +processed_uids/YYYY-MM-DD.txt +``` + +This ensures: + +- 🔥 fast lookup of already processed UIDs +- ⚡ minimal memory usage +- 📉 no duplicate downloads +- 📁 convenient rotation of old records + +--- + +# 🚀 Quick Start + +## Installation and Initial Setup + +### 1. Install the module +```bash +pip install email-processor +``` + +### 2. Create Configuration +```bash +# Create configuration file from template +python -m email_processor config init + +# Edit config.yaml with your IMAP/SMTP settings +``` + +### 3. Set Password +```bash +# Set IMAP password (will be prompted interactively) +# --user can be omitted if imap.user is set in config.yaml +python -m email_processor password set --user your_email@example.com +python -m email_processor password set # uses imap.user from config + +# Or from file +python -m email_processor password set --user your_email@example.com --password-file ~/.pass --delete-after-read +``` + +### 4. Validate Configuration +```bash +# Validate configuration +python -m email_processor config validate + +# View system status +python -m email_processor status +``` + +### 5. Fetch (download emails and attachments) +Uses config by default (IMAP server, folder, processing options). 
+```bash +# Test mode (no real actions) +python -m email_processor fetch --dry-run + +# Run fetch +python -m email_processor fetch +``` + +### 6. Send (email files) +```bash +# Send a single file +python -m email_processor send file /path/to/file.pdf --to recipient@example.com +``` + +### 7. Send All Files from Folder +Uses config by default (`smtp.send_folder`, `smtp.default_recipient`). +```bash +# Send from folder (config defaults) +python -m email_processor send +# Or explicitly: +python -m email_processor send folder +``` + +### 8. Full pipeline: fetch + send +```bash +python -m email_processor run +``` + +--- + +# 🎯 Usage + +## Main Commands + +### Email Processing + +#### Full Pipeline (fetch + send) +```bash +# Process emails and send files +python -m email_processor run + +# With limitations +python -m email_processor run --since 7d --max-emails 100 +``` + +#### Email Fetching Only (without sending) +Uses config (IMAP, processing) by default. +```bash +# Fetch emails and attachments +python -m email_processor fetch + +# Process emails from last 7 days +python -m email_processor fetch --since 7d + +# Process specific folder +python -m email_processor fetch --folder "INBOX/Important" + +# Limit number of emails +python -m email_processor fetch --max-emails 50 + +# Test mode (without real actions) +python -m email_processor fetch --dry-run + +# Test mode with mock server (without connection) +python -m email_processor fetch --dry-run-no-connect +``` + +### Sending Files via Email + +#### Send Single File +```bash +# Send file (--to is required) +python -m email_processor send file /path/to/file.pdf --to recipient@example.com + +# With custom subject +python -m email_processor send file file.pdf --to user@example.com --subject "Important Document" + +# With CC and BCC +python -m email_processor send file file.pdf --to user@example.com --cc copy@example.com --bcc hidden@example.com + +# Test mode (without real sending) +python -m email_processor send file 
file.pdf --to user@example.com --dry-run +``` + +#### Send All Files from Folder +```bash +# With config defaults (smtp.send_folder, smtp.default_recipient) +python -m email_processor send +# Or explicitly: +python -m email_processor send folder + +# Explicit path and recipient +python -m email_processor send folder /path/to/folder --to recipient@example.com + +# With custom subject +python -m email_processor send folder /path/to/folder --to user@example.com --subject "File Package" +``` + +**Notes:** +- Files are tracked by SHA256 hash, so renamed files with the same content won't be sent again +- Already sent files are automatically skipped + +### Password Management + +#### Set Password +```bash +# Interactive password input +# --user is optional when imap.user is in config.yaml +python -m email_processor password set --user your_email@example.com +python -m email_processor password set # uses imap.user from config + +# From file (file will be deleted after reading) +python -m email_processor password set --user your_email@example.com --password-file ~/.pass --delete-after-read +``` + +#### Clear Password +```bash +# Delete saved password (--user optional if imap.user in config) +python -m email_processor password clear --user your_email@example.com +python -m email_processor password clear # uses imap.user from config +``` + +### Configuration Management + +#### Create Configuration +```bash +# Create config.yaml from template +python -m email_processor config init + +# With custom path +python -m email_processor config init --path /path/to/custom_config.yaml +``` + +#### Validate Configuration +```bash +# Validate configuration +python -m email_processor config validate + +# With custom file +python -m email_processor config validate --config /path/to/config.yaml +``` + +### View Status +```bash +# Show system status +python -m email_processor status +``` + +Shows: +- Application version +- Configuration path +- IMAP/SMTP settings +- Keyring availability +- 
Storage statistics + +### Global Options + +All commands support the following options: + +```bash +# Specify configuration file +--config /path/to/config.yaml + +# Test mode (without real actions) +--dry-run + +# Logging level +--log-level DEBUG|INFO|WARNING|ERROR + +# Log file path +--log-file /path/to/logs/app.log + +# JSON log format +--json-logs + +# Verbose output +--verbose + +# Quiet mode (errors only) +--quiet + +# Version +--version +``` + +### Option Combination Examples + +```bash +# Verbose output with DEBUG logging +python -m email_processor fetch --verbose --log-level DEBUG + +# Test mode with JSON logs +python -m email_processor run --dry-run --json-logs + +# Processing with limitations and logging +python -m email_processor fetch --since 3d --max-emails 20 --log-file logs/run.log +``` + +--- + +## Exit Codes + +The CLI uses standardized exit codes to provide clear error reporting and enable proper error handling in scripts and automation tools. All exit codes are defined in the `ExitCode` enum in `email_processor.exit_codes`. The `main()` entry point and all CLI commands return `ExitCode` values (or exit with them); as an `IntEnum`, they compare equal to their integer values (e.g. `ExitCode.SUCCESS == 0`). 
+ +### Standard Exit Codes + +| Code | Constant | Description | +|------|----------|-------------| +| `0` | `SUCCESS` | Operation completed successfully | +| `1` | `PROCESSING_ERROR` | Errors during extraction, parsing, mapping, or write operations | +| `2` | `VALIDATION_FAILED` | Input validation errors (e.g., invalid arguments, email format) | +| `3` | `FILE_NOT_FOUND` | Requested file or directory does not exist | +| `4` | `UNSUPPORTED_FORMAT` | Cannot detect or process the requested format (e.g., authentication/keyring errors) | +| `5` | `WARNINGS_AS_ERRORS` | Warnings were treated as errors (when `--fail-on-warnings` is enabled) | +| `6` | `CONFIG_ERROR` | Errors loading or validating configuration file | + +### Usage in Scripts + +You can use exit codes in shell scripts to handle different error scenarios: + +```bash +#!/bin/bash + +# Run email processor +python -m email_processor run + +# Check exit code +case $? in + 0) + echo "Success: Emails processed successfully" + ;; + 1) + echo "Error: Processing failed" + exit 1 + ;; + 2) + echo "Error: Invalid arguments or validation failed" + exit 1 + ;; + 3) + echo "Error: File not found" + exit 1 + ;; + 6) + echo "Error: Configuration file error" + exit 1 + ;; + *) + echo "Error: Unknown error" + exit 1 + ;; +esac +``` + +### Python Script Example + +```python +import subprocess +from email_processor.exit_codes import ExitCode + +result = subprocess.run( + ["python", "-m", "email_processor", "run"], + capture_output=True +) + +if result.returncode == ExitCode.SUCCESS: + print("Processing completed successfully") +elif result.returncode == ExitCode.CONFIG_ERROR: + print("Configuration error - check config.yaml") +elif result.returncode == ExitCode.PROCESSING_ERROR: + print("Processing error occurred") +else: + print(f"Unexpected exit code: {result.returncode}") +``` + +### Common Exit Code Scenarios + +- **`0` (SUCCESS)**: Command executed successfully +- **`1` (PROCESSING_ERROR)**: IMAP/SMTP processing failed, 
send/archive error, or write error +- **`2` (VALIDATION_FAILED)**: Invalid email address, missing required arguments, or invalid command +- **`3` (FILE_NOT_FOUND)**: Configuration file not found, password file not found, or target file/directory missing +- **`4` (UNSUPPORTED_FORMAT)**: Authentication/keyring error or unsupported format +- **`6` (CONFIG_ERROR)**: Configuration file syntax error, validation failure, or missing required settings + +--- + +## 🔒 Password Encryption + +Passwords stored in keyring are encrypted using a system-based encryption key: + +### How It Works +- **Encryption key** is generated from system characteristics: + - MAC address of network interface + - Hostname + - User ID (Windows SID / Linux UID) + - Config file path hash + - Python version +- **Key is never stored** - computed dynamically each time +- **PBKDF2-HMAC-SHA256** with 100,000 iterations for key derivation +- **Fernet (AES-128)** encryption for passwords + +### Security Benefits +- ✅ Passwords encrypted even if keyring is compromised +- ✅ Key cannot be stolen (not stored anywhere) +- ✅ Automatic operation (no user input required) +- ✅ Backward compatible with existing unencrypted passwords + +### Limitations +- ⚠️ System changes (MAC address, hostname, user) require password re-entry +- ⚠️ Cannot transfer passwords to another system +- ⚠️ System reinstall requires password re-entry + +### Migration +- Old unencrypted passwords are automatically encrypted on next save +- If decryption fails (system changed), you'll be prompted to re-enter password + +--- + +# ⚡ Implementation Benefits + +### ⚡ Time Savings +Duplicate emails are skipped instantly. + +### ⚡ Reduced IMAP Server Load +Minimal IMAP operations, partial fetch. + +### ⚡ No Duplicate Attachment Downloads +Each attachment is downloaded only once. + +### ⚡ No File Duplicates +Automatic numbering is used: `file_01.pdf`, `file_02.pdf`. + +### ⚡ Absolute Idempotency +Can be run 20 times in a row — result doesn't change. 
+ +### ⚡ Scalability +Per-day UID files ensure high performance. + +--- + +# ⚙ Example config.yaml + +```yaml +imap: + server: "imap.example.com" + user: "your_email@example.com" + max_retries: 5 + retry_delay: 3 + +# SMTP settings for sending emails +smtp: + server: "smtp.example.com" + port: 587 # or 465 for SSL + use_tls: true # for port 587 + use_ssl: false # for port 465 + user: "your_email@example.com" # reuse from imap.user or set separately + default_recipient: "recipient@example.com" + max_email_size: 25 # MB + sent_files_dir: "sent_files" # directory for storing sent file hashes + # Optional: subject templates + # subject_template: "File: {filename}" # template for single file + # subject_template_package: "Package of files - {date}" # template for multiple files + # Available variables: {filename}, {filenames}, {file_count}, {date}, {datetime}, {size}, {total_size} + +processing: + start_days_back: 5 + archive_folder: "INBOX/Processed" + processed_dir: "C:\\Users\\YourName\\AppData\\EmailProcessor\\processed_uids" + keep_processed_days: 180 + archive_only_mapped: true + skip_non_allowed_as_processed: true + skip_unmapped_as_processed: true + show_progress: true # Show progress bar during processing + # Extension filtering (optional): + # allowed_extensions: [".pdf", ".doc", ".docx", ".xls", ".xlsx", ".zip", ".txt"] + # blocked_extensions: [".exe", ".bat", ".sh", ".scr", ".vbs", ".js"] + +# Logging settings +logging: + level: INFO # DEBUG, INFO, WARNING, ERROR, CRITICAL + format: console # "console" (readable) or "json" (structured) + format_file: json # Format for file logs (default: "json") + file: logs # Optional: Directory for log files (rotated daily) + +allowed_senders: + - "client1@example.com" + - "finance@example.com" + - "boss@example.com" + +topic_mapping: + ".*Roadmap.*": "roadmap" + "(Report).*": "reports" + "(Invoice|Bill).*": "invoices" + ".*": "default" # Last rule is used as default for unmatched emails +``` + +### SMTP Configuration 
Details + +**Required settings:** +- `smtp.server`: SMTP server hostname +- `smtp.port`: SMTP server port (typically 587 for TLS or 465 for SSL) +- `smtp.default_recipient`: Default recipient email address + +**Optional settings:** +- `smtp.user`: SMTP username (defaults to `imap.user` if not specified) +- `smtp.use_tls`: Use TLS encryption (default: `true` for port 587) +- `smtp.use_ssl`: Use SSL encryption (default: `false`, use for port 465) +- `smtp.max_email_size`: Maximum email size in MB (default: `25`) +- `smtp.sent_files_dir`: Directory for storing sent file hashes (default: `"sent_files"`) +- `smtp.send_folder`: Default folder to send files from (optional, can be overridden with `send folder` command) +- `smtp.subject_template`: Template for single file subject (e.g., `"File: {filename}"`) +- `smtp.subject_template_package`: Template for multiple files subject (e.g., `"Package - {file_count} files"`) + +**Subject template variables:** +- `{filename}` - Single file name +- `{filenames}` - Comma-separated list of file names (for packages) +- `{file_count}` - Number of files (for packages) +- `{date}` - Date in format YYYY-MM-DD +- `{datetime}` - Date and time in format YYYY-MM-DD HH:MM:SS +- `{size}` - File size in bytes (single file) +- `{total_size}` - Total size in bytes (for packages) + +**Note:** Password is reused from IMAP keyring storage (same `imap.user` key). No separate SMTP password needed. 
+ + +**Note:** +- All paths in `topic_mapping` can be either absolute or relative: + - **Absolute paths**: `"C:\\Documents\\Roadmaps"` (Windows) or `"/home/user/documents/reports"` (Linux/macOS) + - **Relative paths**: `"roadmap"` (relative to the script's working directory) +- **The last rule in `topic_mapping` is used as default** for all emails that don't match any of the previous patterns +- Both absolute and relative paths are supported for `processed_dir`: + - **Absolute paths**: `"C:\\Users\\AppData\\processed_uids"` (Windows) or `"/home/user/.cache/processed_uids"` (Linux/macOS) + - **Relative paths**: `"processed_uids"` (relative to the script's working directory) + + Example with mixed paths: + ```yaml + topic_mapping: + ".*Roadmap.*": "C:\\Documents\\Roadmaps" # Absolute path + "(Report).*": "reports" # Relative path + "(Invoice|Bill).*": "C:\\Finance\\Invoices" # Absolute path + ".*": "default" # Default folder (relative path) + ``` + +--- + +# 🔐 Password Management (Complete Command Set) + +### ➕ Save Password (automatically) +```bash +python -m email_processor +``` +On first run, the script will prompt for password and offer to save it. 
+ +### ➕ Set Password from File +```bash +# Read password from file and save it +python -m email_processor password set --user your_email@example.com --password-file ~/.pass + +# Read password from file, save it, and remove the file +python -m email_processor password set --user your_email@example.com --password-file ~/.pass --delete-after-read +``` + +**Security Notes:** +- Password file should have restricted permissions (chmod 600 on Unix) +- Use `--delete-after-read` to automatically delete the file after reading +- Password is encrypted before saving to keyring +- Supports complex passwords via file (can copy-paste) + +**Example:** +```bash +# Create password file +echo "your_complex_password" > ~/.email_password +chmod 600 ~/.email_password # Restrict access (Unix only) + +# Set password and remove file +python -m email_processor password set --user your_email@example.com --password-file ~/.email_password --delete-after-read +``` + +### 🔍 Read Password +```python +import keyring +keyring.get_password("email-vkh-processor", "your_email@example.com") +``` + +### 🗑️ Delete Password +```bash +python -m email_processor password clear --user your_email@example.com +``` + +### ➕ Add Password Manually +```python +import keyring +keyring.set_password( + "email-vkh-processor", + "your_email@example.com", + "MY_PASSWORD" +) +``` + +--- + +# 📋 Installation + +## Using Virtual Environment (Recommended) + +### 1. Create Virtual Environment + +**Windows:** +```bash +python -m venv .venv +.venv\Scripts\activate +``` + +**Linux/macOS:** +```bash +python3 -m venv .venv +source .venv/bin/activate +``` + +### 2. Install Dependencies + +```bash +pip install -r requirements.txt +``` + +**Note:** If you're using 32-bit Python on Windows and encounter DLL errors with cryptography, you may need to install an older version: +```bash +pip install cryptography==40.0.2 +``` +Alternatively, use 64-bit Python for better compatibility. + +### 3. 
Copy Configuration Template + +```bash +cp config.yaml.example config.yaml +``` + +### 4. Edit Configuration + +Edit `config.yaml` with your IMAP settings + +### 5. Run the Script + +```bash +# As a module +python -m email_processor + +# Or install and use as command +pip install -e . +email-processor +``` + +### 6. Deactivate Virtual Environment (when done) + +```bash +deactivate +``` + +## Alternative: Global Installation + +1. Install dependencies: +```bash +pip install -r requirements.txt +``` + +2. Copy configuration template: +```bash +cp config.yaml.example config.yaml +``` + +3. Edit `config.yaml` with your IMAP settings + +4. Run the script: +```bash +# As a module +python -m email_processor + +# Or install and use as command +pip install -e . +email-processor + +# To build distributable package for pip install, see `docs/_build/BUILD.md` +``` + +## 🛠️ Development Setup + +For development, install additional tools: + +```bash +pip install ruff mypy types-PyYAML +``` + +### Code Quality Tools + +- **Ruff**: Fast linter and formatter (replaces Black) + ```bash + ruff check . # Check for issues + ruff check --fix . # Auto-fix issues + ruff format . # Format code + ruff format --check . # Check formatting + ``` + +- **MyPy**: Type checker + ```bash + mypy email_processor # Type check + ``` + +### Test Coverage + +The project uses [Codecov](https://codecov.io) for test coverage tracking and reporting. Coverage reports are automatically generated during CI runs and uploaded to Codecov. + +- **View coverage reports**: [Codecov Dashboard](https://codecov.io/gh/KHolodilin/python-email-automation-processor) +- **Run tests with coverage locally**: + ```bash + pytest --cov=email_processor --cov-report=term-missing --cov-report=html + ``` +- **View HTML coverage report**: Open `htmlcov/index.html` in your browser after running tests + +The project maintains a minimum test coverage threshold of 70% (with plans to increase to 95%+). 
Coverage reports help identify untested code paths and ensure code quality. + +See `CONTRIBUTING.md` for detailed development guidelines. + +--- + +# 🔧 Configuration Options + +## IMAP Settings +- `server`: IMAP server address (required) +- `user`: Email address (required) +- `max_retries`: Maximum connection retry attempts (default: 5) +- `retry_delay`: Delay between retries in seconds (default: 3) + +## Processing Settings +- `start_days_back`: How many days back to process emails (default: 5) +- `archive_folder`: IMAP folder for archived emails (default: "INBOX/Processed") +- `processed_dir`: Directory for processed UID files (default: "processed_uids") + - **Supports absolute paths**: `"C:\\Users\\AppData\\processed_uids"` or `"/home/user/.cache/processed_uids"` + - **Supports relative paths**: `"processed_uids"` (relative to script directory) +- `keep_processed_days`: Days to keep processed UID files (0 = keep forever, default: 0) +- `archive_only_mapped`: Archive only emails matching topic_mapping (default: true) +- `skip_non_allowed_as_processed`: Mark non-allowed senders as processed (default: true) +- `skip_unmapped_as_processed`: Mark unmapped emails as processed (default: true) +- `show_progress`: Show progress bar during processing (default: true, requires tqdm) +- `allowed_extensions`: List of allowed file extensions (e.g., `[".pdf", ".doc"]`) + - If specified, only files with these extensions will be downloaded + - Case-insensitive, dot prefix optional +- `blocked_extensions`: List of blocked file extensions (e.g., `[".exe", ".bat"]`) + - Takes priority over `allowed_extensions` + - Files with these extensions will be skipped + - Case-insensitive, dot prefix optional + +## Logging Settings +- `level`: Logging level - DEBUG, INFO, WARNING, ERROR, CRITICAL (default: "INFO") +- `format`: Console output format - "console" (readable) or "json" (structured, default: "console") +- `format_file`: File log format - "console" or "json" (default: "json") +- 
`file`: Directory for log files (optional, format: `yyyy-mm-dd.log`, rotated daily) + - If not set, logs go to stdout only + +## Allowed Senders +List of email addresses allowed to process. If empty, no emails will be processed. + +## Topic Mapping +Dictionary of regex patterns to folder paths. Emails matching a pattern will be saved to the corresponding folder. +- **The last rule in `topic_mapping` is used as default** for all emails that don't match any of the previous patterns +- All paths can be absolute (e.g., `"C:\\Documents\\Roadmaps"`) or relative (e.g., `"roadmap"`) +- Patterns are checked in order, and the first match is used + +--- + +# 🏗️ Architecture + +The project uses a modular architecture for better maintainability: + +``` +email_processor/ +├── cli/ # CLI commands and user interface +│ ├── commands/ # CLI subcommands (config, imap, passwords, smtp, status) +│ └── ui.py # UI components and console output +├── config/ # Configuration loading and validation +├── imap/ # IMAP operations (client, auth, archive, fetcher, filters) +├── logging/ # Structured logging setup and formatters +├── security/ # Security features (encryption, fingerprint, key generation) +├── smtp/ # SMTP operations (client, sender, config) +├── storage/ # UID storage and file management +└── utils/ # Utility functions (email, path, disk, folder resolver, context) +``` + +Key modules: +- **`cli/`**: Command-line interface with subcommands for all operations +- **`config/`**: YAML configuration loading and validation +- **`imap/`**: Email fetching, attachment downloading, and archiving +- **`smtp/`**: Email sending with file tracking +- **`security/`**: Password encryption and system-based key derivation +- **`storage/`**: Processed UID tracking and sent file management +- **`utils/`**: Helper functions for common operations + +# 📚 Additional Documentation + +- **Testing Guide**: See `docs/_build/README_TESTS.md` +- **Building and Distribution**: See `docs/_build/BUILD.md` (how to 
build package for `pip install`) +- **Plans, reports, internal docs**: `docs/_build/` (PLAN, REDUNDANT_CODE_REPORT, unit-tests-structure, etc.) diff --git a/pyproject.toml b/pyproject.toml index 5807f43..dba2377 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ packages = {find = {}} include-package-data = true [tool.setuptools.package-data] -"email_processor" = ["config.yaml.example"] +"email_processor" = ["config.yaml.example", "README.md"] [project] name = "email-processor" From 167935fe194ee3649b4cffa29da553fb81ffbbbc Mon Sep 17 00:00:00 2001 From: valerii Date: Mon, 26 Jan 2026 17:16:24 +0300 Subject: [PATCH 3/4] test: add missing test coverage for config.py (Fixes #36) - Add tests for _find_config_example() function - Test finding file in current directory - Test finding file in package via importlib.resources - Test fallback when package resource not found - Add test for create_default_config when example file not found - Add tests for file overwrite confirmation logic - Add test for create_default_config without rich console - Coverage for config.py now reaches 100% --- tests/unit/cli/commands/test_config.py | 169 ++++++++++++++++++++++++- 1 file changed, 168 insertions(+), 1 deletion(-) diff --git a/tests/unit/cli/commands/test_config.py b/tests/unit/cli/commands/test_config.py index 363dc16..d0ef1f2 100644 --- a/tests/unit/cli/commands/test_config.py +++ b/tests/unit/cli/commands/test_config.py @@ -5,7 +5,11 @@ from unittest.mock import MagicMock, patch from email_processor.cli import CLIUI -from email_processor.cli.commands.config import create_default_config, validate_config_file +from email_processor.cli.commands.config import ( + _find_config_example, + create_default_config, + validate_config_file, +) from email_processor.exit_codes import ExitCode @@ -138,3 +142,166 @@ def test_create_config_with_rich_console(self, mock_console_class, mock_copy, mo self.assertEqual(result, ExitCode.SUCCESS) # Check that print was called with rich 
formatting mock_print.assert_called_once() + + +class TestFindConfigExample(unittest.TestCase): + """Tests for _find_config_example function.""" + + @patch("email_processor.cli.commands.config.Path") + def test_find_config_example_in_current_directory(self, mock_path_class): + """Test _find_config_example finds file in current directory.""" + example_path = MagicMock() + example_path.exists.return_value = True + + mock_path_class.return_value = example_path + + result = _find_config_example() + self.assertEqual(result, example_path) + example_path.exists.assert_called_once() + + @patch("email_processor.cli.commands.config.resources") + @patch("email_processor.cli.commands.config.Path") + def test_find_config_example_in_package(self, mock_path_class, mock_resources): + """Test _find_config_example finds file in package via importlib.resources.""" + # Current directory path doesn't exist + current_dir_path = MagicMock() + current_dir_path.exists.return_value = False + + # Package path exists + pkg_path = Path("/package/path/config.yaml.example") + pkg_path_mock = MagicMock() + pkg_path_mock.exists.return_value = True + + # Mock context manager for resources.path() + context_manager = MagicMock() + context_manager.__enter__.return_value = pkg_path_mock + context_manager.__exit__.return_value = None + mock_resources.path.return_value = context_manager + + mock_path_class.side_effect = lambda p: ( + current_dir_path if p == "config.yaml.example" else Path(pkg_path_mock) + ) + + result = _find_config_example() + # Result should be Path(pkg_path_mock) + self.assertIsInstance(result, Path) + mock_resources.path.assert_called_once_with("email_processor", "config.yaml.example") + + @patch("email_processor.cli.commands.config.resources") + @patch("email_processor.cli.commands.config.Path") + def test_find_config_example_fallback_when_package_not_found( + self, mock_path_class, mock_resources + ): + """Test _find_config_example falls back to current directory when package 
resource not found.""" + # Current directory path doesn't exist initially + current_dir_path = MagicMock() + current_dir_path.exists.return_value = False + + mock_path_class.return_value = current_dir_path + # Simulate exception when accessing package resource + mock_resources.path.side_effect = FileNotFoundError("Package resource not found") + + result = _find_config_example() + self.assertEqual(result, current_dir_path) + mock_resources.path.assert_called_once_with("email_processor", "config.yaml.example") + + +class TestCreateConfigMissingExample(unittest.TestCase): + """Tests for create_default_config when example file is missing.""" + + @patch("email_processor.cli.commands.config.Path") + def test_create_config_example_not_found(self, mock_path_class): + """Test create_default_config when config.yaml.example is not found.""" + example_path = MagicMock() + example_path.exists.return_value = False + example_path.absolute.return_value = Path("/path/to/config.yaml.example") + + target_path = MagicMock() + target_path.exists.return_value = False + + mock_path_class.side_effect = lambda p: example_path if "example" in str(p) else target_path + + ui = CLIUI() + with patch.object(ui, "error") as mock_error, patch.object(ui, "info") as mock_info: + result = create_default_config("config.yaml", ui) + self.assertEqual(result, ExitCode.FILE_NOT_FOUND) + mock_error.assert_called_once() + mock_info.assert_called_once() + + +class TestCreateConfigOverwrite(unittest.TestCase): + """Tests for create_default_config file overwrite confirmation.""" + + @patch("email_processor.cli.commands.config.Path") + @patch("email_processor.cli.commands.config.shutil.copy2") + def test_create_config_overwrite_cancelled(self, mock_copy, mock_path_class): + """Test create_default_config when user cancels overwrite.""" + example_path = MagicMock() + example_path.exists.return_value = True + + target_path = MagicMock() + target_path.exists.return_value = True # File already exists + 
target_path.parent = MagicMock() + target_path.absolute.return_value = Path("/path/to/config.yaml") + + mock_path_class.side_effect = lambda p: example_path if "example" in str(p) else target_path + + ui = CLIUI() + with patch.object(ui, "input", return_value="n"), patch.object(ui, "warn") as mock_warn: + result = create_default_config("config.yaml", ui) + self.assertEqual(result, ExitCode.SUCCESS) + mock_warn.assert_called_once_with("Cancelled.") + mock_copy.assert_not_called() + + @patch("email_processor.cli.commands.config.Path") + @patch("email_processor.cli.commands.config.shutil.copy2") + def test_create_config_overwrite_confirmed(self, mock_copy, mock_path_class): + """Test create_default_config when user confirms overwrite.""" + example_path = MagicMock() + example_path.exists.return_value = True + + target_path = MagicMock() + target_path.exists.return_value = True # File already exists + target_path.parent = MagicMock() + target_path.absolute.return_value = Path("/path/to/config.yaml") + + mock_path_class.side_effect = lambda p: example_path if "example" in str(p) else target_path + + ui = CLIUI() + with patch.object(ui, "input", return_value="y"), patch.object( + ui, "success" + ) as mock_success: + result = create_default_config("config.yaml", ui) + self.assertEqual(result, ExitCode.SUCCESS) + mock_copy.assert_called_once_with(example_path, target_path) + mock_success.assert_called_once() + + +class TestCreateConfigWithoutRich(unittest.TestCase): + """Tests for create_default_config without rich console.""" + + @patch("email_processor.cli.commands.config.Path") + @patch("email_processor.cli.commands.config.shutil.copy2") + @patch("email_processor.cli.ui.RICH_AVAILABLE", False) + def test_create_config_without_rich_console(self, mock_copy, mock_path_class): + """Test create_default_config without rich console (fallback to info).""" + example_path = MagicMock() + example_path.exists.return_value = True + + target_path = MagicMock() + 
target_path.exists.return_value = False + target_path.parent = MagicMock() + target_path.absolute.return_value = Path("/path/to/config.yaml") + + mock_path_class.side_effect = lambda p: example_path if "example" in str(p) else target_path + + ui = CLIUI() + with patch.object(ui, "info") as mock_info, patch.object(ui, "success") as mock_success: + result = create_default_config("config.yaml", ui) + self.assertEqual(result, ExitCode.SUCCESS) + mock_success.assert_called_once() + # Should use info instead of print when rich is not available + mock_info.assert_called() + # Check that info was called with the edit message + info_calls = [call[0][0] for call in mock_info.call_args_list] + self.assertTrue(any("edit" in str(call).lower() for call in info_calls)) From 63afc3b61d340fad29b09eab2a2117e29e57583a Mon Sep 17 00:00:00 2001 From: valerii Date: Mon, 26 Jan 2026 17:41:17 +0300 Subject: [PATCH 4/4] style: apply ruff format to test_config.py --- tests/unit/cli/commands/test_config.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/unit/cli/commands/test_config.py b/tests/unit/cli/commands/test_config.py index d0ef1f2..4296c16 100644 --- a/tests/unit/cli/commands/test_config.py +++ b/tests/unit/cli/commands/test_config.py @@ -268,9 +268,10 @@ def test_create_config_overwrite_confirmed(self, mock_copy, mock_path_class): mock_path_class.side_effect = lambda p: example_path if "example" in str(p) else target_path ui = CLIUI() - with patch.object(ui, "input", return_value="y"), patch.object( - ui, "success" - ) as mock_success: + with ( + patch.object(ui, "input", return_value="y"), + patch.object(ui, "success") as mock_success, + ): result = create_default_config("config.yaml", ui) self.assertEqual(result, ExitCode.SUCCESS) mock_copy.assert_called_once_with(example_path, target_path)