# Patch summary (leading text before the first "diff --git" header).
#
# .gitignore
#   Adds the ignore entry vendor/ffmpeg/* after vendor/czkawka/*; the file
#   still ends without a trailing newline.
#
# Dockerfile
#   Copies vendor/czkawka into /bin/ and installs ffmpeg via apt-get, both
#   after the existing "COPY . ." step.
#   NOTE(review): the apt layer does not use --no-install-recommends or clean
#   /var/lib/apt/lists, and sits after "COPY . ." so it is rebuilt whenever
#   project files change - confirm whether that is intended.
#
# src/deduplication.py
#   deduplicate_files no longer reads the CZKAWKA_PATH environment variable.
#   It picks an executable name from a platform map, using the key "docker"
#   when /.dockerenv exists and os.sys.platform otherwise, and falls back to
#   "czkawka_cli" for unknown platforms. The tolerance is read from
#   CK_DUPLICATE_TOLERANCE (default "2") and passed down explicitly.
#   Flag assembly moves out of _remove_duplicates into the new helper
#   _build_czkawka_flags; the print(flags) call is dropped.
#   NOTE(review): os.sys is only reachable because the os module imports sys;
#   "import sys" plus sys.platform is the documented spelling.
#   NOTE(review): the executable is now a bare file name, so subprocess.run
#   resolves it through PATH. The Dockerfile copy into /bin/ presumably covers
#   the docker case; other platforms need the binary on PATH - confirm.
#   A missing binary raises FileNotFoundError, which the existing
#   "except subprocess.CalledProcessError" does not catch.
#
# src/main.py
#   Adds a module-level logger and replaces print calls with logger calls.
#   root logs the request before scheduling launch_job. launch_job drops the
#   temporary time.sleep(5) and jobs.txt write, and now calls
#   get_qbittorrent_files_downloading, deduplicate_files,
#   _load_patterns_file(job_id) and
#   group_files_by_prefix(target_dir, downloading, patterns), logging each step.
#   _load_patterns_file gains a job_id parameter and looks for
#   <cwd>/../config/patterns.txt instead of <cwd>/config/patterns.txt.
#   NOTE(review): no logging configuration is visible in these hunks; with
#   Python's defaults, logger.info records are not emitted unless a handler
#   and level are configured elsewhere - confirm.
#   NOTE(review): the patterns path depends on the process working directory;
#   verify it matches how the app is launched in Docker and locally.
#   NOTE(review): "import time" stays as context while time.sleep is removed;
#   it looks unused in the visible hunks - confirm.
#
# NOTE(review): the line breaks of this patch appear to have been collapsed
# into spaces in this copy, so it is unlikely to apply as-is.
diff --git a/.gitignore b/.gitignore index 7fcfb17..c76fa4f 100644 --- a/.gitignore +++ b/.gitignore @@ -162,4 +162,5 @@ cython_debug/ #.idea/ -vendor/czkawka/* \ No newline at end of file +vendor/czkawka/* +vendor/ffmpeg/* \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index aba06e5..4bb6c45 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,6 +16,12 @@ RUN pip install gunicorn # Copy the FastAPI project files into the container COPY . . +# Copy the czkawka binary into the container +COPY vendor/czkawka /bin/ + +# Install ffmpeg with ffprobe +RUN apt-get update && apt-get install -y ffmpeg + # Expose the port the app runs on EXPOSE 8000 diff --git a/src/deduplication.py b/src/deduplication.py index 036c9f8..9f4f1dd 100644 --- a/src/deduplication.py +++ b/src/deduplication.py @@ -10,23 +10,37 @@ class CZKAWKA_DELETION_METHOD(Enum): NONE="NONE" def deduplicate_files(target_dir, exclude_files): - czkawka_path = os.environ.get("CZKAWKA_PATH", "/app/vendor/czkawka/czkawka_cli") - duplicates = _remove_duplicates( - czkawka_path, target_dir, exclude_files, CZKAWKA_DELETION_METHOD.ALL_EXCEPT_SMALLEST) + executables = { + "linux": "linux_czkawka_cli", + "win32": "windows_czkawka_cli.exe", + "darwin": "mac_czkawka_cli", + "docker": "linux_czkawka_cli" + } -def _remove_duplicates(czkawka_path, target_dir, exclude_files, delete_method: CZKAWKA_DELETION_METHOD): + platform = "docker" if os.path.exists("/.dockerenv") else os.sys.platform + czkawka_executable = executables.get(platform, "czkawka_cli") + czkawka_deletion_method = CZKAWKA_DELETION_METHOD.ALL_EXCEPT_SMALLEST + czkawka_tolerance = os.environ.get("CK_DUPLICATE_TOLERANCE", "2") + _remove_duplicates(czkawka_executable, target_dir, exclude_files, czkawka_deletion_method, czkawka_tolerance) + +def _remove_duplicates(czkawka_path, target_dir, exclude_files, delete_method, tolerance): try: - flags = ["video", "--directories", target_dir, "--not-recursive", "--delete-method", delete_method.value] - for file 
in exclude_files: - flags.append("--excluded-items") - flags.append(file) - - flags.append("--tolerance") - flags.append(os.environ.get("CK_DUPLICATE_TOLERANCE", "2")) - print(flags) + flags = _build_czkawka_flags(target_dir, exclude_files, delete_method, tolerance) result = subprocess.run([czkawka_path, *flags], capture_output=True, text=True, check=True) print(result.stdout) except subprocess.CalledProcessError as e: print(f"Failed to find duplicates: {e.stderr}") + +def _build_czkawka_flags(target_dir, exclude_files, delete_method, tolerance): + flags = ["video", "--directories", target_dir, "--not-recursive", "--delete-method", delete_method.value] + + + for file in exclude_files: + flags.append("--excluded-items") + flags.append(file) + + flags.append("--tolerance") + flags.append(tolerance) + return flags diff --git a/src/main.py b/src/main.py index 6feffaf..6fdf5ec 100644 --- a/src/main.py +++ b/src/main.py @@ -1,14 +1,16 @@ import os from fastapi import BackgroundTasks, FastAPI +import logging from dotenv import load_dotenv from os import path - from qbittorrent_api import get_qbittorrent_files_downloading from filemoving import group_files_by_prefix from deduplication import deduplicate_files import uuid import time +logger = logging.getLogger(__name__) + load_dotenv() qbit_url = os.getenv('QB_URL') @@ -16,33 +18,39 @@ qbit_user = os.getenv('QB_USER') qbit_password = os.getenv('QB_PASSWORD') target_dir = os.getenv('TARGET_DIR') if not qbit_url or not qbit_user or not qbit_password or not target_dir: - print("Please provide all the required environment variables.") + logger.error("Please provide all the required environment variables.") exit(1) + app = FastAPI() @app.get("/") def root(background_tasks: BackgroundTasks): job_id = str(uuid.uuid4()) + logger.info(f"[{job_id}] Received request to start job.") background_tasks.add_task(launch_job, job_id) return job_id def launch_job(job_id): - # WARNING: Temporary code for testing purposes - time.sleep(5) + 
logger.info(f"[{job_id}] Fetching downloading files from qBittorrent.") + downloading = get_qbittorrent_files_downloading(qbit_url, qbit_user, qbit_password) + logger.info(f"[{job_id}] Found {len(downloading)} downloading files.") - with open("jobs.txt", "a") as f: - f.write(f"{job_id}\n") + logger.info(f"[{job_id}] Deduplicating files.") + deduplicate_files(target_dir, downloading) - patterns = _load_patterns_file() - # downloading = get_qbittorrent_files_downloading(qbit_url, qbit_user, qbit_password) - # deduplicate_files(target_dir, downloading) - # group_files_by_prefix(target_dir, downloading) + logger.info(f"[{job_id}] Loading patterns.") + patterns = _load_patterns_file(job_id) + logger.info(f"[{job_id}] Using patterns: {patterns}") + logger.info(f"[{job_id}] Creating subdirectories and moving files.") + group_files_by_prefix(target_dir, downloading, patterns) -def _load_patterns_file(): - config_file_path = os.path.join(os.getcwd(), 'config', 'patterns.txt') + logger.info(f"[{job_id}] Job completed.") + +def _load_patterns_file(job_id): + config_file_path = os.path.join(os.getcwd(), '..', 'config', 'patterns.txt') if not path.exists(config_file_path): - print(f"The config file {config_file_path} does not exist.") + logger.error(f"[{job_id}] The config file {config_file_path} does not exist.") return [] with open(config_file_path, 'r') as file: return [line.strip() for line in file if line.strip()] \ No newline at end of file