Remove duplicate files using Czkawka

This commit is contained in:
Jose134 2024-12-08 05:53:12 +01:00
parent e434d28c16
commit 2e65e1bf9a
4 changed files with 38 additions and 1 deletions

3
.gitignore vendored
View File

@ -160,3 +160,6 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear # and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder. # option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/ #.idea/
vendor/czkawka/*

View File

@ -22,7 +22,7 @@ Store all the logs for the current job in order to be able to debug and keep tra
- [x] Read qbittorrent credentials from .env file - [x] Read qbittorrent credentials from .env file
- [x] Implement API endpoint using FastAPI - [x] Implement API endpoint using FastAPI
- [x] Run organization job on a separate thread - [x] Run organization job on a separate thread
- [ ] Deduplicate files using Czkawka - [x] Deduplicate files using Czkawka
- [ ] Add unit tests - [ ] Add unit tests
- [ ] Add logging - [ ] Add logging
- [ ] Make it run in docker - [ ] Make it run in docker

31
src/deduplication.py Normal file
View File

@ -0,0 +1,31 @@
import subprocess
import os
from enum import Enum
class CZKAWKA_DELETION_METHOD(Enum):
    """Values accepted by the Czkawka CLI ``--delete-method`` flag.

    Each member selects which copies of a duplicate group are removed;
    ``NONE`` only reports duplicates without deleting anything.
    """

    ALL_EXCEPT_NEWEST = "AEN"
    ALL_EXCEPT_OLDEST = "AEO"
    ALL_EXCEPT_BIGGEST = "AEB"
    ALL_EXCEPT_SMALLEST = "AES"
    NONE = "NONE"
def deduplicate_files(target_dir, exclude_files):
    """Remove duplicate video files in *target_dir* via the Czkawka CLI.

    Paths listed in *exclude_files* are excluded from the scan (the caller
    passes files that are still downloading). Within each duplicate group,
    every copy except the smallest is deleted.

    The Czkawka binary location defaults to the in-container vendor path
    and can be overridden with the CZKAWKA_PATH environment variable.
    """
    czkawka_path = os.environ.get("CZKAWKA_PATH", "/app/vendor/czkawka/czkawka_cli")
    # Fix: the original bound the result to an unused local `duplicates`,
    # although _remove_duplicates returns nothing; call for its side effect.
    _remove_duplicates(
        czkawka_path, target_dir, exclude_files, CZKAWKA_DELETION_METHOD.ALL_EXCEPT_SMALLEST)
def _remove_duplicates(czkawka_path, target_dir, exclude_files, delete_method: CZKAWKA_DELETION_METHOD):
    """Run the Czkawka CLI "video" duplicate finder over *target_dir*.

    Args:
        czkawka_path: Path to the czkawka_cli executable.
        target_dir: Directory scanned (non-recursively) for duplicate videos.
        exclude_files: Paths/patterns passed to --excluded-items; may be empty.
        delete_method: Which copies of each duplicate group Czkawka deletes.

    Returns:
        The CLI's stdout on success, or None if the scan could not run.
    """
    flags = ["video", "--directories", target_dir, "--not-recursive",
             "--delete-method", delete_method.value]
    if exclude_files:
        flags.append("--excluded-items")
        # NOTE(review): czkawka_cli documents --excluded-items as
        # comma-separated patterns — confirm passing them as separate
        # arguments is accepted.
        flags.extend(exclude_files)
    # Similarity tolerance for the video hasher; configurable via env.
    flags.append("--tolerance")
    flags.append(os.environ.get("CK_DUPLICATE_TOLERANCE", "2"))
    print(flags)
    # Keep the try body minimal: only the subprocess call can raise here.
    try:
        result = subprocess.run([czkawka_path, *flags],
                                capture_output=True, text=True, check=True)
    except FileNotFoundError as e:
        # Fix: a missing/misconfigured binary previously escaped uncaught.
        print(f"Czkawka binary not found at {czkawka_path}: {e}")
        return None
    except subprocess.CalledProcessError as e:
        print(f"Failed to find duplicates: {e.stderr}")
        return None
    print(result.stdout)
    # Return the CLI output so callers can inspect it (previously None).
    return result.stdout

View File

@ -1,9 +1,11 @@
import os import os
from fastapi import BackgroundTasks, FastAPI from fastapi import BackgroundTasks, FastAPI
from dotenv import load_dotenv from dotenv import load_dotenv
from os import path
from qbittorrent_api import get_qbittorrent_files_downloading from qbittorrent_api import get_qbittorrent_files_downloading
from filemoving import group_files_by_prefix from filemoving import group_files_by_prefix
from deduplication import deduplicate_files
import uuid import uuid
import time import time
@ -33,4 +35,5 @@ def launch_job(job_id):
f.write(f"{job_id}\n") f.write(f"{job_id}\n")
# downloading = get_qbittorrent_files_downloading(qbit_url, qbit_user, qbit_password) # downloading = get_qbittorrent_files_downloading(qbit_url, qbit_user, qbit_password)
# deduplicate_files(target_dir, downloading)
# group_files_by_prefix(target_dir, downloading) # group_files_by_prefix(target_dir, downloading)