Remove duplicate files using Czkawka

parent e434d28c16
commit 2e65e1bf9a

.gitignore (vendored, +3 lines)
@@ -160,3 +160,6 @@ cython_debug/
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+
+vendor/czkawka/*
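
The new ignore rule keeps the vendored czkawka_cli binary out of version control; it matches the default lookup path (/app/vendor/czkawka/czkawka_cli) that src/deduplication.py below falls back to when CZKAWKA_PATH is not set.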

@@ -22,7 +22,7 @@ Store all the logs for the current job in order to be able to debug and keep tra
 - [x] Read qbittorrent credentials from .env file
 - [x] Implement API endpoint using FastAPI
 - [x] Run organization job on a separate thread
-- [ ] Deduplicate files using Czkawka
+- [x] Deduplicate files using Czkawka
 - [ ] Add unit tests
 - [ ] Add logging
 - [ ] Make it run in docker

src/deduplication.py (new file, +31 lines)
@@ -0,0 +1,31 @@
+import subprocess
+import os
+from enum import Enum
+
+class CZKAWKA_DELETION_METHOD(Enum):
+    ALL_EXCEPT_NEWEST = "AEN"
+    ALL_EXCEPT_OLDEST = "AEO"
+    ALL_EXCEPT_BIGGEST = "AEB"
+    ALL_EXCEPT_SMALLEST = "AES"
+    NONE = "NONE"
+
+def deduplicate_files(target_dir, exclude_files):
+    czkawka_path = os.environ.get("CZKAWKA_PATH", "/app/vendor/czkawka/czkawka_cli")
+    duplicates = _remove_duplicates(
+        czkawka_path, target_dir, exclude_files, CZKAWKA_DELETION_METHOD.ALL_EXCEPT_SMALLEST)
+
+def _remove_duplicates(czkawka_path, target_dir, exclude_files, delete_method: CZKAWKA_DELETION_METHOD):
+    try:
+        flags = ["video", "--directories", target_dir, "--not-recursive", "--delete-method", delete_method.value]
+        if exclude_files:
+            flags.append("--excluded-items")
+            flags.extend(exclude_files)
+        flags.append("--tolerance")
+        flags.append(os.environ.get("CK_DUPLICATE_TOLERANCE", "2"))
+        print(flags)
+
+        result = subprocess.run([czkawka_path, *flags], capture_output=True, text=True, check=True)
+        print(result.stdout)
+
+    except subprocess.CalledProcessError as e:
+        print(f"Failed to find duplicates: {e.stderr}")
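
A minimal usage sketch for the new module, assuming a czkawka_cli binary has been unpacked locally; the paths and tolerance value below are placeholders, not part of this commit. Note that _remove_duplicates returns nothing, so the duplicates variable inside deduplicate_files is always None and the function runs purely for its side effects.

import os

# Placeholder paths: point the module at a local binary instead of
# the /app default baked into deduplicate_files.
os.environ["CZKAWKA_PATH"] = "./vendor/czkawka/czkawka_cli"
# Optional: override the similarity tolerance (the module defaults to "2").
os.environ["CK_DUPLICATE_TOLERANCE"] = "5"

from deduplication import deduplicate_files

# Scan /downloads for similar videos, keeping still-downloading files
# away from czkawka via --excluded-items.
deduplicate_files("/downloads", exclude_files=["/downloads/in_progress.mkv"])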

@@ -1,9 +1,11 @@
 import os
 from fastapi import BackgroundTasks, FastAPI
 from dotenv import load_dotenv
 from os import path
+
 from qbittorrent_api import get_qbittorrent_files_downloading
 from filemoving import group_files_by_prefix
+from deduplication import deduplicate_files
 import uuid
 import time

@@ -33,4 +35,5 @@ def launch_job(job_id):
         f.write(f"{job_id}\n")

     # downloading = get_qbittorrent_files_downloading(qbit_url, qbit_user, qbit_password)
+    # deduplicate_files(target_dir, downloading)
     # group_files_by_prefix(target_dir, downloading)
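
For reference, a sketch of launch_job with the commented-out calls enabled, using the imports added above. The environment-variable names and the way qbit_url, qbit_user, qbit_password, and target_dir are obtained are assumptions for illustration; the module's load_dotenv() call suggests they come from a .env file.

def launch_job(job_id):
    # Assumed env var names; the real ones are loaded via load_dotenv()
    # elsewhere in this module.
    qbit_url = os.environ["QBIT_URL"]
    qbit_user = os.environ["QBIT_USER"]
    qbit_password = os.environ["QBIT_PASSWORD"]
    target_dir = os.environ["TARGET_DIR"]

    # Ask qBittorrent which files are still downloading, exclude them
    # from deduplication, then group what remains by prefix.
    downloading = get_qbittorrent_files_downloading(qbit_url, qbit_user, qbit_password)
    deduplicate_files(target_dir, downloading)
    group_files_by_prefix(target_dir, downloading)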