Remove duplicate files using Czkawka
This commit is contained in:
parent
e434d28c16
commit
2e65e1bf9a
3
.gitignore
vendored
3
.gitignore
vendored
@ -160,3 +160,6 @@ cython_debug/
|
|||||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
#.idea/
|
#.idea/
|
||||||
|
|
||||||
|
|
||||||
|
vendor/czkawka/*
|
||||||
@ -22,7 +22,7 @@ Store all the logs for the current job in order to be able to debug and keep tra
|
|||||||
- [x] Read qbittorrent credentials from .env file
|
- [x] Read qbittorrent credentials from .env file
|
||||||
- [x] Implement API endpoint using FastAPI
|
- [x] Implement API endpoint using FastAPI
|
||||||
- [x] Run organization job on a separate thread
|
- [x] Run organization job on a separate thread
|
||||||
- [ ] Deduplicate files using Czkawka
|
- [x] Deduplicate files using Czkawka
|
||||||
- [ ] Add unit tests
|
- [ ] Add unit tests
|
||||||
- [ ] Add logging
|
- [ ] Add logging
|
||||||
- [ ] Make it run in docker
|
- [ ] Make it run in docker
|
||||||
|
|||||||
31
src/deduplication.py
Normal file
31
src/deduplication.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
import subprocess
|
||||||
|
import os
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
class CZKAWKA_DELETION_METHOD(Enum):
    """Deletion strategies accepted by the Czkawka CLI's --delete-method flag.

    Each member's value is the short code the czkawka_cli binary expects on
    the command line.
    """

    ALL_EXCEPT_NEWEST = "AEN"    # keep only the most recently modified copy
    ALL_EXCEPT_OLDEST = "AEO"    # keep only the oldest copy
    ALL_EXCEPT_BIGGEST = "AEB"   # keep only the largest copy
    ALL_EXCEPT_SMALLEST = "AES"  # keep only the smallest copy
    NONE = "NONE"                # report duplicates without deleting anything
|
||||||
|
|
||||||
|
def deduplicate_files(target_dir, exclude_files):
    """Remove duplicate video files under *target_dir* via the Czkawka CLI.

    Keeps the smallest copy of each duplicate group (ALL_EXCEPT_SMALLEST)
    and deletes the rest.

    Parameters
    ----------
    target_dir : str
        Directory scanned (non-recursively) for duplicate videos.
    exclude_files : list[str] | None
        Paths Czkawka must leave untouched — presumably files still being
        downloaded; confirm against the caller.

    Returns
    -------
    Whatever ``_remove_duplicates`` produces for this invocation.
    """
    # Path to the czkawka_cli binary; defaults to the vendored location
    # baked into the container image.
    czkawka_path = os.environ.get("CZKAWKA_PATH", "/app/vendor/czkawka/czkawka_cli")
    # Original bound the helper's result to an unused local; propagate it to
    # the caller instead so the deduplication outcome is observable.
    return _remove_duplicates(
        czkawka_path, target_dir, exclude_files, CZKAWKA_DELETION_METHOD.ALL_EXCEPT_SMALLEST)
|
||||||
|
|
||||||
|
def _remove_duplicates(czkawka_path, target_dir, exclude_files, delete_method: CZKAWKA_DELETION_METHOD):
    """Invoke the Czkawka CLI to find (and delete) duplicate videos.

    Parameters
    ----------
    czkawka_path : str
        Path to the czkawka_cli executable.
    target_dir : str
        Directory scanned for duplicate videos (passed with --not-recursive).
    exclude_files : list[str] | None
        Items forwarded to Czkawka's --excluded-items so they are never deleted.
    delete_method : CZKAWKA_DELETION_METHOD
        Which copies of each duplicate group Czkawka should delete.

    Returns
    -------
    str | None
        Czkawka's stdout on success, or None when the CLI exits non-zero.
        A missing binary still raises FileNotFoundError, as in the original.
    """
    # Building the flag list cannot raise CalledProcessError, so it lives
    # outside the try block (keep the try body minimal).
    flags = ["video", "--directories", target_dir, "--not-recursive",
             "--delete-method", delete_method.value]
    if exclude_files:
        flags.append("--excluded-items")
        flags.extend(exclude_files)
    # Similarity tolerance for video comparison; configurable via env var.
    flags.append("--tolerance")
    flags.append(os.environ.get("CK_DUPLICATE_TOLERANCE", "2"))
    print(flags)  # NOTE(review): debug leftover — consider switching to logging

    try:
        # List argv (shell=False) keeps paths with spaces/metacharacters safe.
        result = subprocess.run([czkawka_path, *flags],
                                capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        # Best-effort: report the failure without crashing the organization job.
        print(f"Failed to find duplicates: {e.stderr}")
        return None

    print(result.stdout)
    # Original discarded the output even though the caller binds the result;
    # return it so duplicates can be inspected/logged upstream.
    return result.stdout
|
||||||
@ -1,9 +1,11 @@
|
|||||||
import os
|
import os
|
||||||
from fastapi import BackgroundTasks, FastAPI
|
from fastapi import BackgroundTasks, FastAPI
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
from os import path
|
||||||
|
|
||||||
from qbittorrent_api import get_qbittorrent_files_downloading
|
from qbittorrent_api import get_qbittorrent_files_downloading
|
||||||
from filemoving import group_files_by_prefix
|
from filemoving import group_files_by_prefix
|
||||||
|
from deduplication import deduplicate_files
|
||||||
import uuid
|
import uuid
|
||||||
import time
|
import time
|
||||||
|
|
||||||
@ -33,4 +35,5 @@ def launch_job(job_id):
|
|||||||
f.write(f"{job_id}\n")
|
f.write(f"{job_id}\n")
|
||||||
|
|
||||||
# downloading = get_qbittorrent_files_downloading(qbit_url, qbit_user, qbit_password)
|
# downloading = get_qbittorrent_files_downloading(qbit_url, qbit_user, qbit_password)
|
||||||
|
# deduplicate_files(target_dir, downloading)
|
||||||
# group_files_by_prefix(target_dir, downloading)
|
# group_files_by_prefix(target_dir, downloading)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user