feat: Remove duplicate checking functionality

Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
This commit is contained in:
2026-05-19 00:36:08 +00:00
parent 3037d4078c
commit 004930f60d
2 changed files with 11 additions and 131 deletions

View File

@@ -3,7 +3,6 @@ File Drop Uploader Backend (FastAPI, simplified)
----------------------------------------------------
- Serves static frontend (no settings UI)
- Uploads to file system
- Duplicate checks (local SHA-1 DB)
- WebSocket progress per session
- Ephemeral "Connected" banner via /api/ping
"""
@@ -34,7 +33,7 @@ from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from starlette.websockets import WebSocketState
from starlette.middleware.sessions import SessionMiddleware
from PIL import Image, ExifTags
from PIL import Image
try:
import qrcode
except Exception:
@@ -80,61 +79,6 @@ except Exception:
pass
# ---------- DB (local dedupe cache) ----------
def db_init() -> None:
"""Create the local SQLite table used for duplicate checks (idempotent)."""
conn = sqlite3.connect(SETTINGS.state_db)
cur = conn.cursor()
cur.execute(
"""
CREATE TABLE IF NOT EXISTS uploads (
id INTEGER PRIMARY KEY AUTOINCREMENT,
checksum TEXT UNIQUE,
filename TEXT,
size INTEGER,
device_asset_id TEXT,
immich_asset_id TEXT,
created_at TEXT,
inserted_at TEXT DEFAULT CURRENT_TIMESTAMP
);
"""
)
conn.commit()
conn.close()
def db_lookup_checksum(checksum: str) -> Optional[dict]:
"""Return a record for the given checksum if seen before (None if not)."""
conn = sqlite3.connect(SETTINGS.state_db)
cur = conn.cursor()
cur.execute("SELECT checksum, immich_asset_id FROM uploads WHERE checksum = ?", (checksum,))
row = cur.fetchone()
conn.close()
if row:
return {"checksum": row[0], "immich_asset_id": row[1]}
return None
def db_lookup_device_asset(device_asset_id: str) -> bool:
"""True if a deviceAssetId has been uploaded by this service previously."""
conn = sqlite3.connect(SETTINGS.state_db)
cur = conn.cursor()
cur.execute("SELECT 1 FROM uploads WHERE device_asset_id = ?", (device_asset_id,))
row = cur.fetchone()
conn.close()
return bool(row)
def db_insert_upload(checksum: str, filename: str, size: int, device_asset_id: str, immich_asset_id: Optional[str], created_at: str) -> None:
"""Insert a newly-uploaded asset into the local cache (ignore on duplicates)."""
conn = sqlite3.connect(SETTINGS.state_db)
cur = conn.cursor()
cur.execute(
"INSERT OR IGNORE INTO uploads (checksum, filename, size, device_asset_id, immich_asset_id, created_at) VALUES (?,?,?,?,?,?)",
(checksum, filename, size, device_asset_id, immich_asset_id, created_at)
)
conn.commit()
conn.close()
db_init()
# ---------- WebSocket hub ----------
@@ -411,33 +355,6 @@ def get_safe_subpath(relative_path: Optional[str]) -> str:
return os.path.join(*safe_parts)
def read_exif_datetimes(file_bytes: bytes):
"""
Extract EXIF DateTimeOriginal / ModifyDate values when possible.
Returns (created, modified) as datetime or (None, None) on failure.
"""
created = modified = None
try:
with Image.open(io.BytesIO(file_bytes)) as im:
exif = getattr(im, "_getexif", lambda: None)() or {}
if exif:
tags = {ExifTags.TAGS.get(k, k): v for k, v in exif.items()}
dt_original = tags.get("DateTimeOriginal") or tags.get("CreateDate")
dt_modified = tags.get("ModifyDate") or dt_original
def parse_dt(s: str):
try:
return datetime.strptime(s, "%Y:%m:%d %H:%M:%S")
except Exception:
return None, None
if isinstance(dt_original, str):
created = parse_dt(dt_original)
if isinstance(dt_modified, str):
modified = parse_dt(dt_modified)
except Exception:
pass
return created, modified
def slugify(value: Optional[str]) -> str:
"""
Normalizes string, converts to lowercase, removes non-alpha characters,
@@ -606,26 +523,11 @@ async def api_upload(
fingerprint: Optional[str] = Form(None),
public_folder_name: Optional[str] = Form(None),
):
"""Receive a file, check duplicates; stream progress via WS."""
"""Receive a file and stream progress via WS."""
raw = await file.read()
size = len(raw)
checksum = sha1_hex(raw)
exif_created, exif_modified = read_exif_datetimes(raw)
created_at = exif_created or (datetime.fromtimestamp(last_modified / 1000) if last_modified else datetime.utcnow())
modified_at = exif_modified or created_at
created_iso = created_at.isoformat()
modified_iso = modified_at.isoformat()
device_asset_id = f"{file.filename}-{last_modified or 0}-{size}"
if db_lookup_checksum(checksum):
await send_progress(session_id, item_id, "duplicate", 100, "Duplicate (by checksum - local cache)")
return JSONResponse({"status": "duplicate", "id": None}, status_code=200)
if db_lookup_device_asset(device_asset_id):
await send_progress(session_id, item_id, "duplicate", 100, "Already uploaded from this device (local cache)")
return JSONResponse({"status": "duplicate", "id": None}, status_code=200)
# Invite token validation (if provided)
target_album_name: Optional[str] = None
@@ -734,7 +636,6 @@ async def api_upload(
i += 1
with open(save_path, "wb") as f:
f.write(raw)
db_insert_upload(checksum, file.filename, size, device_asset_id, None, created_iso)
await add_file_to_batch(file.filename, size, display_album_name, bool(invite_token))
await reset_telegram_debounce()
@@ -955,21 +856,6 @@ async def api_upload_chunk_complete(request: Request) -> JSONResponse:
file_like_name = name
file_size = len(raw)
checksum = sha1_hex(raw)
exif_created, exif_modified = read_exif_datetimes(raw)
created_at = exif_created or (datetime.fromtimestamp(last_modified / 1000) if last_modified else datetime.utcnow())
modified_at = exif_modified or created_at
created_iso = created_at.isoformat()
modified_iso = modified_at.isoformat()
device_asset_id = f"{file_like_name}-{last_modified or 0}-{file_size}"
# Local duplicate checks
if db_lookup_checksum(checksum):
await send_progress(session_id_local, item_id_local, "duplicate", 100, "Duplicate (by checksum - local cache)")
return JSONResponse({"status": "duplicate", "id": None}, status_code=200)
if db_lookup_device_asset(device_asset_id):
await send_progress(session_id_local, item_id_local, "duplicate", 100, "Already uploaded from this device (local cache)")
return JSONResponse({"status": "duplicate", "id": None}, status_code=200)
# Invite validation/gating mirrors api_upload
target_album_name: Optional[str] = None
@@ -1070,7 +956,6 @@ async def api_upload_chunk_complete(request: Request) -> JSONResponse:
i += 1
with open(save_path, "wb") as f:
f.write(raw)
db_insert_upload(checksum, file_like_name, file_size, device_asset_id, None, created_iso)
await add_file_to_batch(file_like_name, file_size, display_album_name, bool(invite_token))
msg = f"Saved to {display_album_name}/{os.path.basename(save_path)}"