import { gunzipSync } from "node:zlib";
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
import sharp from "sharp";
import exifReader from "exif-reader";
import type { Client } from "./client.js";
import { decryptBlob, fromBase64 } from "./crypto/index.js";
import type { EnteFile } from "./model/types.js";

/** Receives human-readable status messages while a backup is running. */
export type ProgressCallback = (message: string) => void;

/** Options accepted by {@link runMetadataBackup}. */
export interface MetadataBackupOptions {
  /** Fetch ML data and embed it in each file's JSON. Defaults to `true`. */
  mlData?: boolean;
  /** Download each file and embed its image metadata. Defaults to `false`. */
  exif?: boolean;
  /** Optional sink for progress messages. */
  onProgress?: ProgressCallback;
}

/** Loosely typed JSON object, used for everything serialised to disk. */
type JsonObject = Record<string, unknown>;

/** Key type is derived from the model so it always matches `EnteFile`. */
type FileKey = EnteFile["key"];

/**
 * Turns an arbitrary name into a single path segment: the characters
 * `/ \ : * ? " < > |` each become `_`, and a run of leading dots is replaced
 * by a single `_`.
 */
const sanitizePath = (name: string): string =>
  name.replace(/[/\\:*?"<>|]/g, "_").replace(/^\.+/, "_");

/** One entry of the `/files/data/fetch` response, as consumed below. */
interface RawRemoteFileData {
  fileID: number;
  encryptedData: string;
  decryptionHeader: string;
  updatedAt?: number;
}

/**
 * Fetches and decodes the "mldata" blobs for the given files.
 *
 * IDs are requested in batches of 200. Each returned entry is base64-decoded,
 * decrypted with the file's key, gunzipped and parsed as JSON.
 *
 * @param client - Client providing the API handle.
 * @param fileIDs - IDs to request.
 * @param fileKeys - Decryption key per file ID; entries without a key are skipped.
 * @returns Parsed ML data keyed by file ID. Entries that fail to decrypt,
 *   decompress or parse are omitted.
 */
const fetchMLDataForFiles = async (
  client: Client,
  fileIDs: number[],
  fileKeys: Map<number, FileKey>,
): Promise<Map<number, JsonObject>> => {
  const api = client.getApiClient();
  const result = new Map<number, JsonObject>();
  const batchSize = 200;

  for (let i = 0; i < fileIDs.length; i += batchSize) {
    const batch = fileIDs.slice(i, i + batchSize);
    const { data } = await api.postJSON<{ data: RawRemoteFileData[] }>(
      "/files/data/fetch",
      { type: "mldata", fileIDs: batch },
    );

    // `data` may be absent in the response; treat that as an empty batch.
    for (const entry of data ?? []) {
      const key = fileKeys.get(entry.fileID);
      if (!key) continue;
      try {
        const decrypted = decryptBlob(
          fromBase64(entry.encryptedData),
          fromBase64(entry.decryptionHeader),
          key,
        );
        const jsonStr = gunzipSync(Buffer.from(decrypted)).toString("utf-8");
        result.set(entry.fileID, JSON.parse(jsonStr));
      } catch {
        // Corrupted ML data for this file; skip it
      }
    }
  }

  return result;
};

/**
 * Downloads a file into a throw-away temp directory and reads its image
 * metadata with sharp.
 *
 * @param client - Client used to download the file.
 * @param file - File to inspect.
 * @returns The collected metadata, or `undefined` if the download or the
 *   metadata read fails for any reason (best effort). The temp directory is
 *   removed in every case.
 */
const extractExif = async (
  client: Client,
  file: EnteFile,
): Promise<JsonObject | undefined> => {
  const tmpDir = mkdtempSync(join(tmpdir(), "quak-exif-"));
  try {
    const origPath = join(tmpDir, "original");
    await client.downloadFile(file, origPath);
    const meta = await sharp(origPath).metadata();

    const result: JsonObject = {
      format: meta.format,
      width: meta.width,
      height: meta.height,
      space: meta.space,
      channels: meta.channels,
      depth: meta.depth,
      density: meta.density,
      chromaSubSampling: meta.chromaSubSampling,
      isProgressive: meta.isProgressive,
      hasProfile: meta.hasProfile,
      hasAlpha: meta.hasAlpha,
      orientation: meta.orientation,
    };

    if (meta.exif) {
      try {
        result.exif = exifReader(meta.exif);
      } catch {
        // Malformed EXIF; store the raw bytes as base64 instead
        result.exifRaw = meta.exif.toString("base64");
      }
    }
    if (meta.iptc) result.iptcRaw = meta.iptc.toString("base64");
    if (meta.xmp) {
      try {
        result.xmp = Buffer.from(meta.xmp).toString("utf-8");
      } catch {
        // Fall back to base64 if the packet cannot be rendered as text.
        result.xmpRaw = meta.xmp.toString("base64");
      }
    }
    if (meta.icc) result.iccRaw = meta.icc.toString("base64");

    return result;
  } catch {
    // Best effort: a failed download or unreadable image yields no metadata.
    return undefined;
  } finally {
    rmSync(tmpDir, { recursive: true, force: true });
  }
};

/**
 * Writes a metadata-only backup of the account to `outDir`.
 *
 * Layout produced:
 * - `account.json` with the account's email and user ID;
 * - `collections/<id>-<sanitised name>/_collection.json` per collection;
 * - `collections/<id>-<sanitised name>/<fileID>.json` per file listed in that
 *   collection, optionally enriched with `mlData` and `imageMetadata`.
 *
 * @param client - Client used for all remote calls.
 * @param outDir - Destination directory; created if missing.
 * @param opts - See {@link MetadataBackupOptions}.
 */
export const runMetadataBackup = async (
  client: Client,
  outDir: string,
  opts?: MetadataBackupOptions,
): Promise<void> => {
  const log = opts?.onProgress ?? (() => {});
  const wantML = opts?.mlData ?? true;
  const wantExif = opts?.exif ?? false;

  mkdirSync(outDir, { recursive: true });
  mkdirSync(join(outDir, "collections"), { recursive: true });

  const { email, userID } = client.whoami();
  writeFileSync(
    join(outDir, "account.json"),
    JSON.stringify({ email, userID }, null, 2),
  );

  log("Fetching collections...");
  const collections = await client.listCollections();

  const allFiles: { file: EnteFile; colDirName: string }[] = [];
  // First key seen for each file ID; also serves as the set of known IDs.
  const fileKeys = new Map<number, FileKey>();
  // File IDs listed more than once, i.e. those that get several JSON writes.
  const sharedFileIDs = new Set<number>();

  for (const col of collections) {
    const dirName = `${col.id}-${sanitizePath(col.name || "unnamed")}`;
    const colDir = join(outDir, "collections", dirName);
    mkdirSync(colDir, { recursive: true });

    const collectionMeta: JsonObject = {
      id: col.id,
      name: col.name,
      type: col.type,
      ownerID: col.ownerID,
      isShared: col.isShared,
      updationTime: col.updationTime,
    };
    if (col.magicMetadata) collectionMeta.magicMetadata = col.magicMetadata;
    if (col.pubMagicMetadata)
      collectionMeta.pubMagicMetadata = col.pubMagicMetadata;
    if (col.sharedMagicMetadata)
      collectionMeta.sharedMagicMetadata = col.sharedMagicMetadata;
    writeFileSync(
      join(colDir, "_collection.json"),
      JSON.stringify(collectionMeta, null, 2),
    );

    log(`[${col.name}] Fetching files...`);
    const files = await client.listFiles(col.id, col.key);
    log(`[${col.name}] ${files.length} file(s)`);

    for (const file of files) {
      allFiles.push({ file, colDirName: dirName });
      if (fileKeys.has(file.id)) {
        sharedFileIDs.add(file.id);
      } else {
        fileKeys.set(file.id, file.key);
      }
    }
  }

  // Fetch ML data in bulk if requested
  let mlDataMap = new Map<number, JsonObject>();
  if (wantML) {
    log("Fetching ML data (face detections, CLIP embeddings)...");
    mlDataMap = await fetchMLDataForFiles(
      client,
      [...fileKeys.keys()],
      fileKeys,
    );
    log(`Got ML data for ${mlDataMap.size} file(s)`);
  }

  // Write per-file JSON (with optional ML data and EXIF)
  // EXIF results are remembered only for files listed more than once, so each
  // of their JSON files gets the same imageMetadata without re-downloading.
  const exifCache = new Map<number, JsonObject | undefined>();
  for (const { file, colDirName } of allFiles) {
    const colDir = join(outDir, "collections", colDirName);
    const fileMeta: JsonObject = {
      id: file.id,
      collectionID: file.collectionID,
      ownerID: file.ownerID,
      metadata: file.metadata,
      updationTime: file.updationTime,
    };
    if (file.magicMetadata) fileMeta.magicMetadata = file.magicMetadata;
    if (file.pubMagicMetadata)
      fileMeta.pubMagicMetadata = file.pubMagicMetadata;

    const ml = mlDataMap.get(file.id);
    if (ml) fileMeta.mlData = ml;

    if (wantExif) {
      let exifData: JsonObject | undefined;
      if (exifCache.has(file.id)) {
        // Already attempted for an earlier listing of this file; a cached
        // `undefined` means the attempt failed and is not retried.
        exifData = exifCache.get(file.id);
      } else {
        log(`[${file.metadata.title}] Extracting EXIF...`);
        exifData = await extractExif(client, file);
        if (sharedFileIDs.has(file.id)) exifCache.set(file.id, exifData);
      }
      if (exifData) fileMeta.imageMetadata = exifData;
    }

    writeFileSync(
      join(colDir, `${file.id}.json`),
      JSON.stringify(fileMeta, null, 2),
    );
  }

  log("Metadata backup complete.");
};