ML data (face detections, CLIP embeddings) is now fetched by default in backup-metadata. Use --no-ml to skip it. EXIF extraction (which requires downloading every file) remains opt-in via --exif. --all is an alias for --exif.
225 lines
7.3 KiB
TypeScript
225 lines
7.3 KiB
TypeScript
import { gunzipSync } from "node:zlib";
|
|
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
|
import { join } from "node:path";
|
|
import { tmpdir } from "node:os";
|
|
import sharp from "sharp";
|
|
import exifReader from "exif-reader";
|
|
import type { Client } from "./client.js";
|
|
import { decryptBlob, fromBase64 } from "./crypto/index.js";
|
|
import type { EnteFile } from "./model/types.js";
|
|
|
|
/** Callback that receives human-readable status messages while a backup runs. */
export type ProgressCallback = (message: string) => void;
|
|
|
|
/** Options accepted by `runMetadataBackup`. Every field may be omitted. */
export interface MetadataBackupOptions {
  /** Fetch ML data and embed it in each file's JSON. Treated as `true` when omitted. */
  mlData?: boolean;

  /**
   * Download each file and embed its image metadata / EXIF in the file's JSON.
   * Treated as `false` when omitted.
   */
  exif?: boolean;

  /** Receives progress messages; when omitted the messages are discarded. */
  onProgress?: ProgressCallback;
}
|
|
|
|
/**
 * Turn an arbitrary display name into a string that is safe to use as a
 * single path segment.
 *
 * Path separators, the punctuation Windows rejects in file names, and ASCII
 * control characters (U+0000-U+001F; a NUL makes Node's `fs` calls throw) are
 * each replaced with `_`. A run of leading dots is then collapsed into a single
 * `_`, so the result never starts with a dot.
 *
 * @param name - Name to sanitize.
 * @returns The sanitized name; an empty input yields an empty string.
 */
const sanitizePath = (name: string): string =>
  name.replace(/[\u0000-\u001f/\\:*?"<>|]/g, "_").replace(/^\.+/, "_");
|
|
|
|
/** Shape of one entry in the `data` array returned by `/files/data/fetch`. */
interface RawRemoteFileData {
  /** ID of the file this payload belongs to; used to look up its key. */
  fileID: number;

  /** Encrypted payload, base64-encoded. */
  encryptedData: string;

  /** Header handed to `decryptBlob` with the payload, base64-encoded. */
  decryptionHeader: string;

  /** Not read anywhere in this module. */
  updatedAt?: number;
}
|
|
|
|
/**
 * Download and decode the "mldata" blob for each of the given files.
 *
 * IDs are posted to `/files/data/fetch` in batches of 200, one request at a
 * time. Every returned entry is decrypted with that file's key, gunzipped and
 * parsed as JSON. Entries without a known key, and entries that fail any
 * decoding step, are left out of the result. Request failures are not caught
 * here.
 *
 * @param client - Client whose API client performs the requests.
 * @param fileIDs - IDs of the files to request ML data for.
 * @param fileKeys - Decryption key for each file, keyed by file ID.
 * @returns Parsed ML data keyed by file ID.
 */
const fetchMLDataForFiles = async (
  client: Client,
  fileIDs: number[],
  fileKeys: Map<number, Uint8Array>,
): Promise<Map<number, Record<string, unknown>>> => {
  const BATCH_SIZE = 200;
  const api = client.getApiClient();
  const decoded = new Map<number, Record<string, unknown>>();

  for (let start = 0; start < fileIDs.length; start += BATCH_SIZE) {
    const idsInBatch = fileIDs.slice(start, start + BATCH_SIZE);
    const response = await api.postJSON<{ data: RawRemoteFileData[] }>(
      "/files/data/fetch",
      { type: "mldata", fileIDs: idsInBatch },
    );

    for (const entry of response.data ?? []) {
      const fileKey = fileKeys.get(entry.fileID);
      if (!fileKey) continue;

      try {
        const ciphertext = fromBase64(entry.encryptedData);
        const header = fromBase64(entry.decryptionHeader);
        const plaintext = decryptBlob(ciphertext, header, fileKey);
        const json = gunzipSync(Buffer.from(plaintext)).toString("utf-8");
        decoded.set(entry.fileID, JSON.parse(json));
      } catch {
        // Best effort: an entry that cannot be decrypted, decompressed or
        // parsed is dropped without failing the whole batch.
      }
    }
  }

  return decoded;
};
|
|
|
|
/**
 * Download a file into a scratch directory and read its image metadata.
 *
 * The basic properties reported by sharp are always included. Embedded
 * blocks are added when present: parsed EXIF as `exif` (or the raw bytes as
 * base64 in `exifRaw` if parsing throws), `iptcRaw` and `iccRaw` as base64,
 * and XMP decoded as UTF-8 text in `xmp`.
 *
 * @param client - Client used to download the file.
 * @param file - File to inspect.
 * @returns The collected metadata, or `undefined` if the download or the
 *   metadata read throws. The scratch directory is removed in either case.
 */
const extractExif = async (
  client: Client,
  file: EnteFile,
): Promise<Record<string, unknown> | undefined> => {
  // Copied in this order so the serialized JSON keeps a stable key order.
  const basicFields = [
    "format",
    "width",
    "height",
    "space",
    "channels",
    "depth",
    "density",
    "chromaSubSampling",
    "isProgressive",
    "hasProfile",
    "hasAlpha",
    "orientation",
  ] as const;

  const scratchDir = mkdtempSync(join(tmpdir(), "quak-exif-"));
  try {
    const downloadedPath = join(scratchDir, "original");
    await client.downloadFile(file, downloadedPath);
    const info = await sharp(downloadedPath).metadata();

    const collected: Record<string, unknown> = {};
    for (const field of basicFields) {
      collected[field] = info[field];
    }

    if (info.exif) {
      try {
        collected.exif = exifReader(info.exif);
      } catch {
        // Malformed EXIF; keep the raw bytes as base64 instead
        collected.exifRaw = info.exif.toString("base64");
      }
    }

    if (info.iptc) {
      collected.iptcRaw = info.iptc.toString("base64");
    }

    if (info.xmp) {
      try {
        collected.xmp = Buffer.from(info.xmp).toString("utf-8");
      } catch {
        collected.xmpRaw = info.xmp.toString("base64");
      }
    }

    if (info.icc) {
      collected.iccRaw = info.icc.toString("base64");
    }

    return collected;
  } catch {
    // Best effort: any failure while downloading or reading the file means
    // "no metadata" rather than an error for the caller.
    return undefined;
  } finally {
    rmSync(scratchDir, { recursive: true, force: true });
  }
};
|
|
|
|
/**
 * Write a metadata-only backup of the account into `outDir`.
 *
 * Files produced:
 * - `account.json`: email and user ID reported by `client.whoami()`.
 * - `collections/<id>-<sanitized name>/_collection.json`: collection metadata.
 * - `collections/<id>-<sanitized name>/<fileID>.json`: one JSON document per
 *   file listed in that collection.
 *
 * A file that is listed in several collections gets one JSON document in each
 * of them. ML data and image metadata, when requested, are embedded in every
 * one of those documents; the file itself is downloaded at most once.
 *
 * @param client - Client used for all remote calls.
 * @param outDir - Directory to write into; created if it does not exist.
 * @param opts - Optional switches. `mlData` is treated as `true` and `exif`
 *   as `false` when omitted; `onProgress` receives status messages.
 */
export const runMetadataBackup = async (
  client: Client,
  outDir: string,
  opts?: MetadataBackupOptions,
): Promise<void> => {
  const log = opts?.onProgress ?? (() => {});
  const wantML = opts?.mlData ?? true;
  const wantExif = opts?.exif ?? false;

  mkdirSync(outDir, { recursive: true });
  mkdirSync(join(outDir, "collections"), { recursive: true });

  const { email, userID } = client.whoami();
  writeFileSync(
    join(outDir, "account.json"),
    JSON.stringify({ email, userID }, null, 2),
  );

  log("Fetching collections...");
  const collections = await client.listCollections();

  const allFiles: { file: EnteFile; colDirName: string }[] = [];
  // Key of each distinct file, taken from the first collection that lists it.
  const fileKeys = new Map<number, Uint8Array>();
  // IDs that were listed more than once; only these need their EXIF result
  // kept around for reuse.
  const sharedFileIDs = new Set<number>();

  for (const col of collections) {
    const dirName = `${col.id}-${sanitizePath(col.name || "unnamed")}`;
    const colDir = join(outDir, "collections", dirName);
    mkdirSync(colDir, { recursive: true });

    const collectionMeta: Record<string, unknown> = {
      id: col.id,
      name: col.name,
      type: col.type,
      ownerID: col.ownerID,
      isShared: col.isShared,
      updationTime: col.updationTime,
    };
    if (col.magicMetadata) collectionMeta.magicMetadata = col.magicMetadata;
    if (col.pubMagicMetadata)
      collectionMeta.pubMagicMetadata = col.pubMagicMetadata;
    if (col.sharedMagicMetadata)
      collectionMeta.sharedMagicMetadata = col.sharedMagicMetadata;

    writeFileSync(
      join(colDir, "_collection.json"),
      JSON.stringify(collectionMeta, null, 2),
    );

    log(`[${col.name}] Fetching files...`);
    const files = await client.listFiles(col.id, col.key);
    log(`[${col.name}] ${files.length} file(s)`);

    for (const file of files) {
      allFiles.push({ file, colDirName: dirName });
      if (fileKeys.has(file.id)) {
        sharedFileIDs.add(file.id);
      } else {
        fileKeys.set(file.id, file.key);
      }
    }
  }

  // Fetch ML data in bulk if requested
  let mlDataMap = new Map<number, Record<string, unknown>>();
  if (wantML) {
    log("Fetching ML data (face detections, CLIP embeddings)...");
    mlDataMap = await fetchMLDataForFiles(
      client,
      [...fileKeys.keys()],
      fileKeys,
    );
    log(`Got ML data for ${mlDataMap.size} file(s)`);
  }

  // Write per-file JSON (with optional ML data and EXIF)
  // Extraction results for files listed in several collections. A failed
  // extraction is stored as `undefined` so it is not retried.
  const exifCache = new Map<number, Record<string, unknown> | undefined>();
  for (const { file, colDirName } of allFiles) {
    const colDir = join(outDir, "collections", colDirName);

    const fileMeta: Record<string, unknown> = {
      id: file.id,
      collectionID: file.collectionID,
      ownerID: file.ownerID,
      metadata: file.metadata,
      updationTime: file.updationTime,
    };
    if (file.magicMetadata) fileMeta.magicMetadata = file.magicMetadata;
    if (file.pubMagicMetadata)
      fileMeta.pubMagicMetadata = file.pubMagicMetadata;

    const ml = mlDataMap.get(file.id);
    if (ml) fileMeta.mlData = ml;

    if (wantExif) {
      let exifData: Record<string, unknown> | undefined;
      if (exifCache.has(file.id)) {
        // Already downloaded for another collection; reuse that result so
        // this copy's JSON is as complete as the first one.
        exifData = exifCache.get(file.id);
      } else {
        log(`[${file.metadata.title}] Extracting EXIF...`);
        exifData = await extractExif(client, file);
        if (sharedFileIDs.has(file.id)) exifCache.set(file.id, exifData);
      }
      if (exifData) fileMeta.imageMetadata = exifData;
    }

    writeFileSync(
      join(colDir, `${file.id}.json`),
      JSON.stringify(fileMeta, null, 2),
    );
  }

  log("Metadata backup complete.");
};
|