ML data always included in backup-metadata; remove --no-ml

ML metadata (face detections, CLIP embeddings) is not a separate
category from the rest of the metadata. It is always fetched and
included. The only opt-in is --exif (or --all), which requires
downloading every file for EXIF extraction.
This commit is contained in:
2026-06-09 17:42:30 -04:00
parent 21a1a78f07
commit 5e6069f574
3 changed files with 17 additions and 73 deletions

View File

@@ -340,26 +340,19 @@ program
"Dump all decrypted account metadata to a directory of JSON files", "Dump all decrypted account metadata to a directory of JSON files",
) )
.argument("<dir>", "Output directory") .argument("<dir>", "Output directory")
.option("--no-ml", "Skip ML data (face detections, CLIP embeddings)")
.option( .option(
"--exif", "--exif",
"Download each file and extract full EXIF/IPTC/XMP metadata (slow)", "Download each file and extract full EXIF/IPTC/XMP metadata (slow)",
) )
.option("--all", "Alias for --exif") .option("--all", "Alias for --exif")
.action( .action(async (dir: string, opts: { exif?: boolean; all?: boolean }) => {
async (
dir: string,
opts: { ml?: boolean; exif?: boolean; all?: boolean },
) => {
await init(); await init();
const client = requireSession(); const client = requireSession();
await runMetadataBackup(client, dir, { await runMetadataBackup(client, dir, {
mlData: opts.ml,
exif: opts.exif || opts.all, exif: opts.exif || opts.all,
onProgress: (msg) => stderr.write(msg + "\n"), onProgress: (msg) => stderr.write(msg + "\n"),
}); });
}, });
);
program program
.command("backup") .command("backup")

View File

@@ -11,7 +11,6 @@ import type { EnteFile } from "./model/types.js";
export type ProgressCallback = (message: string) => void; export type ProgressCallback = (message: string) => void;
export interface MetadataBackupOptions { export interface MetadataBackupOptions {
mlData?: boolean;
exif?: boolean; exif?: boolean;
onProgress?: ProgressCallback; onProgress?: ProgressCallback;
} }
@@ -120,7 +119,6 @@ export const runMetadataBackup = async (
opts?: MetadataBackupOptions, opts?: MetadataBackupOptions,
): Promise<void> => { ): Promise<void> => {
const log = opts?.onProgress ?? (() => {}); const log = opts?.onProgress ?? (() => {});
const wantML = opts?.mlData ?? true;
const wantExif = opts?.exif ?? false; const wantExif = opts?.exif ?? false;
mkdirSync(outDir, { recursive: true }); mkdirSync(outDir, { recursive: true });
@@ -176,17 +174,13 @@ export const runMetadataBackup = async (
} }
} }
// Fetch ML data in bulk if requested
let mlDataMap = new Map<number, Record<string, unknown>>();
if (wantML) {
log("Fetching ML data (face detections, CLIP embeddings)..."); log("Fetching ML data (face detections, CLIP embeddings)...");
mlDataMap = await fetchMLDataForFiles( const mlDataMap = await fetchMLDataForFiles(
client, client,
[...fileKeys.keys()], [...fileKeys.keys()],
fileKeys, fileKeys,
); );
log(`Got ML data for ${mlDataMap.size} file(s)`); log(`Got ML data for ${mlDataMap.size} file(s)`);
}
// Write per-file JSON (with optional ML data and EXIF) // Write per-file JSON (with optional ML data and EXIF)
const writtenFileIDs = new Set<number>(); const writtenFileIDs = new Set<number>();

View File

@@ -557,7 +557,7 @@ describe("quak backup-metadata", () => {
expect(account.email).toBe(TEST_EMAIL); expect(account.email).toBe(TEST_EMAIL);
}); });
it("fetches and decrypts ML data when --ml is set", async () => { it("fetches and decrypts ML data by default", async () => {
const outDir = join(testDir, "ml-data"); const outDir = join(testDir, "ml-data");
const client = await Client.login({ const client = await Client.login({
email: TEST_EMAIL, email: TEST_EMAIL,
@@ -565,7 +565,7 @@ describe("quak backup-metadata", () => {
apiOptions: { fetch: buildMetaFetch(mock) }, apiOptions: { fetch: buildMetaFetch(mock) },
}); });
await runMetadataBackup(client, outDir, { mlData: true }); await runMetadataBackup(client, outDir);
const collDirs = readdirSync(join(outDir, "collections")); const collDirs = readdirSync(join(outDir, "collections"));
const vacDir = collDirs.find((d) => d.includes("Vacation"))!; const vacDir = collDirs.find((d) => d.includes("Vacation"))!;
@@ -587,49 +587,6 @@ describe("quak backup-metadata", () => {
expect(fileMeta.mlData.clip.embedding).toEqual([0.5, 0.6, 0.7]); expect(fileMeta.mlData.clip.embedding).toEqual([0.5, 0.6, 0.7]);
}); });
it("includes ML data by default", async () => {
const outDir = join(testDir, "ml-default");
const client = await Client.login({
email: TEST_EMAIL,
password: TEST_PASSWORD,
apiOptions: { fetch: buildMetaFetch(mock) },
});
await runMetadataBackup(client, outDir);
const collDirs = readdirSync(join(outDir, "collections"));
const vacDir = collDirs.find((d) => d.includes("Vacation"))!;
const fileMeta = JSON.parse(
readFileSync(
join(outDir, "collections", vacDir, "100.json"),
"utf-8",
),
);
expect(fileMeta.mlData).toBeDefined();
expect(fileMeta.mlData.face.faces[0].faceID).toBe("face-abc");
});
it("excludes ML data when mlData: false", async () => {
const outDir = join(testDir, "no-ml");
const client = await Client.login({
email: TEST_EMAIL,
password: TEST_PASSWORD,
apiOptions: { fetch: buildMetaFetch(mock) },
});
await runMetadataBackup(client, outDir, { mlData: false });
const collDirs = readdirSync(join(outDir, "collections"));
const vacDir = collDirs.find((d) => d.includes("Vacation"))!;
const fileMeta = JSON.parse(
readFileSync(
join(outDir, "collections", vacDir, "100.json"),
"utf-8",
),
);
expect(fileMeta.mlData).toBeUndefined();
});
it("extracts EXIF from downloaded files when --exif is set", async () => { it("extracts EXIF from downloaded files when --exif is set", async () => {
const outDir = join(testDir, "exif-data"); const outDir = join(testDir, "exif-data");
const client = await Client.login({ const client = await Client.login({