Add --ml and --exif flags to backup-metadata
--ml fetches face detections and CLIP embeddings from the /files/data/fetch endpoint (type 'mldata'). Each blob is gzipped and then encrypted with the file's key; we decrypt it with decryptBlob, gunzip it, and include the parsed JSON as 'mlData' in the per-file output. Blobs are fetched in batches of 200 file IDs. --exif downloads each file, runs sharp().metadata() to extract image properties (format, dimensions, color space, orientation), then parses the raw EXIF buffer with exif-reader for structured tags (lens, ISO, shutter, aperture, GPS altitude, etc.). It also captures raw IPTC, XMP, and ICC profile data. The result is included as 'imageMetadata' in the per-file output. Without either flag, behavior is unchanged (fast metadata-only dump). Adds exif-reader 2.0.3 as a runtime dependency. Adds 3 new tests (ML data decrypted, ML data absent when flag not set, EXIF extraction). 119 total tests, all green.
This commit is contained in:
@@ -22,6 +22,7 @@
|
||||
* the output tree is correct and complete.
|
||||
*/
|
||||
|
||||
import { gzipSync } from "node:zlib";
|
||||
import {
|
||||
existsSync,
|
||||
mkdtempSync,
|
||||
@@ -39,7 +40,9 @@ import {
|
||||
toBase64,
|
||||
deriveKEK,
|
||||
deriveLoginSubkey,
|
||||
encryptBlob,
|
||||
} from "../../src/crypto/index.js";
|
||||
import sharp from "sharp";
|
||||
import { Client } from "../../src/client.js";
|
||||
import { runMetadataBackup } from "../../src/metadata-backup.js";
|
||||
import type { KeyAttributes } from "../../src/auth/types.js";
|
||||
@@ -56,6 +59,12 @@ interface MetaMockState {
|
||||
encryptedToken: string;
|
||||
collections: Record<string, unknown>[];
|
||||
filesByCollection: Record<number, Record<string, unknown>[]>;
|
||||
// For ML data and EXIF tests
|
||||
encryptedMLData: Record<
|
||||
number,
|
||||
{ encryptedData: string; decryptionHeader: string }
|
||||
>;
|
||||
fileCiphertexts: Record<number, Uint8Array>;
|
||||
}
|
||||
|
||||
let mock: MetaMockState;
|
||||
@@ -245,6 +254,58 @@ const buildMetaMock = async (): Promise<MetaMockState> => {
|
||||
updationTime: 1710000000000000,
|
||||
};
|
||||
|
||||
// Encrypt ML data for file 100 (gzipped JSON, encrypted with file key)
|
||||
const mlPayload = JSON.stringify({
|
||||
face: {
|
||||
version: 1,
|
||||
client: "test",
|
||||
width: 3000,
|
||||
height: 2000,
|
||||
faces: [
|
||||
{
|
||||
faceID: "face-abc",
|
||||
detection: {
|
||||
box: { x: 0.1, y: 0.2, width: 0.3, height: 0.4 },
|
||||
landmarks: [
|
||||
{ x: 0.15, y: 0.25 },
|
||||
{ x: 0.25, y: 0.25 },
|
||||
],
|
||||
},
|
||||
score: 0.98,
|
||||
blur: 12.5,
|
||||
embedding: [0.1, 0.2, 0.3],
|
||||
},
|
||||
],
|
||||
},
|
||||
clip: {
|
||||
version: 1,
|
||||
client: "test",
|
||||
embedding: [0.5, 0.6, 0.7],
|
||||
},
|
||||
});
|
||||
const gzipped = gzipSync(Buffer.from(mlPayload));
|
||||
const { header: mlHeader, ciphertext: mlCiphertext } = encryptBlob(
|
||||
new Uint8Array(gzipped),
|
||||
fk1,
|
||||
);
|
||||
|
||||
// Generate a real JPEG for EXIF extraction tests
|
||||
const tinyJpeg = await sharp({
|
||||
create: { width: 100, height: 80, channels: 3, background: "red" },
|
||||
})
|
||||
.jpeg({ quality: 80 })
|
||||
.toBuffer();
|
||||
const filePush1 =
|
||||
sodium.crypto_secretstream_xchacha20poly1305_init_push(fk1);
|
||||
const encFileBody1 = sodium.crypto_secretstream_xchacha20poly1305_push(
|
||||
filePush1.state,
|
||||
new Uint8Array(tinyJpeg),
|
||||
null,
|
||||
sodium.crypto_secretstream_xchacha20poly1305_TAG_FINAL,
|
||||
);
|
||||
// Patch rawFile1's file.decryptionHeader to match the push header
|
||||
rawFile1.file.decryptionHeader = toBase64(filePush1.header);
|
||||
|
||||
return {
|
||||
verifier,
|
||||
srpAttributes: {
|
||||
@@ -259,6 +320,13 @@ const buildMetaMock = async (): Promise<MetaMockState> => {
|
||||
encryptedToken: toBase64(encToken),
|
||||
collections: [rawColl1, rawColl2],
|
||||
filesByCollection: { 10: [rawFile1], 20: [rawFile2] },
|
||||
encryptedMLData: {
|
||||
100: {
|
||||
encryptedData: toBase64(mlCiphertext),
|
||||
decryptionHeader: toBase64(mlHeader),
|
||||
},
|
||||
},
|
||||
fileCiphertexts: { 100: encFileBody1 },
|
||||
};
|
||||
};
|
||||
|
||||
@@ -316,6 +384,29 @@ const buildMetaFetch = (m: MetaMockState) => {
|
||||
hasMore: false,
|
||||
});
|
||||
}
|
||||
if (path === "/files/data/fetch") {
|
||||
const body = JSON.parse(init?.body as string);
|
||||
const data = (body.fileIDs as number[])
|
||||
.filter((id: number) => m.encryptedMLData[id])
|
||||
.map((id: number) => ({
|
||||
fileID: id,
|
||||
...m.encryptedMLData[id],
|
||||
updatedAt: 1700000000000000,
|
||||
}));
|
||||
return json({ data });
|
||||
}
|
||||
if (
|
||||
url.includes("files.ente.io") ||
|
||||
path.startsWith("/files/download/")
|
||||
) {
|
||||
const parsed = new URL(url);
|
||||
const fileID = Number(
|
||||
parsed.searchParams.get("fileID") ?? path.split("/").pop(),
|
||||
);
|
||||
const ct = m.fileCiphertexts[fileID];
|
||||
if (ct) return new Response(ct, { status: 200 });
|
||||
return new Response("not found", { status: 404 });
|
||||
}
|
||||
return new Response("not found", { status: 404 });
|
||||
}) as typeof globalThis.fetch;
|
||||
};
|
||||
@@ -465,4 +556,82 @@ describe("quak backup-metadata", () => {
|
||||
);
|
||||
expect(account.email).toBe(TEST_EMAIL);
|
||||
});
|
||||
|
||||
// Runs the backup with the `mlData` option enabled and verifies that the
// per-file JSON for file 100 carries the decrypted, parsed ML payload
// (one face entry plus a CLIP embedding).
it("fetches and decrypts ML data when --ml is set", async () => {
  const outDir = join(testDir, "ml-data");
  const fetchImpl = buildMetaFetch(mock);
  const client = await Client.login({
    email: TEST_EMAIL,
    password: TEST_PASSWORD,
    apiOptions: { fetch: fetchImpl },
  });

  await runMetadataBackup(client, outDir, { mlData: true });

  // Locate the "Vacation" collection directory and load file 100's record.
  const collectionsRoot = join(outDir, "collections");
  const vacDir = readdirSync(collectionsRoot).find((name) =>
    name.includes("Vacation"),
  )!;
  const rawJson = readFileSync(join(collectionsRoot, vacDir, "100.json"), "utf-8");
  const fileMeta = JSON.parse(rawJson);

  // ML data should be present and decrypted
  const ml = fileMeta.mlData;
  expect(ml).toBeDefined();
  expect(ml.face).toBeDefined();

  // Face section: exactly one detection with the expected ID, score and box.
  const faces = ml.face.faces;
  expect(faces.length).toBe(1);
  const firstFace = faces[0];
  expect(firstFace.faceID).toBe("face-abc");
  expect(firstFace.score).toBeCloseTo(0.98);
  expect(firstFace.detection.box.x).toBeCloseTo(0.1);

  // CLIP section: the embedding vector is preserved as-is.
  expect(ml.clip).toBeDefined();
  expect(ml.clip.embedding).toEqual([0.5, 0.6, 0.7]);
});
|
||||
|
||||
// Runs the backup with no options and verifies that the per-file JSON for
// file 100 has no `mlData` property.
it("does not include ML data when --ml is not set", async () => {
  const outDir = join(testDir, "no-ml");
  const fetchImpl = buildMetaFetch(mock);
  const client = await Client.login({
    email: TEST_EMAIL,
    password: TEST_PASSWORD,
    apiOptions: { fetch: fetchImpl },
  });

  await runMetadataBackup(client, outDir);

  // Locate the "Vacation" collection directory and load file 100's record.
  const collectionsRoot = join(outDir, "collections");
  const vacDir = readdirSync(collectionsRoot).find((name) =>
    name.includes("Vacation"),
  )!;
  const rawJson = readFileSync(join(collectionsRoot, vacDir, "100.json"), "utf-8");
  const fileMeta = JSON.parse(rawJson);

  expect(fileMeta.mlData).toBeUndefined();
});
|
||||
|
||||
// Runs the backup with the `exif` option enabled and verifies that the
// per-file JSON for file 100 carries `imageMetadata` with the expected
// format, dimensions and channel count.
it("extracts EXIF from downloaded files when --exif is set", async () => {
  const outDir = join(testDir, "exif-data");
  const fetchImpl = buildMetaFetch(mock);
  const client = await Client.login({
    email: TEST_EMAIL,
    password: TEST_PASSWORD,
    apiOptions: { fetch: fetchImpl },
  });

  await runMetadataBackup(client, outDir, { exif: true });

  // Locate the "Vacation" collection directory and load file 100's record.
  const collectionsRoot = join(outDir, "collections");
  const vacDir = readdirSync(collectionsRoot).find((name) =>
    name.includes("Vacation"),
  )!;
  const rawJson = readFileSync(join(collectionsRoot, vacDir, "100.json"), "utf-8");
  const fileMeta = JSON.parse(rawJson);

  // imageMetadata from sharp should be present
  const imageInfo = fileMeta.imageMetadata;
  expect(imageInfo).toBeDefined();
  expect(imageInfo.format).toBe("jpeg");
  expect(imageInfo.width).toBe(100);
  expect(imageInfo.height).toBe(80);
  expect(imageInfo.channels).toBe(3);
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user