refactor: vendor phishing blocklist, delta-only in-memory updates
All checks were successful
check / check (push) Successful in 25s

Vendor the MetaMask eth-phishing-detect config.json (231k domains) into
src/data/phishing-domains.json as the baseline blocklist shipped with
the extension.

On 24h refresh, only the delta (new domains not in the vendored snapshot)
is kept in memory. Domain checks hit the in-memory delta first (fresh
scam sites), then binary-search the vendored sorted array.

If the delta is under 256 KiB it is persisted to chrome.storage.local
so it survives service-worker restarts without re-fetching.

Removes the previous approach of downloading and holding the full
blocklist in memory as a Set.
This commit is contained in:
2026-03-01 07:33:10 -08:00
parent b8d81a4c8a
commit 0d06df6cbe
3 changed files with 231715 additions and 80 deletions

View File

@@ -2,11 +2,14 @@ const {
isPhishingDomain,
loadConfig,
getBlocklistSize,
getDeltaSize,
hostnameVariants,
binarySearch,
_reset,
} = require("../src/shared/phishingDomains");
// Reset state before each test to avoid cross-test contamination.
// The vendored baseline is loaded automatically via require().
// _reset() clears only the delta state, not the vendored baseline.
beforeEach(() => {
_reset();
});
@@ -39,8 +42,54 @@ describe("phishingDomains", () => {
});
});
describe("loadConfig + isPhishingDomain", () => {
test("detects exact blacklisted domain", () => {
describe("binarySearch", () => {
const sorted = ["alpha.com", "beta.com", "gamma.com", "zeta.com"];
test("finds existing elements", () => {
expect(binarySearch(sorted, "alpha.com")).toBe(true);
expect(binarySearch(sorted, "gamma.com")).toBe(true);
expect(binarySearch(sorted, "zeta.com")).toBe(true);
});
test("returns false for missing elements", () => {
expect(binarySearch(sorted, "aaa.com")).toBe(false);
expect(binarySearch(sorted, "delta.com")).toBe(false);
expect(binarySearch(sorted, "zzz.com")).toBe(false);
});
test("handles empty array", () => {
expect(binarySearch([], "anything")).toBe(false);
});
test("handles single-element array", () => {
expect(binarySearch(["only.com"], "only.com")).toBe(true);
expect(binarySearch(["only.com"], "other.com")).toBe(false);
});
});
describe("vendored baseline detection", () => {
// These tests verify that the vendored phishing-domains.json
// is loaded and searchable without any delta loaded.
test("getBlocklistSize reflects vendored list (no delta)", () => {
// The vendored list has 231k+ domains; delta is empty after reset.
expect(getBlocklistSize()).toBeGreaterThan(200000);
expect(getDeltaSize()).toBe(0);
});
test("returns false for clean domains against vendored list", () => {
expect(isPhishingDomain("google.com")).toBe(false);
expect(isPhishingDomain("github.com")).toBe(false);
});
test("returns false for empty/null hostname", () => {
expect(isPhishingDomain("")).toBe(false);
expect(isPhishingDomain(null)).toBe(false);
});
});
describe("delta (loadConfig) + isPhishingDomain", () => {
test("detects domains loaded into delta via loadConfig", () => {
loadConfig({
blacklist: ["evil-phishing.com", "scam-swap.xyz"],
whitelist: [],
@@ -49,16 +98,7 @@ describe("phishingDomains", () => {
expect(isPhishingDomain("scam-swap.xyz")).toBe(true);
});
test("returns false for clean domains", () => {
loadConfig({
blacklist: ["evil-phishing.com"],
whitelist: [],
});
expect(isPhishingDomain("etherscan.io")).toBe(false);
expect(isPhishingDomain("uniswap.org")).toBe(false);
});
test("detects subdomain of blacklisted domain", () => {
test("detects subdomain of delta-blacklisted domain", () => {
loadConfig({
blacklist: ["evil-phishing.com"],
whitelist: [],
@@ -67,7 +107,7 @@ describe("phishingDomains", () => {
expect(isPhishingDomain("sub.app.evil-phishing.com")).toBe(true);
});
test("whitelist overrides blacklist", () => {
test("delta whitelist overrides delta blacklist", () => {
loadConfig({
blacklist: ["metamask.io"],
whitelist: ["metamask.io"],
@@ -75,7 +115,7 @@ describe("phishingDomains", () => {
expect(isPhishingDomain("metamask.io")).toBe(false);
});
test("whitelist on parent domain overrides blacklist", () => {
test("delta whitelist on parent domain overrides blacklist", () => {
loadConfig({
blacklist: ["sub.legit.com"],
whitelist: ["legit.com"],
@@ -83,7 +123,7 @@ describe("phishingDomains", () => {
expect(isPhishingDomain("sub.legit.com")).toBe(false);
});
test("case-insensitive matching", () => {
test("case-insensitive matching in delta", () => {
loadConfig({
blacklist: ["Evil-Phishing.COM"],
whitelist: [],
@@ -92,30 +132,15 @@ describe("phishingDomains", () => {
expect(isPhishingDomain("EVIL-PHISHING.COM")).toBe(true);
});
test("returns false for empty/null hostname", () => {
loadConfig({
blacklist: ["evil.com"],
whitelist: [],
});
expect(isPhishingDomain("")).toBe(false);
expect(isPhishingDomain(null)).toBe(false);
});
test("getBlocklistSize reflects loaded config", () => {
test("getDeltaSize reflects loaded delta", () => {
loadConfig({
blacklist: ["a.com", "b.com", "c.com"],
whitelist: ["d.com"],
});
expect(getBlocklistSize()).toBe(3);
expect(getDeltaSize()).toBe(3);
});
test("handles config with no blacklist/whitelist keys", () => {
loadConfig({});
expect(isPhishingDomain("anything.com")).toBe(false);
expect(getBlocklistSize()).toBe(0);
});
test("re-loading config replaces previous data", () => {
test("re-loading config replaces previous delta", () => {
loadConfig({
blacklist: ["old-scam.com"],
whitelist: [],
@@ -129,10 +154,15 @@ describe("phishingDomains", () => {
expect(isPhishingDomain("old-scam.com")).toBe(false);
expect(isPhishingDomain("new-scam.com")).toBe(true);
});
test("handles config with no blacklist/whitelist keys", () => {
loadConfig({});
expect(getDeltaSize()).toBe(0);
});
});
describe("real-world MetaMask blocklist patterns", () => {
test("detects known phishing domains from MetaMask list", () => {
describe("real-world MetaMask blocklist patterns (via delta)", () => {
test("detects known phishing domains loaded as delta", () => {
loadConfig({
blacklist: [
"uniswap-trade.web.app",
@@ -146,21 +176,42 @@ describe("phishingDomains", () => {
expect(isPhishingDomain("blast-pools.pages.dev")).toBe(true);
});
test("does not flag legitimate domains whitelisted by MetaMask", () => {
test("delta whitelist overrides vendored blacklist entries", () => {
// If a domain is in the vendored blacklist but a fresh whitelist
// update adds it, the whitelist should win.
loadConfig({
blacklist: ["opensea.pro"],
whitelist: [
"opensea.io",
"metamask.io",
"etherscan.io",
"opensea.pro",
],
blacklist: [],
whitelist: ["opensea.io", "metamask.io", "etherscan.io"],
});
expect(isPhishingDomain("opensea.io")).toBe(false);
expect(isPhishingDomain("metamask.io")).toBe(false);
expect(isPhishingDomain("etherscan.io")).toBe(false);
// opensea.pro is both blacklisted and whitelisted — whitelist wins
expect(isPhishingDomain("opensea.pro")).toBe(false);
});
});
describe("delta + vendored interaction", () => {
test("delta blacklist entries are found even with empty vendored match", () => {
// This domain is (almost certainly) not in the vendored list
const uniqueDomain =
"test-unique-domain-not-in-vendored-" +
Date.now() +
".example.com";
expect(isPhishingDomain(uniqueDomain)).toBe(false);
loadConfig({
blacklist: [uniqueDomain],
whitelist: [],
});
expect(isPhishingDomain(uniqueDomain)).toBe(true);
});
test("getBlocklistSize includes both vendored and delta", () => {
const baseSize = getBlocklistSize();
loadConfig({
blacklist: ["new-a.com", "new-b.com"],
whitelist: [],
});
expect(getBlocklistSize()).toBe(baseSize + 2);
});
});
});