feat: add Etherscan label scraping and MetaMask phishing domain blocklist

- Add etherscanLabels module: scrapes Etherscan address pages for phishing/scam labels (Fake_Phishing*, Exploiter, scam warnings). Integrated as a best-effort async check in addressWarnings.
- Add phishingDomains module: fetches MetaMask's eth-phishing-detect blocklist (~231K domains) at runtime, caches in memory, refreshes every 24h. Checks hostnames with subdomain matching and whitelist overrides.
- Integrate domain phishing checks into all approval flows: connection requests, transaction approvals, and signature requests show a prominent red warning banner when the requesting site is on the MetaMask blocklist.
- Add unit tests for both modules (12 tests for etherscanLabels parsing, 15 tests for phishingDomains matching).

Closes #114
2026-03-01 05:03:39 -08:00
parent bf01ae6f4d
commit e08b409043
8 changed files with 594 additions and 0 deletions
--- a/src/shared/phishingDomains.js
+++ b/src/shared/phishingDomains.js
@@ -0,0 +1,133 @@
+// Domain-based phishing detection using MetaMask's eth-phishing-detect blocklist.
+// Fetches the blocklist at runtime, caches it in memory, and checks hostnames.
+//
+// The blocklist source:
+// https://github.com/MetaMask/eth-phishing-detect  (src/config.json)
+//
+// The config uses { blacklist: [...], whitelist: [...], fuzzylist: [...] }.
+// We check exact hostname and parent-domain matches against the blacklist,
+// with whitelist overrides.
+
+// Raw config.json served from the MetaMask/eth-phishing-detect repository.
+const BLOCKLIST_URL =
+    "https://raw.githubusercontent.com/MetaMask/eth-phishing-detect/main/src/config.json";
+
+// How long a loaded list is considered fresh: 24 hours, in milliseconds.
+const CACHE_TTL_MS = 1000 * 60 * 60 * 24;
+
+// Lower-cased domains currently treated as phishing.
+let blacklistSet = new Set();
+// Lower-cased domains that are never flagged, even if a variant is blacklisted.
+let whitelistSet = new Set();
+// Date.now() of the last loadConfig() call; 0 means nothing has been loaded.
+let lastFetchTime = 0;
+// Promise of the fetch currently in flight, or null when no fetch is running.
+let fetchPromise = null;
+
+/**
+ * Load a pre-parsed config into the in-memory sets.
+ * Used for testing and for loading from cache.
+ *
+ * Entries are lower-cased before being stored; entries that are not strings
+ * are skipped. A missing (falsy) list is treated as empty. Both sets are
+ * built before any module state is replaced, so if building either one
+ * throws, the previously loaded lists and load time are left untouched.
+ *
+ * @param {{ blacklist?: string[], whitelist?: string[] }} config
+ * @throws {TypeError} If config is null/undefined or a list is a truthy
+ *     non-array value.
+ */
+function loadConfig(config) {
+    const toDomainSet = (list) =>
+        new Set(
+            (list || [])
+                .filter((d) => typeof d === "string")
+                .map((d) => d.toLowerCase()),
+        );
+
+    // Build first, assign second: never pair a new blacklist with an old
+    // whitelist because the second list turned out to be malformed.
+    const nextBlacklist = toDomainSet(config.blacklist);
+    const nextWhitelist = toDomainSet(config.whitelist);
+
+    blacklistSet = nextBlacklist;
+    whitelistSet = nextWhitelist;
+    lastFetchTime = Date.now();
+}
+
+/**
+ * Generate hostname variants for subdomain matching.
+ * "sub.evil.com" yields ["sub.evil.com", "evil.com"].
+ *
+ * The hostname is lower-cased and a single trailing dot is removed first, so
+ * the fully-qualified form "sub.evil.com." yields the same variants as
+ * "sub.evil.com". The last label on its own (e.g. "com") is never included.
+ *
+ * @param {string} hostname
+ * @returns {string[]} The normalized hostname followed by its parent domains,
+ *     most specific first.
+ */
+function hostnameVariants(hostname) {
+    const lowered = hostname.toLowerCase();
+    // "evil.com." names the same host as "evil.com"; without this step the
+    // trailing dot would make every list lookup miss.
+    const h = lowered.endsWith(".") ? lowered.slice(0, -1) : lowered;
+    const parts = h.split(".");
+    const variants = [h];
+    // Parent domains: a.b.c.d -> b.c.d, c.d
+    for (let i = 1; i < parts.length - 1; i++) {
+        variants.push(parts.slice(i).join("."));
+    }
+    return variants;
+}
+
+/**
+ * Decide whether a hostname should be flagged as a phishing domain.
+ *
+ * The hostname and each variant produced by hostnameVariants() are looked up
+ * in the whitelist first; a hit on any of them means the hostname is not
+ * flagged. Otherwise the hostname is flagged when any variant is present in
+ * the blacklist.
+ *
+ * @param {string} hostname - The hostname to check.
+ * @returns {boolean} true if blacklisted and not whitelisted; false for a
+ *     falsy hostname.
+ */
+function isPhishingDomain(hostname) {
+    if (!hostname) {
+        return false;
+    }
+
+    const candidates = hostnameVariants(hostname);
+
+    // An allow-listed name anywhere in the chain vetoes any blacklist hit.
+    const isAllowed = candidates.some((name) => whitelistSet.has(name));
+    if (isAllowed) {
+        return false;
+    }
+
+    return candidates.some((name) => blacklistSet.has(name));
+}
+
+/**
+ * Fetch the latest blocklist from the MetaMask repo.
+ * De-duplicates concurrent fetches. Results are cached for CACHE_TTL_MS.
+ *
+ * Best-effort: on an HTTP error, a network/parse failure, or a payload
+ * without a non-empty `blacklist` array, the currently loaded lists are left
+ * as they are, a warning is logged, and the next call tries again. The
+ * returned promise resolves in all of these cases.
+ *
+ * @returns {Promise<void>}
+ */
+async function updatePhishingList() {
+    // Skip if recently fetched
+    const isFresh = Date.now() - lastFetchTime < CACHE_TTL_MS;
+    if (isFresh && blacklistSet.size > 0) {
+        return;
+    }
+
+    // De-duplicate concurrent calls
+    if (fetchPromise) return fetchPromise;
+
+    fetchPromise = (async () => {
+        try {
+            const resp = await fetch(BLOCKLIST_URL);
+            if (!resp.ok) throw new Error("HTTP " + resp.status);
+            const config = await resp.json();
+            // Refuse payloads that do not look like the blocklist config:
+            // loading them would replace a good list with an empty one and
+            // turn the phishing check off until the next successful fetch.
+            const hasBlacklist =
+                Boolean(config) &&
+                Array.isArray(config.blacklist) &&
+                config.blacklist.length > 0;
+            if (!hasBlacklist) {
+                throw new Error("Unexpected blocklist format");
+            }
+            loadConfig(config);
+        } catch (err) {
+            // Deliberately non-fatal — we'll retry next time.
+            console.warn("phishingDomains: blocklist update failed:", err);
+        } finally {
+            fetchPromise = null;
+        }
+    })();
+
+    return fetchPromise;
+}
+
+/**
+ * Report how many domains are in the currently loaded blacklist
+ * (for diagnostics).
+ *
+ * @returns {number} Entry count of the in-memory blacklist set.
+ */
+function getBlocklistSize() {
+    const { size } = blacklistSet;
+    return size;
+}
+
+/**
+ * Put the module back into its initial state (for testing): no in-flight
+ * fetch, no recorded load time, and empty whitelist and blacklist sets.
+ */
+function _reset() {
+    fetchPromise = null;
+    lastFetchTime = 0;
+    whitelistSet = new Set();
+    blacklistSet = new Set();
+}
+
+// Public surface of the module. isPhishingDomain and updatePhishingList are
+// the runtime entry points; the remaining exports are documented above as
+// helpers for cache loading, diagnostics, and tests.
+module.exports = {
+    isPhishingDomain,
+    updatePhishingList,
+    loadConfig,
+    getBlocklistSize,
+    hostnameVariants,
+    // Test-only hook: clears all module-level state.
+    _reset,
+};