AutistMask/src/shared/phishingDomains.js

// Domain-based phishing detection using MetaMask's eth-phishing-detect blocklist.
// Fetches the blocklist at runtime, caches it in memory, and checks hostnames.
//
// The blocklist source:
// https://github.com/MetaMask/eth-phishing-detect  (src/config.json)
//
// The config uses { blacklist: [...], whitelist: [...], fuzzylist: [...] }.
// We check exact hostname and parent-domain matches against the blacklist,
// with whitelist overrides.

// Raw config.json on the "main" branch of MetaMask's eth-phishing-detect repo.
const BLOCKLIST_URL =
    "https://raw.githubusercontent.com/MetaMask/eth-phishing-detect/main/src/config.json";

// Freshness window for a loaded list, in milliseconds (24 hours).
const CACHE_TTL_MS = 1000 * 60 * 60 * 24;

// Module-level cache state, shared by every function in this file.
// In-flight download, or null when no fetch is running.
let fetchPromise = null;
// Timestamp (ms since epoch) of the last successful load; 0 means "never".
let lastFetchTime = 0;
// Lower-cased domains that must never be flagged.
let whitelistSet = new Set();
// Lower-cased domains that are flagged as phishing.
let blacklistSet = new Set();

/**
 * Load a pre-parsed config into the in-memory sets.
 * Used for testing and for loading from cache.
 *
 * Entries are lower-cased. Entries that are not strings are skipped rather
 * than aborting the whole load, and a list that is missing or not an array
 * is treated as empty. Both sets are built before either one is replaced,
 * so the blacklist and whitelist always come from the same config.
 *
 * @param {{ blacklist?: string[], whitelist?: string[] }} config
 * @throws {TypeError} If `config` is null or undefined; the current lists
 *     are left untouched in that case.
 */
function loadConfig(config) {
    const toDomainSet = (list) =>
        new Set(
            (Array.isArray(list) ? list : [])
                .filter((entry) => typeof entry === "string")
                .map((entry) => entry.toLowerCase()),
        );

    // Build first, commit afterwards: nothing below this point can throw,
    // so the module state is never left half-updated.
    const nextBlacklist = toDomainSet(config.blacklist);
    const nextWhitelist = toDomainSet(config.whitelist);

    blacklistSet = nextBlacklist;
    whitelistSet = nextWhitelist;
    lastFetchTime = Date.now();
}

/**
 * Generate hostname variants for subdomain matching.
 * "sub.evil.com" yields ["sub.evil.com", "evil.com"].
 *
 * The hostname is lower-cased and a single trailing dot (the DNS root label,
 * as in "evil.com.") is removed first, so the fully-qualified spelling of a
 * host produces the same variants as the plain one. The bare top-level label
 * is never emitted as a variant of a multi-label hostname.
 *
 * @param {string} hostname
 * @returns {string[]} The normalized hostname followed by its parent domains,
 *     from most to least specific.
 */
function hostnameVariants(hostname) {
    const lowered = hostname.toLowerCase();
    // Without this, "evil.com." would produce ["evil.com.", "com."] and slip
    // past a list entry for "evil.com".
    const h = lowered.endsWith(".") ? lowered.slice(0, -1) : lowered;
    const variants = [h];
    const parts = h.split(".");
    // Parent domains: a.b.c.d -> b.c.d, c.d
    for (let i = 1; i < parts.length - 1; i++) {
        variants.push(parts.slice(i).join("."));
    }
    return variants;
}

/**
 * Decide whether a hostname should be treated as a phishing site.
 *
 * The hostname and each of its parent domains (as produced by
 * hostnameVariants) are looked up in the lists. A hit on the whitelist for
 * any of them clears the hostname, even if it also appears on the blacklist.
 *
 * @param {string} hostname - The hostname to check.
 * @returns {boolean} true only when a variant is blacklisted and none is
 *     whitelisted; false for empty or missing input.
 */
function isPhishingDomain(hostname) {
    if (!hostname) return false;

    const candidates = hostnameVariants(hostname);

    // Whitelist wins over blacklist.
    const isAllowed = candidates.some((name) => whitelistSet.has(name));
    if (isAllowed) return false;

    return candidates.some((name) => blacklistSet.has(name));
}

/**
 * Fetch the latest blocklist from the MetaMask repo.
 * De-duplicates concurrent fetches. Results are cached for CACHE_TTL_MS.
 *
 * This is best-effort: network errors, non-2xx responses and payloads that
 * do not contain a `blacklist` array are logged and otherwise ignored, and
 * the lists already in memory are kept. The returned promise never rejects.
 *
 * @returns {Promise<void>}
 */
async function updatePhishingList() {
    // Skip if recently fetched
    if (Date.now() - lastFetchTime < CACHE_TTL_MS && blacklistSet.size > 0) {
        return;
    }

    // De-duplicate concurrent calls
    if (fetchPromise) return fetchPromise;

    fetchPromise = (async () => {
        try {
            const resp = await fetch(BLOCKLIST_URL);
            if (!resp.ok) throw new Error("HTTP " + resp.status);
            const config = await resp.json();
            // A 200 response with unexpected JSON (error object, changed
            // schema) must not replace a populated blocklist with an empty one.
            if (!config || !Array.isArray(config.blacklist)) {
                throw new Error("Unexpected blocklist format");
            }
            loadConfig(config);
        } catch (err) {
            // Keep the current lists and retry on the next call, but leave a
            // trace so a permanently failing update can be diagnosed.
            console.warn("Phishing blocklist update failed:", err);
        } finally {
            fetchPromise = null;
        }
    })();

    return fetchPromise;
}

/**
 * Report how many domains the in-memory blacklist currently holds.
 * Intended for diagnostics.
 *
 * @returns {number} Number of entries in the blacklist set.
 */
function getBlocklistSize() {
    const { size } = blacklistSet;
    return size;
}

/**
 * Restore the module to its initial state (for testing): no fetch in
 * flight, no recorded load time, and both domain sets empty.
 */
function _reset() {
    fetchPromise = null;
    lastFetchTime = 0;
    whitelistSet = new Set();
    blacklistSet = new Set();
}

// Public API (CommonJS). loadConfig, hostnameVariants and _reset are exported
// alongside the main entry points so the module state can be driven directly,
// e.g. from tests.
module.exports = {
    isPhishingDomain,
    updatePhishingList,
    loadConfig,
    getBlocklistSize,
    hostnameVariants,
    _reset,
};