feat: add Etherscan label scraping and MetaMask phishing domain blocklist

- Add etherscanLabels module: scrapes Etherscan address pages for
  phishing/scam labels (Fake_Phishing*, Exploiter, scam warnings).
  Integrated as best-effort async check in addressWarnings.

- Add phishingDomains module: fetches MetaMask's eth-phishing-detect
  blocklist (~231K domains) at runtime, caches in memory, refreshes
  every 24h. Checks hostnames with subdomain matching and whitelist
  overrides.

- Integrate domain phishing checks into all approval flows:
  connection requests, transaction approvals, and signature requests
  show a prominent red warning banner when the requesting site is on
  the MetaMask blocklist.

- Add unit tests for both modules (12 tests for etherscanLabels
  parsing, 16 tests for phishingDomains matching).

Closes #114
This commit is contained in:
user
2026-03-01 05:03:39 -08:00
parent bf01ae6f4d
commit e08b409043
8 changed files with 594 additions and 0 deletions

View File

@@ -0,0 +1,100 @@
const { parseEtherscanPage } = require("../src/shared/etherscanLabels");
describe("etherscanLabels", () => {
  describe("parseEtherscanPage", () => {
    // ---- Labels carried in the page <title> ----

    test("detects Fake_Phishing label in title", () => {
      const page = `<html><head><title>Fake_Phishing184810 | Address: 0x00000c07...3ea470000 | Etherscan</title></head><body></body></html>`;
      const { label, isPhishing, warning } = parseEtherscanPage(page);

      expect(label).toBe("Fake_Phishing184810");
      expect(isPhishing).toBe(true);
      // The warning must mention both the label and the "Phish/Hack" text.
      expect(warning).toContain("Fake_Phishing184810");
      expect(warning).toContain("Phish/Hack");
    });

    test("detects Fake_Phishing with different number", () => {
      const page = `<html><head><title>Fake_Phishing5169 | Address: 0x3e0defb8...99a7a8a74 | Etherscan</title></head><body></body></html>`;
      const { label, isPhishing } = parseEtherscanPage(page);

      expect(label).toBe("Fake_Phishing5169");
      expect(isPhishing).toBe(true);
    });

    test("detects Exploiter label", () => {
      const page = `<html><head><title>Exploiter 42 | Address: 0xabcdef...1234 | Etherscan</title></head><body></body></html>`;
      const { label, isPhishing } = parseEtherscanPage(page);

      expect(label).toBe("Exploiter 42");
      expect(isPhishing).toBe(true);
    });

    // ---- Warnings carried in the page body ----

    test("detects scam warning in body text", () => {
      // Title has no label segment; only the body sentence signals the scam.
      const page = [
        `<html><head><title>Address: 0xabcdef...1234 | Etherscan</title></head>`,
        `<body>There are reports that this address was used in a Phishing scam.</body></html>`,
      ].join("");
      const { label, isPhishing, warning } = parseEtherscanPage(page);

      expect(label).toBeNull();
      expect(isPhishing).toBe(true);
      expect(warning).toContain("phishing/scam");
    });

    test("detects scam warning with label in body", () => {
      // Title label is not itself a phishing pattern; the body sentence is.
      const page = [
        `<html><head><title>SomeScammer | Address: 0xabcdef...1234 | Etherscan</title></head>`,
        `<body>There are reports that this address was used in a scam.</body></html>`,
      ].join("");
      const { label, isPhishing, warning } = parseEtherscanPage(page);

      expect(label).toBe("SomeScammer");
      expect(isPhishing).toBe(true);
      expect(warning).toContain("SomeScammer");
    });

    // ---- Pages that must not be flagged ----

    test("returns clean result for legitimate address", () => {
      const page = `<html><head><title>vitalik.eth | Address: 0xd8dA6BF2...37aA96045 | Etherscan</title></head><body>Overview</body></html>`;
      const { label, isPhishing, warning } = parseEtherscanPage(page);

      expect(label).toBe("vitalik.eth");
      expect(isPhishing).toBe(false);
      expect(warning).toBeNull();
    });

    test("returns clean result for unlabeled address", () => {
      const page = `<html><head><title>Address: 0x1234567890...abcdef | Etherscan</title></head><body>Overview</body></html>`;
      const { label, isPhishing, warning } = parseEtherscanPage(page);

      expect(label).toBeNull();
      expect(isPhishing).toBe(false);
      expect(warning).toBeNull();
    });

    test("handles exchange labels correctly (not phishing)", () => {
      const page = `<html><head><title>Coinbase 10 | Address: 0xa9d1e08c...b81d3e43 | Etherscan</title></head><body>Overview</body></html>`;
      const { label, isPhishing } = parseEtherscanPage(page);

      expect(label).toBe("Coinbase 10");
      expect(isPhishing).toBe(false);
    });

    test("handles contract names correctly (not phishing)", () => {
      const page = `<html><head><title>Beacon Deposit Contract | Address: 0x00000000...03d7705Fa | Etherscan</title></head><body>Overview</body></html>`;
      const { label, isPhishing } = parseEtherscanPage(page);

      expect(label).toBe("Beacon Deposit Contract");
      expect(isPhishing).toBe(false);
    });

    // ---- Degenerate input ----

    test("handles empty HTML gracefully", () => {
      const { label, isPhishing, warning } = parseEtherscanPage("");

      expect(label).toBeNull();
      expect(isPhishing).toBe(false);
      expect(warning).toBeNull();
    });

    test("handles malformed title tag", () => {
      // The <title> element is present but has no content.
      const page = `<html><head><title></title></head><body></body></html>`;
      const { label, isPhishing } = parseEtherscanPage(page);

      expect(label).toBeNull();
      expect(isPhishing).toBe(false);
    });

    test("detects wallet drainer warning", () => {
      const page = [
        `<html><head><title>Address: 0xabc...def | Etherscan</title></head>`,
        `<body>This is a known wallet drainer contract.</body></html>`,
      ].join("");
      const { isPhishing } = parseEtherscanPage(page);

      expect(isPhishing).toBe(true);
    });
  });
});

View File

@@ -0,0 +1,166 @@
const {
isPhishingDomain,
loadConfig,
getBlocklistSize,
hostnameVariants,
_reset,
} = require("../src/shared/phishingDomains");
// Every test starts from a freshly reset module so that a config loaded by
// one case cannot contaminate the next.
beforeEach(function resetPhishingState() {
  _reset();
});
describe("phishingDomains", () => {
  /**
   * Loads a config made of the given domain lists.
   *
   * Passes `loadConfig` an object with `blacklist` and `whitelist` keys, in
   * that order; the whitelist is an empty array unless one is supplied.
   *
   * @param {string[]} blacklist - Domains to block.
   * @param {string[]} [whitelist] - Domains that override the blacklist.
   */
  const loadLists = (blacklist, whitelist = []) => {
    loadConfig({ blacklist, whitelist });
  };

  describe("hostnameVariants", () => {
    test("returns exact hostname plus parent domains", () => {
      expect(hostnameVariants("sub.evil.com")).toEqual([
        "sub.evil.com",
        "evil.com",
      ]);
    });

    test("returns just the hostname for a bare domain", () => {
      expect(hostnameVariants("example.com")).toEqual(["example.com"]);
    });

    test("handles deep subdomain chains", () => {
      // One variant per leading label stripped, stopping at the two-label domain.
      const expected = ["a.b.c.d.com", "b.c.d.com", "c.d.com", "d.com"];

      expect(hostnameVariants("a.b.c.d.com")).toEqual(expected);
    });

    test("lowercases hostnames", () => {
      expect(hostnameVariants("Evil.COM")).toEqual(["evil.com"]);
    });
  });

  describe("loadConfig + isPhishingDomain", () => {
    test("detects exact blacklisted domain", () => {
      loadLists(["evil-phishing.com", "scam-swap.xyz"]);

      expect(isPhishingDomain("evil-phishing.com")).toBe(true);
      expect(isPhishingDomain("scam-swap.xyz")).toBe(true);
    });

    test("returns false for clean domains", () => {
      loadLists(["evil-phishing.com"]);

      expect(isPhishingDomain("etherscan.io")).toBe(false);
      expect(isPhishingDomain("uniswap.org")).toBe(false);
    });

    test("detects subdomain of blacklisted domain", () => {
      loadLists(["evil-phishing.com"]);

      expect(isPhishingDomain("app.evil-phishing.com")).toBe(true);
      expect(isPhishingDomain("sub.app.evil-phishing.com")).toBe(true);
    });

    test("whitelist overrides blacklist", () => {
      // The same domain appears on both lists.
      loadLists(["metamask.io"], ["metamask.io"]);

      expect(isPhishingDomain("metamask.io")).toBe(false);
    });

    test("whitelist on parent domain overrides blacklist", () => {
      // The blacklisted entry is a subdomain of the whitelisted parent.
      loadLists(["sub.legit.com"], ["legit.com"]);

      expect(isPhishingDomain("sub.legit.com")).toBe(false);
    });

    test("case-insensitive matching", () => {
      // Mixed case in the list entry and in the queried hostname.
      loadLists(["Evil-Phishing.COM"]);

      expect(isPhishingDomain("evil-phishing.com")).toBe(true);
      expect(isPhishingDomain("EVIL-PHISHING.COM")).toBe(true);
    });

    test("returns false for empty/null hostname", () => {
      loadLists(["evil.com"]);

      expect(isPhishingDomain("")).toBe(false);
      expect(isPhishingDomain(null)).toBe(false);
    });

    test("getBlocklistSize reflects loaded config", () => {
      // Three blacklist entries and one whitelist entry; the expected size is 3.
      loadLists(["a.com", "b.com", "c.com"], ["d.com"]);

      expect(getBlocklistSize()).toBe(3);
    });

    test("handles config with no blacklist/whitelist keys", () => {
      // Deliberately bypasses the helper: the config object has no keys at all.
      loadConfig({});

      expect(isPhishingDomain("anything.com")).toBe(false);
      expect(getBlocklistSize()).toBe(0);
    });

    test("re-loading config replaces previous data", () => {
      loadLists(["old-scam.com"]);
      expect(isPhishingDomain("old-scam.com")).toBe(true);

      // A second load must drop the first list rather than merge with it.
      loadLists(["new-scam.com"]);
      expect(isPhishingDomain("old-scam.com")).toBe(false);
      expect(isPhishingDomain("new-scam.com")).toBe(true);
    });
  });

  describe("real-world MetaMask blocklist patterns", () => {
    test("detects known phishing domains from MetaMask list", () => {
      loadLists([
        "uniswap-trade.web.app",
        "hopprotocol.pro",
        "blast-pools.pages.dev",
      ]);

      expect(isPhishingDomain("uniswap-trade.web.app")).toBe(true);
      expect(isPhishingDomain("hopprotocol.pro")).toBe(true);
      expect(isPhishingDomain("blast-pools.pages.dev")).toBe(true);
    });

    test("does not flag legitimate domains whitelisted by MetaMask", () => {
      loadLists(
        ["opensea.pro"],
        ["opensea.io", "metamask.io", "etherscan.io", "opensea.pro"],
      );

      expect(isPhishingDomain("opensea.io")).toBe(false);
      expect(isPhishingDomain("metamask.io")).toBe(false);
      expect(isPhishingDomain("etherscan.io")).toBe(false);
      // opensea.pro sits on both lists, and the whitelist takes precedence.
      expect(isPhishingDomain("opensea.pro")).toBe(false);
    });
  });
});