feat: add PSL-based target classification

Classify DNS names as apex domains or hostnames using the Public
Suffix List (golang.org/x/net/publicsuffix). Correctly handles
multi-label public suffixes such as .co.uk and .com.au.
This commit is contained in:
clawbot 2026-02-19 20:09:07 -08:00
parent 144a2df665
commit 1db3056594
2 changed files with 145 additions and 0 deletions

View File

@ -0,0 +1,61 @@
// Package config provides application configuration via Viper.
package config
import (
"fmt"
"strings"
"golang.org/x/net/publicsuffix"
)
// ClassifyTarget reports whether name is an apex domain (its own
// eTLD+1 under the Public Suffix List) or a hostname below one.
//
// The name is stripped of a single trailing dot and lowercased
// before the lookup. The result is "domain" when the normalized
// name equals its eTLD+1 and "hostname" otherwise. An error is
// returned when the normalized name is empty or when
// publicsuffix.EffectiveTLDPlusOne rejects it (for example, a bare
// public suffix such as "co.uk").
func ClassifyTarget(name string) (string, error) {
	normalized := strings.TrimSuffix(name, ".")
	normalized = strings.ToLower(normalized)

	if len(normalized) == 0 {
		return "", fmt.Errorf("empty target name")
	}

	registrable, lookupErr := publicsuffix.EffectiveTLDPlusOne(normalized)

	switch {
	case lookupErr != nil:
		return "", fmt.Errorf(
			"invalid target %q: %w", normalized, lookupErr,
		)
	case registrable == normalized:
		// The name is exactly its own registrable domain.
		return "domain", nil
	default:
		// The name carries extra labels in front of the eTLD+1.
		return "hostname", nil
	}
}
// classifyTargets partitions targets into apex domains and
// hostnames according to ClassifyTarget. Accepted entries are
// stored lowercased with one trailing dot removed, in input order.
// The first target that fails classification aborts the call:
// both slices are returned nil together with that error.
func classifyTargets(
	targets []string,
) (domains, hostnames []string, err error) {
	for i := range targets {
		raw := targets[i]

		kind, kindErr := ClassifyTarget(raw)
		if kindErr != nil {
			return nil, nil, kindErr
		}

		// Apply the same normalization ClassifyTarget performs so
		// the stored name matches what was classified.
		normalized := strings.ToLower(strings.TrimSuffix(raw, "."))

		switch kind {
		case "domain":
			domains = append(domains, normalized)
		case "hostname":
			hostnames = append(hostnames, normalized)
		}
	}

	return domains, hostnames, nil
}

View File

@ -0,0 +1,84 @@
package config
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestClassifyTarget runs ClassifyTarget over a table of apex
// domains, subdomains, normalization variants (trailing dot, letter
// case) and rejected inputs (bare public suffixes, empty string),
// checking either the returned kind or the presence of an error.
func TestClassifyTarget(t *testing.T) {
	t.Parallel()

	cases := []struct {
		name    string
		in      string
		want    string
		wantErr bool
	}{
		{name: "apex .com", in: "example.com", want: "domain"},
		{name: "apex .org", in: "example.org", want: "domain"},
		{name: "apex .co.uk", in: "example.co.uk", want: "domain"},
		{name: "apex .com.au", in: "example.com.au", want: "domain"},
		{name: "subdomain www", in: "www.example.com", want: "hostname"},
		{name: "subdomain api", in: "api.example.com", want: "hostname"},
		{name: "deep subdomain", in: "a.b.c.example.com", want: "hostname"},
		{name: "subdomain .co.uk", in: "www.example.co.uk", want: "hostname"},
		{name: "trailing dot", in: "example.com.", want: "domain"},
		{name: "trailing dot sub", in: "www.example.com.", want: "hostname"},
		{name: "uppercase", in: "EXAMPLE.COM", want: "domain"},
		{name: "mixed case", in: "Www.Example.Com", want: "hostname"},
		{name: "public suffix", in: "co.uk", wantErr: true},
		{name: "tld only", in: "com", wantErr: true},
		{name: "empty", in: "", wantErr: true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			got, err := ClassifyTarget(tc.in)

			if !tc.wantErr {
				// Success path: the call must not fail and must
				// yield the expected kind.
				require.NoError(t, err)
				assert.Equal(t, tc.want, got)

				return
			}

			assert.Error(t, err)
		})
	}
}
// TestClassifyTargets checks that classifyTargets splits a mixed
// list into apex domains and hostnames, keeping input order within
// each group.
func TestClassifyTargets(t *testing.T) {
	t.Parallel()

	wantDomains := []string{"example.com", "example.co.uk"}
	wantHostnames := []string{
		"www.example.com",
		"api.example.com",
		"blog.example.co.uk",
	}

	gotDomains, gotHostnames, err := classifyTargets([]string{
		"example.com",
		"www.example.com",
		"api.example.com",
		"example.co.uk",
		"blog.example.co.uk",
	})

	require.NoError(t, err)
	assert.Equal(t, wantDomains, gotDomains)
	assert.Equal(t, wantHostnames, gotHostnames)
}
// TestClassifyTargets_RejectsPublicSuffix checks that
// classifyTargets returns an error when the list contains a bare
// public suffix ("co.uk") alongside a valid domain.
func TestClassifyTargets_RejectsPublicSuffix(t *testing.T) {
	t.Parallel()

	input := []string{"example.com", "co.uk"}

	_, _, classifyErr := classifyTargets(input)

	assert.Error(t, classifyErr)
}