Files
education-flagger/main.go
Robert Rapp 082ecc579a feat: enrich /lookup with university domain list check
Add a second detection path alongside ASN lookup: a self-maintained
list of university domains (uni_domains.txt) loaded at startup.

- New /lookup params: email= (extracts domain from address), domain= unchanged
- Suffix matching: insti.uni-stuttgart.de matches list entry uni-stuttgart.de
  without false positives (evil-uni-stuttgart.de does not match)
- New response fields: asn_match, domain_match, matched_domain (omitempty)
- nren remains true if either asn_match OR domain_match is true (backwards compat)
- /healthz now returns JSON body: {"asn_count":N,"domain_count":N}
- asn-updater: new update_uni_domains() merges hs-kompass.de TSV + Hipo JSON
  (configurable via UNI_DOMAIN_COUNTRIES / HS_KOMPASS_URL env vars)
- 7 new tests; all existing tests pass unchanged

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-17 15:10:49 +01:00

365 lines
8.2 KiB
Go

package main
import (
"bufio"
"encoding/json"
"fmt"
"log"
"net"
"net/http"
"os"
"strconv"
"strings"
"sync/atomic"
"time"
"github.com/oschwald/maxminddb-golang"
)
// asnRecord is the subset of an MMDB entry that this service decodes. The
// struct tags name the database fields each member is populated from.
type asnRecord struct {
	// ASN is the autonomous system number; the handlers treat 0 as "no record".
	ASN uint `maxminddb:"autonomous_system_number"`
	// Org is the value of the autonomous_system_organization field.
	Org string `maxminddb:"autonomous_system_organization"`
}
// lookupResponse is the JSON body returned by the /lookup endpoint.
// Field order is significant: it determines the key order of the encoded JSON.
type lookupResponse struct {
	// Domain is the normalized domain that was looked up.
	Domain string `json:"domain"`
	// NREN is true when either ASNMatch or DomainMatch is true.
	NREN bool `json:"nren"`
	// ASNMatch reports that a resolved IP belongs to an ASN in the NREN set.
	ASNMatch bool `json:"asn_match"`
	// DomainMatch reports that the domain (or a parent) is in the university list.
	DomainMatch bool `json:"domain_match"`
	// MatchedDomain is the list entry that produced DomainMatch.
	MatchedDomain string `json:"matched_domain,omitempty"`
	// ASN is the matching ASN or, without a match, the first ASN found for the
	// resolved IPs. It is a pointer so that it can be omitted entirely.
	ASN *uint `json:"asn,omitempty"`
	// ASNOrg is the organization string belonging to ASN.
	ASNOrg string `json:"asn_org,omitempty"`
	// IPs holds the resolved addresses that were examined; a nil slice is
	// encoded as JSON null.
	IPs []string `json:"ips"`
	// MatchedIP is the address whose ASN produced ASNMatch.
	MatchedIP string `json:"matched_ip,omitempty"`
	// Error carries a human-readable note; it may accompany a positive result.
	Error string `json:"error,omitempty"`
}
// server bundles the lookup data and settings shared by the HTTP handlers.
type server struct {
	// db is the MMDB reader used to map an IP address to its ASN record.
	db *maxminddb.Reader
	// nrenASNs is the set of ASNs that count as NREN networks.
	nrenASNs map[uint]struct{}
	// uniDomains is the set of university domains used for suffix matching.
	uniDomains map[string]struct{}
	// ready gates /auth and /lookup: both answer 503 until it is true.
	ready atomic.Bool
	// versionTag is sent in the X-Service response header of /auth.
	versionTag string
	// minASN is the smallest asnCount for which /healthz reports 200.
	minASN int
	// asnCount is the number of ASNs loaded into nrenASNs at startup.
	asnCount int
}
// loadASNSet reads a list of AS numbers from the file at path and returns
// them as a set.
//
// The file holds one decimal ASN per line. Blank lines and lines starting
// with "#" are ignored. Lines that are not a valid unsigned 32-bit decimal
// number are skipped with a warning in the log, so that a malformed list is
// visible to the operator instead of silently shrinking the set.
//
// An error is returned when the file cannot be opened or read.
func loadASNSet(path string) (map[uint]struct{}, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	set := make(map[uint]struct{}, 4096)
	sc := bufio.NewScanner(f)
	for sc.Scan() {
		line := strings.TrimSpace(sc.Text())
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		// bitSize 32 rejects values that cannot be an AS number.
		v, err := strconv.ParseUint(line, 10, 32)
		if err != nil {
			log.Printf("[warn] skipping invalid asn entry: %q", line)
			continue
		}
		set[uint(v)] = struct{}{}
	}
	return set, sc.Err()
}
// loadDomainSet reads a list of domains from the file at path and returns
// them as a set of lower-cased names.
//
// The file holds one domain per line. Blank lines and lines starting with
// "#" are ignored. Trailing dots are removed from every entry, because
// matchesUniDomain strips them from the names it looks up; an entry stored
// as "example.edu." could otherwise never match. Entries that do not have at
// least two labels after this normalization are skipped with a warning.
//
// An error is returned when the file cannot be opened or read. The error
// from os.Open is returned unwrapped so callers can test it with
// os.IsNotExist.
func loadDomainSet(path string) (map[string]struct{}, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	set := make(map[string]struct{}, 2048)
	sc := bufio.NewScanner(f)
	for sc.Scan() {
		line := strings.ToLower(strings.TrimSpace(sc.Text()))
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		// Store entries in the same canonical form the matcher queries with.
		entry := strings.TrimRight(line, ".")
		if !strings.Contains(entry, ".") {
			log.Printf("[warn] skipping invalid domain entry: %s", line)
			continue
		}
		set[entry] = struct{}{}
	}
	return set, sc.Err()
}
// matchesUniDomain reports whether domain, or one of its parent domains, is
// a member of set, and returns the entry that matched.
//
// The input is lower-cased and stripped of trailing dots first. Parents are
// derived by dropping whole labels from the left, so only label-aligned
// suffixes are considered: "evil-uni-stuttgart.de" does not match the entry
// "uni-stuttgart.de". Parents with fewer than two labels are never tried.
// Without a match the result is (false, "").
func matchesUniDomain(domain string, set map[string]struct{}) (bool, string) {
	name := strings.TrimRight(strings.ToLower(domain), ".")
	if name == "" {
		return false, ""
	}
	// The full name is tried as given, whatever its label count.
	if _, hit := set[name]; hit {
		return true, name
	}
	for {
		_, parent, hasDot := strings.Cut(name, ".")
		// Stop once no label can be dropped or only a single label would remain.
		if !hasDot || !strings.Contains(parent, ".") {
			return false, ""
		}
		if _, hit := set[parent]; hit {
			return true, parent
		}
		name = parent
	}
}
// extractDomain returns the lower-cased, whitespace-trimmed domain contained
// in s. s may be an e-mail address or a bare domain.
//
// For an address, the domain is everything after the last "@": a domain
// cannot contain "@", whereas a quoted local part can, so splitting at the
// first "@" would return part of the local part for such addresses. Input
// without "@" is treated as a bare domain. The result may be empty, e.g. for
// "user@".
func extractDomain(s string) string {
	if at := strings.LastIndex(s, "@"); at >= 0 {
		s = s[at+1:]
	}
	return strings.ToLower(strings.TrimSpace(s))
}
// firstForwardedFor returns the first (left-most) entry of the request's
// X-Forwarded-For header with surrounding whitespace removed, or "" when the
// header is absent or its first entry is blank.
func firstForwardedFor(r *http.Request) string {
	// Cut yields the whole value when there is no comma and "" for an empty
	// header, so no separate empty-header or empty-slice checks are needed.
	first, _, _ := strings.Cut(r.Header.Get("X-Forwarded-For"), ",")
	return strings.TrimSpace(first)
}
// remoteIP returns the client address for r as a string. The result is not
// validated here; callers parse it themselves.
//
// The first X-Forwarded-For entry wins when present, because the service
// runs behind Traefik as proxy. Otherwise the host part of r.RemoteAddr is
// used, or r.RemoteAddr unchanged when it is not in host:port form.
//
// NOTE(review): the header value is taken at face value; this presumably
// relies on the proxy sanitizing client-supplied X-Forwarded-For — confirm
// against the Traefik configuration.
func remoteIP(r *http.Request) string {
	if forwarded := firstForwardedFor(r); forwarded != "" {
		return forwarded
	}
	host, _, err := net.SplitHostPort(r.RemoteAddr)
	if err != nil {
		return r.RemoteAddr
	}
	return host
}
// writeJSON sends payload as a JSON document with the given HTTP status and
// a Content-Type of application/json.
func writeJSON(w http.ResponseWriter, status int, payload any) {
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)

	// The status line has already been written at this point, so an encoding
	// or write failure can no longer be reported to the client; it is dropped.
	enc := json.NewEncoder(w)
	_ = enc.Encode(payload)
}
// healthzHandler serves /healthz. It answers 200 when at least minASN ASNs
// were loaded and 503 otherwise. In both cases the body is a JSON object
// with the number of loaded ASNs and university domains.
func (s *server) healthzHandler(w http.ResponseWriter, _ *http.Request) {
	code := http.StatusServiceUnavailable
	if s.asnCount >= s.minASN {
		code = http.StatusOK
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(code)
	// Both values are integers, so formatting them directly yields valid JSON.
	fmt.Fprintf(w, `{"asn_count":%d,"domain_count":%d}`, s.asnCount, len(s.uniDomains))
}
// authHandler serves /auth. It enriches the request with ASN information
// through response headers and never blocks: once the service is ready the
// status is always 200.
//
// Headers on every 200 response:
//   - X-NREN: "1" when the client IP's ASN is in the NREN set, else "0"
//   - Cache-Control: "no-store"
//   - X-Service: the configured version tag
//
// X-ASN and X-ASN-ORG are added only when an ASN record was found for the
// client IP. While the service is not ready the response is a bare 503.
func (s *server) authHandler(w http.ResponseWriter, r *http.Request) {
	if !s.ready.Load() {
		w.WriteHeader(http.StatusServiceUnavailable)
		return
	}

	// Set the common headers up front so that every 200 response carries
	// them, including the ones where no ASN could be determined.
	h := w.Header()
	h.Set("Cache-Control", "no-store")
	h.Set("X-Service", s.versionTag)
	h.Set("X-NREN", "0") // default; overwritten below on a match

	// An unparsable client address or a missing database simply means there
	// is nothing to enrich. The nil check mirrors the one in lookupHandler.
	if ip := net.ParseIP(remoteIP(r)); ip != nil && s.db != nil {
		var rec asnRecord
		if err := s.db.Lookup(ip, &rec); err == nil && rec.ASN != 0 {
			h.Set("X-ASN", strconv.FormatUint(uint64(rec.ASN), 10))
			if rec.Org != "" {
				// The organization string is passed through unmodified.
				h.Set("X-ASN-ORG", rec.Org)
			}
			if _, ok := s.nrenASNs[rec.ASN]; ok {
				h.Set("X-NREN", "1")
			}
		}
	}

	w.WriteHeader(http.StatusOK)
}
// lookupHandler serves /lookup. It decides whether a domain belongs to an
// NREN / university using two independent checks:
//
//   - ASN check: the domain is resolved and each IP's ASN is compared with
//     the NREN ASN set.
//   - Domain check: the domain is matched against the university domain list.
//
// Query parameters: email= (the domain is taken from the address) or
// domain=; email takes precedence when both are given.
//
// Responses are JSON-encoded lookupResponse values:
//   - 503 while the service is not ready
//   - 400 when no domain can be derived from the parameters
//   - 200 otherwise; Error may still carry a note, e.g. when DNS resolution
//     failed but the domain list matched
func (s *server) lookupHandler(w http.ResponseWriter, r *http.Request) {
	if !s.ready.Load() {
		writeJSON(w, http.StatusServiceUnavailable, lookupResponse{
			NREN:  false,
			Error: "service not ready",
		})
		return
	}

	query := r.URL.Query()
	emailParam := query.Get("email")
	domainParam := query.Get("domain")

	var rawInput string
	resp := lookupResponse{}
	switch {
	case emailParam != "":
		rawInput = emailParam
		if !strings.Contains(emailParam, "@") {
			resp.Error = "email param has no @ — interpreted as bare domain"
		}
	case domainParam != "":
		rawInput = domainParam
	}

	// Reject input that yields no domain at all (both parameters missing,
	// whitespace only, or an address such as "user@") instead of sending an
	// empty name to the resolver.
	domain := extractDomain(rawInput)
	if domain == "" {
		writeJSON(w, http.StatusBadRequest, lookupResponse{
			NREN:  false,
			Error: "missing domain",
		})
		return
	}
	resp.Domain = domain

	// Resolve with the request context so the lookup is abandoned when the
	// client goes away.
	addrs, dnsErr := net.DefaultResolver.LookupIPAddr(r.Context(), domain)
	if dnsErr == nil && len(addrs) > 0 {
		resp.IPs = make([]string, 0, len(addrs))
		var firstASN *uint
		var firstOrg string
		for _, addr := range addrs {
			ip := addr.IP
			ipStr := ip.String()
			resp.IPs = append(resp.IPs, ipStr)
			if s.db == nil {
				continue
			}
			var rec asnRecord
			if err := s.db.Lookup(ip, &rec); err != nil || rec.ASN == 0 {
				continue
			}
			// Remember the first ASN seen as a fallback for the response.
			if firstASN == nil {
				asn := rec.ASN
				firstASN = &asn
				firstOrg = rec.Org
			}
			if _, ok := s.nrenASNs[rec.ASN]; ok {
				asn := rec.ASN
				resp.ASNMatch = true
				resp.ASN = &asn
				resp.ASNOrg = rec.Org
				resp.MatchedIP = ipStr
				// The first match decides; later addresses are not examined
				// and therefore do not appear in resp.IPs.
				break
			}
		}
		if !resp.ASNMatch && firstASN != nil {
			resp.ASN = firstASN
			resp.ASNOrg = firstOrg
		}
	} else if resp.Error == "" {
		// Keep an earlier note (bare-domain hint) rather than overwriting it.
		resp.Error = "domain lookup failed"
	}

	// The domain list check runs regardless of the DNS outcome.
	resp.DomainMatch, resp.MatchedDomain = matchesUniDomain(domain, s.uniDomains)
	resp.NREN = resp.ASNMatch || resp.DomainMatch
	writeJSON(w, http.StatusOK, resp)
}
// main wires the service together: it reads the configuration from the
// environment, loads the MMDB database and the two lookup lists, registers
// the HTTP handlers and serves until ListenAndServe returns.
//
// A missing or unreadable MMDB file or ASN list is fatal. A missing or
// unreadable university domain list only produces a warning and leaves the
// domain check without entries.
func main() {
	// Configuration; every value has a default and can be overridden.
	mmdbPath := getenv("MMDB_PATH", "/data/GeoLite2-ASN.mmdb")
	asnListPath := getenv("ASN_LIST_PATH", "/data/nren_asns.txt")
	uniDomainsPath := getenv("UNI_DOMAINS_PATH", "/data/uni_domains.txt")
	addr := getenv("ADDR", ":8080")
	version := getenv("VERSION_TAG", "asn-header-service")
	minASN := getenvInt("MIN_ASN_COUNT", 10)

	db, err := maxminddb.Open(mmdbPath)
	if err != nil {
		log.Fatalf("failed to open mmdb: %v", err)
	}
	defer db.Close()

	nrenASNs, err := loadASNSet(asnListPath)
	if err != nil {
		log.Fatalf("failed to load asn list: %v", err)
	}

	uniDomains, err := loadDomainSet(uniDomainsPath)
	if err != nil {
		switch {
		case os.IsNotExist(err):
			log.Printf("[warn] uni_domains.txt not found — domain_match will always be false")
		default:
			log.Printf("[warn] failed to load uni_domains.txt: %v", err)
		}
		// Fall back to an empty set so the service still starts.
		uniDomains = map[string]struct{}{}
	}

	app := &server{
		db:         db,
		nrenASNs:   nrenASNs,
		uniDomains: uniDomains,
		versionTag: version,
		minASN:     minASN,
		asnCount:   len(nrenASNs),
	}
	app.ready.Store(true)

	router := http.NewServeMux()
	router.HandleFunc("/auth", app.authHandler)
	router.HandleFunc("/lookup", app.lookupHandler)
	router.HandleFunc("/healthz", app.healthzHandler)

	srv := &http.Server{
		Addr:              addr,
		Handler:           router,
		ReadHeaderTimeout: 2 * time.Second,
	}

	log.Printf("listening on %s (asn_count=%d, min_asn=%d, domain_count=%d)", addr, app.asnCount, minASN, len(uniDomains))
	log.Fatal(srv.ListenAndServe())
}
// getenv returns the value of environment variable k with surrounding
// whitespace removed, or def when the variable is unset, empty or blank.
func getenv(k, def string) string {
	if value := strings.TrimSpace(os.Getenv(k)); value != "" {
		return value
	}
	return def
}
// getenvInt returns environment variable k parsed as a decimal integer.
// def is returned when the variable is unset, blank, or not a valid integer.
func getenvInt(k string, def int) int {
	raw := strings.TrimSpace(os.Getenv(k))
	if raw == "" {
		return def
	}
	if n, err := strconv.Atoi(raw); err == nil {
		return n
	}
	// Unparsable values fall back to the default.
	return def
}