resolver: split unresolved metrics and soften NX hard quarantine

This commit is contained in:
beckline
2026-02-25 09:54:54 +03:00
parent 5bd7f1c9f4
commit e8fb361b4c
3 changed files with 27 additions and 4 deletions

View File

@@ -408,12 +408,13 @@ func runResolverJob(opts ResolverOpts, logf func(string, ...any)) (resolverResul
directPolicy := directDNSAttemptPolicy(len(cfg.Default))
wildcardPolicy := wildcardDNSAttemptPolicy(1)
logf(
"resolver policy: direct_try=%d direct_budget_ms=%d wildcard_try=%d wildcard_budget_ms=%d nx_early_stop=%t stale_keep_sec=%d precheck_every_sec=%d precheck_max=%d precheck_forced_env=%t precheck_forced_file=%t",
"resolver policy: direct_try=%d direct_budget_ms=%d wildcard_try=%d wildcard_budget_ms=%d nx_early_stop=%t nx_hard_quarantine=%t stale_keep_sec=%d precheck_every_sec=%d precheck_max=%d precheck_forced_env=%t precheck_forced_file=%t",
directPolicy.TryLimit,
directPolicy.DomainBudget.Milliseconds(),
wildcardPolicy.TryLimit,
wildcardPolicy.DomainBudget.Milliseconds(),
resolveNXEarlyStopEnabled(),
resolveNXHardQuarantineEnabled(),
staleKeepSec,
precheckEverySec,
precheckMaxDomains,
@@ -675,7 +676,7 @@ func runResolverJob(opts ResolverOpts, logf func(string, ...any)) (resolverResul
if logf != nil {
dnsErrors := dnsStats.totalErrors()
logf(
"resolve summary: domains=%d cache_hits=%d cache_neg_hits=%d quarantine_hits=%d stale_hits=%d resolved_now=%d unresolved=%d static_entries=%d static_skipped=%d unique_ips=%d direct_ips=%d wildcard_ips=%d ptr_lookups=%d ptr_errors=%d dns_attempts=%d dns_ok=%d dns_nxdomain=%d dns_timeout=%d dns_temporary=%d dns_other=%d dns_errors=%d timeout_recheck_checked=%d timeout_recheck_recovered=%d timeout_recheck_recovered_ips=%d timeout_recheck_still_timeout=%d timeout_recheck_now_nxdomain=%d timeout_recheck_now_temporary=%d timeout_recheck_now_other=%d timeout_recheck_no_signal=%d duration_ms=%d",
"resolve summary: domains=%d cache_hits=%d cache_neg_hits=%d quarantine_hits=%d stale_hits=%d resolved_now=%d unresolved=%d unresolved_live=%d unresolved_suppressed=%d static_entries=%d static_skipped=%d unique_ips=%d direct_ips=%d wildcard_ips=%d ptr_lookups=%d ptr_errors=%d dns_attempts=%d dns_ok=%d dns_nxdomain=%d dns_timeout=%d dns_temporary=%d dns_other=%d dns_errors=%d timeout_recheck_checked=%d timeout_recheck_recovered=%d timeout_recheck_recovered_ips=%d timeout_recheck_still_timeout=%d timeout_recheck_now_nxdomain=%d timeout_recheck_now_temporary=%d timeout_recheck_now_other=%d timeout_recheck_no_signal=%d duration_ms=%d",
len(domains),
len(fresh),
cacheNegativeHits,
@@ -683,6 +684,8 @@ func runResolverJob(opts ResolverOpts, logf func(string, ...any)) (resolverResul
staleHits,
len(resolved)-len(fresh),
len(domains)-len(resolved),
unresolvedAfterAttempts,
cacheNegativeHits+quarantineHits,
len(staticEntries),
staticSkipped,
len(res.IPs),
@@ -1164,6 +1167,15 @@ func resolveNXEarlyStopEnabled() bool {
}
}
// resolveNXHardQuarantineEnabled reports whether the RESOLVE_NX_HARD_QUARANTINE
// environment variable holds an affirmative value.
//
// The value is trimmed of surrounding whitespace and lower-cased before
// comparison; "1", "true", "yes" and "on" yield true. Anything else,
// including an unset or empty variable, yields false, so the feature is
// opt-in.
func resolveNXHardQuarantineEnabled() bool {
	raw := os.Getenv("RESOLVE_NX_HARD_QUARANTINE")
	flag := strings.ToLower(strings.TrimSpace(raw))
	return flag == "1" || flag == "true" || flag == "yes" || flag == "on"
}
func resolvePrecheckForceEnvEnabled() bool {
switch strings.ToLower(strings.TrimSpace(os.Getenv("RESOLVE_PRECHECK_FORCE"))) {
case "1", "true", "yes", "on":
@@ -1825,6 +1837,14 @@ func (s *domainCacheState) setErrorWithStats(domain string, source domainCacheSo
}
entry.State = domainStateSuspect
}
// NXDOMAIN-heavy synthetic subdomains create large false "hard quarantine" pools.
// By default, keep NX failures in regular quarantine (24h), not hard quarantine.
if normKind == dnsErrorNXDomain && !resolveNXHardQuarantineEnabled() && entry.State == domainStateHardQuar {
entry.State = domainStateQuarantine
if entry.Score < -30 {
entry.Score = -30
}
}
entry.LastErrorKind = string(normKind)
entry.LastErrorAt = now
switch entry.State {