From 5bd7f1c9f4463aff64bca8bb9d0ffdbc8628458b Mon Sep 17 00:00:00 2001 From: beckline Date: Wed, 25 Feb 2026 09:39:53 +0300 Subject: [PATCH] resolver: surface timeout-recheck stats and keep timeout-only domains suspect --- PLAN_DHSQ_GLOBAL.md | 2 +- selective-vpn-api/app/resolver.go | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/PLAN_DHSQ_GLOBAL.md b/PLAN_DHSQ_GLOBAL.md index 3016473..7017711 100644 --- a/PLAN_DHSQ_GLOBAL.md +++ b/PLAN_DHSQ_GLOBAL.md @@ -6,7 +6,7 @@ - [x] ~~3. Add stale-keep policy.~~ - [x] ~~4. Wire 24h precheck cycle (soft pruning only).~~ - [x] ~~5. Expose metrics/log clarity in API + GUI (API/trace done; DNS benchmark load-profile UI done; route badges done).~~ -- [ ] 6. Tune thresholds with production data. +- [ ] 6. Tune thresholds with production data (pass-1 done: timeout-only now stays suspect/no quarantine by default). ## 1) Goal - Stabilize resolver behavior under high domain volume. diff --git a/selective-vpn-api/app/resolver.go b/selective-vpn-api/app/resolver.go index 0bb3107..4ba02a1 100644 --- a/selective-vpn-api/app/resolver.go +++ b/selective-vpn-api/app/resolver.go @@ -1813,8 +1813,18 @@ func (s *domainCacheState) setErrorWithStats(domain string, source domainCacheSo if entry == nil { entry = &domainCacheEntry{} } + prevKind, _ := normalizeCacheErrorKind(entry.LastErrorKind) entry.Score = clampDomainScore(entry.Score + penalty) entry.State = domainStateFromScore(entry.Score) + + // Timeout-only failures are treated as transient transport noise by default. + // Keep them in suspect bucket (no quarantine) unless we have NX signal. + if normKind == dnsErrorTimeout && prevKind != dnsErrorNXDomain { + if entry.Score < -10 { + entry.Score = -10 + } + entry.State = domainStateSuspect + } entry.LastErrorKind = string(normKind) entry.LastErrorAt = now switch entry.State {