Files
elmprodvpn/selective-vpn-api/app/transport_policy_apply_health.go

228 lines
6.8 KiB
Go

package app
import (
"fmt"
"sort"
"strings"
"time"
)
// runTransportPolicyHealthCheck probes every client referenced by the compiled
// policy plan and summarizes the outcome.
//
// The probes run against a shallow copy of clients; the caller's slice is not
// written to. The function returns:
//   - the health-check report (per-client items plus per-interface summaries),
//   - the copied client slice with each probed entry replaced by its new
//     health snapshot,
//   - a flag that is true when at least one probed client either changed
//     health or should have its snapshot persisted.
//
// A client ID that is in the plan but missing from clients counts as both
// checked and failed. Clients for which transportPolicyHealthCheckRequired
// reports false are listed as OK but are neither probed nor counted.
func runTransportPolicyHealthCheck(clients []TransportClient, plan TransportPolicyCompilePlan, now time.Time) (TransportPolicyHealthCheck, []TransportClient, bool) {
	snapshot := append([]TransportClient(nil), clients...)
	ids := collectTransportPolicyHealthCheckClientIDs(plan)

	var (
		results    = make([]TransportPolicyHealthCheckItem, 0, len(ids))
		resultByID = make(map[string]TransportPolicyHealthCheckItem, len(ids))
		checked    int
		failed     int
		dirty      bool
	)
	// record stores a finished item in both the ordered list and the lookup map.
	record := func(it TransportPolicyHealthCheckItem) {
		results = append(results, it)
		resultByID[it.ClientID] = it
	}

	for _, id := range ids {
		pos := findTransportClientIndex(snapshot, id)
		if pos < 0 {
			// The plan references a client we do not know about: hard failure.
			checked++
			failed++
			record(TransportPolicyHealthCheckItem{
				ClientID: id,
				Required: true,
				OK:       false,
				Code:     "TRANSPORT_CLIENT_NOT_FOUND",
				Message:  "client not found during health-check",
			})
			continue
		}

		before := snapshot[pos]
		it := TransportPolicyHealthCheckItem{
			ClientID: before.ID,
			Kind:     string(before.Kind),
			Required: transportPolicyHealthCheckRequired(before),
			OK:       true,
			Status:   string(normalizeTransportStatus(before.Status)),
		}
		if !it.Required {
			it.Message = "skipped inactive draft client"
			record(it)
			continue
		}

		checked++
		backend := selectTransportBackend(before)
		probe := backend.Health(before)
		after := applyTransportHealthProbeSnapshot(before, backend.ID(), probe, now)
		snapshot[pos] = after
		if transportHealthChanged(before, after) || transportShouldPersistHealthSnapshot(before, after, now) {
			dirty = true
		}

		it.Status = string(normalizeTransportStatus(after.Status))
		it.Code = strings.TrimSpace(probe.Code)
		down := after.Status == TransportClientDown
		if probe.OK && !down {
			it.Message = "ok"
			record(it)
			continue
		}

		// Failure path: fill in a code and message when the probe gave none.
		failed++
		it.OK = false
		if down && it.Code == "" {
			it.Code = "TRANSPORT_POLICY_HEALTH_DOWN"
		}
		reason := strings.TrimSpace(probe.Message)
		if reason == "" {
			reason = "transport policy health-check failed"
			if down {
				reason = "transport client is down after apply"
			}
		}
		it.Message = reason
		record(it)
	}

	// Index the post-probe clients so interface summaries see fresh health data.
	byID := make(map[string]TransportClient, len(snapshot))
	for _, c := range snapshot {
		byID[c.ID] = c
	}
	ifaceSummaries := buildTransportPolicyHealthCheckInterfaces(plan, resultByID, byID, now)

	report := TransportPolicyHealthCheck{
		OK:             failed == 0,
		CheckedCount:   checked,
		FailedCount:    failed,
		InterfaceCount: len(ifaceSummaries),
		Interfaces:     ifaceSummaries,
		Items:          results,
	}
	switch {
	case checked == 0:
		report.OK = true
		report.Message = "health-check skipped: no active transport clients in policy"
	case failed == 0:
		report.Message = fmt.Sprintf("health-check passed for %d client(s)", checked)
	default:
		report.Message = fmt.Sprintf("health-check failed for %d of %d client(s)", failed, checked)
	}
	return report, snapshot, dirty
}
// buildTransportPolicyHealthCheckInterfaces produces one health summary per
// interface in the compiled plan, in plan order.
//
// itemByClientID supplies the per-client health-check results and clientByID
// the client records used for status aggregation. Client IDs are sanitized
// and de-duplicated per interface; IDs without a health-check item still
// count toward ClientCount but not toward the checked/failed/skipped tallies.
// It returns nil when the plan has no interfaces.
func buildTransportPolicyHealthCheckInterfaces(
	plan TransportPolicyCompilePlan,
	itemByClientID map[string]TransportPolicyHealthCheckItem,
	clientByID map[string]TransportClient,
	now time.Time,
) []TransportPolicyHealthCheckInterface {
	total := len(plan.Interfaces)
	if total == 0 {
		return nil
	}

	summaries := make([]TransportPolicyHealthCheckInterface, 0, total)
	for _, planned := range plan.Interfaces {
		entry := TransportPolicyHealthCheckInterface{
			IfaceID:      normalizeTransportIfaceID(planned.IfaceID),
			Mode:         strings.TrimSpace(planned.Mode),
			RuntimeIface: strings.TrimSpace(planned.RuntimeIface),
			NetnsName:    strings.TrimSpace(planned.NetnsName),
			RoutingTable: strings.TrimSpace(planned.RoutingTable),
			// Starts as "down"; replaced below once known clients are found.
			Status: string(TransportClientDown),
			OK:     true,
		}

		visited := map[string]struct{}{}
		resolved := make([]TransportClient, 0, len(planned.ClientIDs))
		for _, raw := range planned.ClientIDs {
			id := sanitizeID(raw)
			if id == "" {
				continue
			}
			if _, dup := visited[id]; dup {
				continue
			}
			visited[id] = struct{}{}

			entry.ClientIDs = append(entry.ClientIDs, id)
			entry.ClientCount++
			if known, found := clientByID[id]; found {
				resolved = append(resolved, known)
			}

			result, found := itemByClientID[id]
			switch {
			case !found:
				// No health-check item for this client: nothing to tally.
			case !result.Required:
				entry.SkippedCount++
			default:
				entry.CheckedCount++
				if !result.OK {
					entry.FailedCount++
				}
			}
		}

		if len(resolved) > 0 {
			entry.Status = string(aggregateTransportRuntimeObservabilityStatus(
				buildTransportRuntimeObservabilityCounters(resolved),
			))
			if lead, found := selectTransportRuntimeObservabilityPrimaryClient(resolved); found {
				entry.ActiveClientID = lead.ID
				entry.LatencyMS = lead.Health.LatencyMS
				// Use the primary client's runtime details only for fields
				// the plan left blank.
				if entry.RuntimeIface == "" {
					entry.RuntimeIface = strings.TrimSpace(lead.Iface)
				}
				if entry.NetnsName == "" && transportNetnsEnabled(lead) {
					entry.NetnsName = transportNetnsName(lead)
				}
				if entry.RoutingTable == "" {
					entry.RoutingTable = strings.TrimSpace(lead.RoutingTable)
				}
			}
			if failing, found := selectTransportRuntimeObservabilityErrorClient(resolved); found {
				entry.LastError = transportRuntimeObservabilityClientError(failing, now)
			}
		}

		switch {
		case entry.ClientCount == 0:
			entry.Message = "health-check skipped: no compiled clients on interface"
		case entry.CheckedCount == 0:
			entry.Message = "health-check skipped: no active transport clients on interface"
		case entry.FailedCount == 0:
			entry.Message = fmt.Sprintf("health-check passed for %d client(s) on interface", entry.CheckedCount)
		default:
			entry.OK = false
			entry.Message = fmt.Sprintf(
				"health-check failed for %d of %d client(s) on interface",
				entry.FailedCount,
				entry.CheckedCount,
			)
		}
		summaries = append(summaries, entry)
	}
	return summaries
}
// collectTransportPolicyHealthCheckClientIDs gathers the client IDs referenced
// by any interface in the plan. Each ID is sanitized, empty results are
// dropped, duplicates are removed, and the result is sorted ascending.
func collectTransportPolicyHealthCheckClientIDs(plan TransportPolicyCompilePlan) []string {
	ids := make([]string, 0, plan.RuleCount)
	unique := map[string]struct{}{}
	for _, planned := range plan.Interfaces {
		for _, raw := range planned.ClientIDs {
			id := sanitizeID(raw)
			if _, dup := unique[id]; id == "" || dup {
				continue
			}
			unique[id] = struct{}{}
			ids = append(ids, id)
		}
	}
	sort.Strings(ids)
	return ids
}
// transportPolicyHealthCheckRequired reports whether a client must be probed
// during the policy health-check: true for any enabled client, and for a
// disabled client whose normalized status is anything other than down.
func transportPolicyHealthCheckRequired(client TransportClient) bool {
	return client.Enabled || normalizeTransportStatus(client.Status) != TransportClientDown
}