package app import ( "fmt" "sort" "strings" "time" ) func runTransportPolicyHealthCheck(clients []TransportClient, plan TransportPolicyCompilePlan, now time.Time) (TransportPolicyHealthCheck, []TransportClient, bool) { updated := append([]TransportClient(nil), clients...) clientIDs := collectTransportPolicyHealthCheckClientIDs(plan) items := make([]TransportPolicyHealthCheckItem, 0, len(clientIDs)) itemByClientID := make(map[string]TransportPolicyHealthCheckItem, len(clientIDs)) checkedCount := 0 failedCount := 0 changed := false for _, clientID := range clientIDs { idx := findTransportClientIndex(updated, clientID) if idx < 0 { item := TransportPolicyHealthCheckItem{ ClientID: clientID, Required: true, OK: false, Code: "TRANSPORT_CLIENT_NOT_FOUND", Message: "client not found during health-check", } items = append(items, item) itemByClientID[clientID] = item checkedCount++ failedCount++ continue } current := updated[idx] required := transportPolicyHealthCheckRequired(current) item := TransportPolicyHealthCheckItem{ ClientID: current.ID, Kind: string(current.Kind), Required: required, OK: true, Status: string(normalizeTransportStatus(current.Status)), } if !required { item.Message = "skipped inactive draft client" items = append(items, item) itemByClientID[current.ID] = item continue } checkedCount++ backend := selectTransportBackend(current) probe := backend.Health(current) next := applyTransportHealthProbeSnapshot(current, backend.ID(), probe, now) updated[idx] = next if transportHealthChanged(current, next) || transportShouldPersistHealthSnapshot(current, next, now) { changed = true } item.Status = string(normalizeTransportStatus(next.Status)) item.Code = strings.TrimSpace(probe.Code) if probe.OK && next.Status != TransportClientDown { item.Message = "ok" items = append(items, item) itemByClientID[current.ID] = item continue } item.OK = false if item.Code == "" && next.Status == TransportClientDown { item.Code = "TRANSPORT_POLICY_HEALTH_DOWN" } msg := strings.TrimSpace(probe.Message) if msg == "" && next.Status == TransportClientDown { msg = "transport client is down after apply" } if msg == "" { msg = "transport policy health-check failed" } item.Message = msg items = append(items, item) itemByClientID[current.ID] = item failedCount++ } clientByID := make(map[string]TransportClient, len(updated)) for _, client := range updated { clientByID[client.ID] = client } interfaces := buildTransportPolicyHealthCheckInterfaces(plan, itemByClientID, clientByID, now) resp := TransportPolicyHealthCheck{ OK: failedCount == 0, CheckedCount: checkedCount, FailedCount: failedCount, InterfaceCount: len(interfaces), Interfaces: interfaces, Items: items, } switch { case checkedCount == 0: resp.OK = true resp.Message = "health-check skipped: no active transport clients in policy" case failedCount == 0: resp.Message = fmt.Sprintf("health-check passed for %d client(s)", checkedCount) default: resp.Message = fmt.Sprintf("health-check failed for %d of %d client(s)", failedCount, checkedCount) } return resp, updated, changed } func buildTransportPolicyHealthCheckInterfaces( plan TransportPolicyCompilePlan, itemByClientID map[string]TransportPolicyHealthCheckItem, clientByID map[string]TransportClient, now time.Time, ) []TransportPolicyHealthCheckInterface { if len(plan.Interfaces) == 0 { return nil } out := make([]TransportPolicyHealthCheckInterface, 0, len(plan.Interfaces)) for _, iface := range plan.Interfaces { summary := TransportPolicyHealthCheckInterface{ IfaceID: normalizeTransportIfaceID(iface.IfaceID), Mode: strings.TrimSpace(iface.Mode), RuntimeIface: strings.TrimSpace(iface.RuntimeIface), NetnsName: strings.TrimSpace(iface.NetnsName), RoutingTable: strings.TrimSpace(iface.RoutingTable), Status: string(TransportClientDown), OK: true, } seen := map[string]struct{}{} members := make([]TransportClient, 0, len(iface.ClientIDs)) for _, rawClientID := range iface.ClientIDs { clientID := sanitizeID(rawClientID) if clientID == "" { continue } if _, ok := seen[clientID]; ok { continue } seen[clientID] = struct{}{} summary.ClientIDs = append(summary.ClientIDs, clientID) summary.ClientCount++ if client, ok := clientByID[clientID]; ok { members = append(members, client) } item, ok := itemByClientID[clientID] if !ok { continue } if item.Required { summary.CheckedCount++ if !item.OK { summary.FailedCount++ } continue } summary.SkippedCount++ } if len(members) > 0 { counters := buildTransportRuntimeObservabilityCounters(members) summary.Status = string(aggregateTransportRuntimeObservabilityStatus(counters)) if primary, ok := selectTransportRuntimeObservabilityPrimaryClient(members); ok { summary.ActiveClientID = primary.ID summary.LatencyMS = primary.Health.LatencyMS if summary.RuntimeIface == "" { summary.RuntimeIface = strings.TrimSpace(primary.Iface) } if summary.NetnsName == "" && transportNetnsEnabled(primary) { summary.NetnsName = transportNetnsName(primary) } if summary.RoutingTable == "" { summary.RoutingTable = strings.TrimSpace(primary.RoutingTable) } } if errClient, ok := selectTransportRuntimeObservabilityErrorClient(members); ok { summary.LastError = transportRuntimeObservabilityClientError(errClient, now) } } switch { case summary.ClientCount == 0: summary.Message = "health-check skipped: no compiled clients on interface" case summary.CheckedCount == 0: summary.Message = "health-check skipped: no active transport clients on interface" case summary.FailedCount == 0: summary.Message = fmt.Sprintf("health-check passed for %d client(s) on interface", summary.CheckedCount) default: summary.OK = false summary.Message = fmt.Sprintf( "health-check failed for %d of %d client(s) on interface", summary.FailedCount, summary.CheckedCount, ) } out = append(out, summary) } return out } func collectTransportPolicyHealthCheckClientIDs(plan TransportPolicyCompilePlan) []string { seen := map[string]struct{}{} out := make([]string, 0, plan.RuleCount) for _, iface := range plan.Interfaces { for _, clientID := range iface.ClientIDs { id := sanitizeID(clientID) if id == "" { continue } if _, ok := seen[id]; ok { continue } seen[id] = struct{}{} out = append(out, id) } } sort.Strings(out) return out } func transportPolicyHealthCheckRequired(client TransportClient) bool { if client.Enabled { return true } return normalizeTransportStatus(client.Status) != TransportClientDown }