228 lines
6.8 KiB
Go
228 lines
6.8 KiB
Go
package app
|
|
|
|
import (
|
|
"fmt"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
func runTransportPolicyHealthCheck(clients []TransportClient, plan TransportPolicyCompilePlan, now time.Time) (TransportPolicyHealthCheck, []TransportClient, bool) {
|
|
updated := append([]TransportClient(nil), clients...)
|
|
clientIDs := collectTransportPolicyHealthCheckClientIDs(plan)
|
|
items := make([]TransportPolicyHealthCheckItem, 0, len(clientIDs))
|
|
itemByClientID := make(map[string]TransportPolicyHealthCheckItem, len(clientIDs))
|
|
checkedCount := 0
|
|
failedCount := 0
|
|
changed := false
|
|
|
|
for _, clientID := range clientIDs {
|
|
idx := findTransportClientIndex(updated, clientID)
|
|
if idx < 0 {
|
|
item := TransportPolicyHealthCheckItem{
|
|
ClientID: clientID,
|
|
Required: true,
|
|
OK: false,
|
|
Code: "TRANSPORT_CLIENT_NOT_FOUND",
|
|
Message: "client not found during health-check",
|
|
}
|
|
items = append(items, item)
|
|
itemByClientID[clientID] = item
|
|
checkedCount++
|
|
failedCount++
|
|
continue
|
|
}
|
|
|
|
current := updated[idx]
|
|
required := transportPolicyHealthCheckRequired(current)
|
|
item := TransportPolicyHealthCheckItem{
|
|
ClientID: current.ID,
|
|
Kind: string(current.Kind),
|
|
Required: required,
|
|
OK: true,
|
|
Status: string(normalizeTransportStatus(current.Status)),
|
|
}
|
|
if !required {
|
|
item.Message = "skipped inactive draft client"
|
|
items = append(items, item)
|
|
itemByClientID[current.ID] = item
|
|
continue
|
|
}
|
|
|
|
checkedCount++
|
|
backend := selectTransportBackend(current)
|
|
probe := backend.Health(current)
|
|
next := applyTransportHealthProbeSnapshot(current, backend.ID(), probe, now)
|
|
updated[idx] = next
|
|
if transportHealthChanged(current, next) || transportShouldPersistHealthSnapshot(current, next, now) {
|
|
changed = true
|
|
}
|
|
|
|
item.Status = string(normalizeTransportStatus(next.Status))
|
|
item.Code = strings.TrimSpace(probe.Code)
|
|
if probe.OK && next.Status != TransportClientDown {
|
|
item.Message = "ok"
|
|
items = append(items, item)
|
|
itemByClientID[current.ID] = item
|
|
continue
|
|
}
|
|
|
|
item.OK = false
|
|
if item.Code == "" && next.Status == TransportClientDown {
|
|
item.Code = "TRANSPORT_POLICY_HEALTH_DOWN"
|
|
}
|
|
msg := strings.TrimSpace(probe.Message)
|
|
if msg == "" && next.Status == TransportClientDown {
|
|
msg = "transport client is down after apply"
|
|
}
|
|
if msg == "" {
|
|
msg = "transport policy health-check failed"
|
|
}
|
|
item.Message = msg
|
|
items = append(items, item)
|
|
itemByClientID[current.ID] = item
|
|
failedCount++
|
|
}
|
|
|
|
clientByID := make(map[string]TransportClient, len(updated))
|
|
for _, client := range updated {
|
|
clientByID[client.ID] = client
|
|
}
|
|
interfaces := buildTransportPolicyHealthCheckInterfaces(plan, itemByClientID, clientByID, now)
|
|
|
|
resp := TransportPolicyHealthCheck{
|
|
OK: failedCount == 0,
|
|
CheckedCount: checkedCount,
|
|
FailedCount: failedCount,
|
|
InterfaceCount: len(interfaces),
|
|
Interfaces: interfaces,
|
|
Items: items,
|
|
}
|
|
switch {
|
|
case checkedCount == 0:
|
|
resp.OK = true
|
|
resp.Message = "health-check skipped: no active transport clients in policy"
|
|
case failedCount == 0:
|
|
resp.Message = fmt.Sprintf("health-check passed for %d client(s)", checkedCount)
|
|
default:
|
|
resp.Message = fmt.Sprintf("health-check failed for %d of %d client(s)", failedCount, checkedCount)
|
|
}
|
|
return resp, updated, changed
|
|
}
|
|
|
|
func buildTransportPolicyHealthCheckInterfaces(
|
|
plan TransportPolicyCompilePlan,
|
|
itemByClientID map[string]TransportPolicyHealthCheckItem,
|
|
clientByID map[string]TransportClient,
|
|
now time.Time,
|
|
) []TransportPolicyHealthCheckInterface {
|
|
if len(plan.Interfaces) == 0 {
|
|
return nil
|
|
}
|
|
out := make([]TransportPolicyHealthCheckInterface, 0, len(plan.Interfaces))
|
|
for _, iface := range plan.Interfaces {
|
|
summary := TransportPolicyHealthCheckInterface{
|
|
IfaceID: normalizeTransportIfaceID(iface.IfaceID),
|
|
Mode: strings.TrimSpace(iface.Mode),
|
|
RuntimeIface: strings.TrimSpace(iface.RuntimeIface),
|
|
NetnsName: strings.TrimSpace(iface.NetnsName),
|
|
RoutingTable: strings.TrimSpace(iface.RoutingTable),
|
|
Status: string(TransportClientDown),
|
|
OK: true,
|
|
}
|
|
seen := map[string]struct{}{}
|
|
members := make([]TransportClient, 0, len(iface.ClientIDs))
|
|
for _, rawClientID := range iface.ClientIDs {
|
|
clientID := sanitizeID(rawClientID)
|
|
if clientID == "" {
|
|
continue
|
|
}
|
|
if _, ok := seen[clientID]; ok {
|
|
continue
|
|
}
|
|
seen[clientID] = struct{}{}
|
|
summary.ClientIDs = append(summary.ClientIDs, clientID)
|
|
summary.ClientCount++
|
|
if client, ok := clientByID[clientID]; ok {
|
|
members = append(members, client)
|
|
}
|
|
item, ok := itemByClientID[clientID]
|
|
if !ok {
|
|
continue
|
|
}
|
|
if item.Required {
|
|
summary.CheckedCount++
|
|
if !item.OK {
|
|
summary.FailedCount++
|
|
}
|
|
continue
|
|
}
|
|
summary.SkippedCount++
|
|
}
|
|
if len(members) > 0 {
|
|
counters := buildTransportRuntimeObservabilityCounters(members)
|
|
summary.Status = string(aggregateTransportRuntimeObservabilityStatus(counters))
|
|
if primary, ok := selectTransportRuntimeObservabilityPrimaryClient(members); ok {
|
|
summary.ActiveClientID = primary.ID
|
|
summary.LatencyMS = primary.Health.LatencyMS
|
|
if summary.RuntimeIface == "" {
|
|
summary.RuntimeIface = strings.TrimSpace(primary.Iface)
|
|
}
|
|
if summary.NetnsName == "" && transportNetnsEnabled(primary) {
|
|
summary.NetnsName = transportNetnsName(primary)
|
|
}
|
|
if summary.RoutingTable == "" {
|
|
summary.RoutingTable = strings.TrimSpace(primary.RoutingTable)
|
|
}
|
|
}
|
|
if errClient, ok := selectTransportRuntimeObservabilityErrorClient(members); ok {
|
|
summary.LastError = transportRuntimeObservabilityClientError(errClient, now)
|
|
}
|
|
}
|
|
switch {
|
|
case summary.ClientCount == 0:
|
|
summary.Message = "health-check skipped: no compiled clients on interface"
|
|
case summary.CheckedCount == 0:
|
|
summary.Message = "health-check skipped: no active transport clients on interface"
|
|
case summary.FailedCount == 0:
|
|
summary.Message = fmt.Sprintf("health-check passed for %d client(s) on interface", summary.CheckedCount)
|
|
default:
|
|
summary.OK = false
|
|
summary.Message = fmt.Sprintf(
|
|
"health-check failed for %d of %d client(s) on interface",
|
|
summary.FailedCount,
|
|
summary.CheckedCount,
|
|
)
|
|
}
|
|
out = append(out, summary)
|
|
}
|
|
return out
|
|
}
|
|
|
|
func collectTransportPolicyHealthCheckClientIDs(plan TransportPolicyCompilePlan) []string {
|
|
seen := map[string]struct{}{}
|
|
out := make([]string, 0, plan.RuleCount)
|
|
for _, iface := range plan.Interfaces {
|
|
for _, clientID := range iface.ClientIDs {
|
|
id := sanitizeID(clientID)
|
|
if id == "" {
|
|
continue
|
|
}
|
|
if _, ok := seen[id]; ok {
|
|
continue
|
|
}
|
|
seen[id] = struct{}{}
|
|
out = append(out, id)
|
|
}
|
|
}
|
|
sort.Strings(out)
|
|
return out
|
|
}
|
|
|
|
func transportPolicyHealthCheckRequired(client TransportClient) bool {
|
|
if client.Enabled {
|
|
return true
|
|
}
|
|
return normalizeTransportStatus(client.Status) != TransportClientDown
|
|
}
|