Compare commits
5 Commits
1b62d20320
...
e659714335
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e659714335 | ||
|
|
fd3efbc13a | ||
|
|
7dc89e3bb5 | ||
|
|
db000e8e48 | ||
| 611270127f |
146
Jenkinsfile
vendored
146
Jenkinsfile
vendored
@ -90,9 +90,19 @@ spec:
|
||||
TEST_EXIT_CODE_PATH = 'build/test.exitcode'
|
||||
SUITE_NAME = 'metis'
|
||||
PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
|
||||
QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
|
||||
QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
|
||||
}
|
||||
options {
|
||||
disableConcurrentBuilds()
|
||||
buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
|
||||
}
|
||||
parameters {
|
||||
booleanParam(
|
||||
name: 'PUBLISH_IMAGES',
|
||||
defaultValue: false,
|
||||
description: 'Build and push runtime images (enable for release runs).'
|
||||
)
|
||||
}
|
||||
triggers {
|
||||
pollSCM('H/5 * * * *')
|
||||
@ -104,7 +114,74 @@ spec:
|
||||
}
|
||||
}
|
||||
|
||||
stage('Unit tests') {
|
||||
stage('Collect SonarQube evidence') {
|
||||
steps {
|
||||
container('publisher') {
|
||||
sh '''
|
||||
set -eu
|
||||
mkdir -p build
|
||||
python3 - <<'PY'
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
|
||||
host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/')
|
||||
project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip()
|
||||
token = os.getenv('SONARQUBE_TOKEN', '').strip()
|
||||
report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json')
|
||||
payload = {"status": "ERROR", "note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY"}
|
||||
if host and project_key:
|
||||
query = urllib.parse.urlencode({"projectKey": project_key})
|
||||
request = urllib.request.Request(f"{host}/api/qualitygates/project_status?{query}", method="GET")
|
||||
if token:
|
||||
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
|
||||
request.add_header("Authorization", f"Basic {encoded}")
|
||||
try:
|
||||
with urllib.request.urlopen(request, timeout=12) as response:
|
||||
payload = json.loads(response.read().decode("utf-8"))
|
||||
except Exception as exc: # noqa: BLE001
|
||||
payload = {"status": "ERROR", "error": str(exc)}
|
||||
with open(report_path, "w", encoding="utf-8") as handle:
|
||||
json.dump(payload, handle, indent=2, sort_keys=True)
|
||||
handle.write("\\n")
|
||||
PY
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('Collect Supply Chain evidence') {
|
||||
steps {
|
||||
container('publisher') {
|
||||
sh '''
|
||||
set -eu
|
||||
mkdir -p build
|
||||
python3 - <<'PY'
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json'))
|
||||
if report_path.exists():
|
||||
raise SystemExit(0)
|
||||
status = os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip()
|
||||
compliant = os.getenv('IRONBANK_COMPLIANT', '').strip().lower()
|
||||
payload = {"status": status or "unknown", "compliant": compliant in {"1", "true", "yes", "on"} if compliant else None}
|
||||
payload = {k: v for k, v in payload.items() if v is not None}
|
||||
if "status" not in payload:
|
||||
payload["status"] = "unknown"
|
||||
payload["note"] = "Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT or write build/ironbank-compliance.json in image-building repos."
|
||||
report_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
|
||||
PY
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('Run quality gate') {
|
||||
steps {
|
||||
container('tester') {
|
||||
sh '''
|
||||
@ -113,19 +190,50 @@ spec:
|
||||
apt-get install -y --no-install-recommends xz-utils >/dev/null
|
||||
mkdir -p build
|
||||
go install github.com/jstemmer/go-junit-report/v2@latest
|
||||
cd "${WORKSPACE}"
|
||||
docs_rc=1
|
||||
quality_rc=1
|
||||
test_rc=1
|
||||
|
||||
set +e
|
||||
go test -v -count=1 -coverprofile=build/coverage.out ./... > build/test.out 2>&1
|
||||
test_rc=$?
|
||||
cd "${WORKSPACE}/testing"
|
||||
METIS_USE_EXISTING_COVERAGE=1 go test -v -run TestExportedDocs ./...
|
||||
docs_rc=$?
|
||||
printf '%s\n' "${docs_rc}" > "${WORKSPACE}/build/docs-naming.rc"
|
||||
if [ "${docs_rc}" -eq 0 ]; then
|
||||
METIS_USE_EXISTING_COVERAGE=1 go test -v ./...
|
||||
quality_rc=$?
|
||||
fi
|
||||
cd "${WORKSPACE}"
|
||||
|
||||
if [ "${docs_rc}" -eq 0 ] && [ "${quality_rc}" -eq 0 ]; then
|
||||
go test -v -count=1 -coverprofile=build/coverage.out ./... > build/test.out 2>&1
|
||||
test_rc=$?
|
||||
fi
|
||||
set -e
|
||||
printf '%s\n' "${test_rc}" > "${TEST_EXIT_CODE_PATH}"
|
||||
cat build/test.out
|
||||
"$(go env GOPATH)/bin/go-junit-report" < build/test.out > "${JUNIT_XML}"
|
||||
|
||||
if [ -f build/test.out ]; then
|
||||
cat build/test.out
|
||||
"$(go env GOPATH)/bin/go-junit-report" < build/test.out > "${JUNIT_XML}"
|
||||
else
|
||||
cat > "${JUNIT_XML}" <<'EOF'
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<testsuites tests="0" failures="0" errors="0" skipped="0"></testsuites>
|
||||
EOF
|
||||
fi
|
||||
|
||||
coverage="0"
|
||||
if [ -f build/coverage.out ]; then
|
||||
coverage="$(go tool cover -func=build/coverage.out | awk '/^total:/ {gsub("%","",$3); print $3}')"
|
||||
fi
|
||||
export GO_COVERAGE="${coverage}"
|
||||
printf '{"summary":{"percent_covered":%s}}\n' "${GO_COVERAGE}" > "${COVERAGE_JSON}"
|
||||
|
||||
gate_rc=0
|
||||
if [ "${docs_rc}" -ne 0 ] || [ "${quality_rc}" -ne 0 ] || [ "${test_rc}" -ne 0 ]; then
|
||||
gate_rc=1
|
||||
fi
|
||||
printf '%s\n' "${gate_rc}" > "${TEST_EXIT_CODE_PATH}"
|
||||
'''
|
||||
}
|
||||
}
|
||||
@ -142,7 +250,7 @@ spec:
|
||||
}
|
||||
}
|
||||
|
||||
stage('Enforce test result') {
|
||||
stage('Enforce quality gate') {
|
||||
steps {
|
||||
container('tester') {
|
||||
sh '''
|
||||
@ -154,19 +262,10 @@ spec:
|
||||
}
|
||||
}
|
||||
|
||||
stage('Quality gate') {
|
||||
steps {
|
||||
container('tester') {
|
||||
sh '''
|
||||
set -eu
|
||||
cd testing
|
||||
METIS_USE_EXISTING_COVERAGE=1 go test -v ./...
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('Prep toolchain') {
|
||||
when {
|
||||
expression { return params.PUBLISH_IMAGES }
|
||||
}
|
||||
steps {
|
||||
container('builder') {
|
||||
sh '''
|
||||
@ -179,6 +278,9 @@ spec:
|
||||
}
|
||||
|
||||
stage('Compute version') {
|
||||
when {
|
||||
expression { return params.PUBLISH_IMAGES }
|
||||
}
|
||||
steps {
|
||||
container('builder') {
|
||||
script {
|
||||
@ -196,6 +298,9 @@ spec:
|
||||
}
|
||||
|
||||
stage('Buildx setup') {
|
||||
when {
|
||||
expression { return params.PUBLISH_IMAGES }
|
||||
}
|
||||
steps {
|
||||
container('builder') {
|
||||
sh '''
|
||||
@ -224,6 +329,9 @@ spec:
|
||||
}
|
||||
|
||||
stage('Build & push images') {
|
||||
when {
|
||||
expression { return params.PUBLISH_IMAGES }
|
||||
}
|
||||
steps {
|
||||
container('builder') {
|
||||
sh '''
|
||||
|
||||
@ -11,8 +11,10 @@ import (
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"metis/pkg/image"
|
||||
"metis/pkg/plan"
|
||||
"metis/pkg/service"
|
||||
"metis/pkg/writer"
|
||||
@ -63,9 +65,45 @@ func remoteBuildCmd(args []string) {
|
||||
}
|
||||
output := filepath.Join(*workDir, fmt.Sprintf("%s.img", *node))
|
||||
inv := loadInventory(*invPath)
|
||||
if err := plan.BuildImageFile(context.Background(), inv, *node, *cacheDir, output); err != nil {
|
||||
fatalf("build image: %v", err)
|
||||
emitStageProgress("build", 12, fmt.Sprintf("Resolving the replacement build plan for %s", *node))
|
||||
p, err := plan.Build(inv, *node, output, *cacheDir)
|
||||
if err != nil {
|
||||
fatalf("build plan: %v", err)
|
||||
}
|
||||
_, class, err := inv.FindNode(*node)
|
||||
if err != nil {
|
||||
fatalf("load node class: %v", err)
|
||||
}
|
||||
cacheImage := filepath.Join(*cacheDir, strings.TrimSuffix(filepath.Base(p.Image), ".xz"))
|
||||
emitStageProgress("build", 16, fmt.Sprintf("Downloading and verifying the base image for %s", *node))
|
||||
cacheImage, err = image.DownloadAndVerify(p.Image, cacheImage, class.Checksum)
|
||||
if err != nil {
|
||||
fatalf("download image: %v", err)
|
||||
}
|
||||
copyEmitter := newProgressEmitter("build", 20, 34, fmt.Sprintf("Copying the verified base image for %s", *node), false)
|
||||
if err := writer.WriteImageWithProgress(context.Background(), cacheImage, output, copyEmitter); err != nil {
|
||||
fatalf("copy base image: %v", err)
|
||||
}
|
||||
emitStageProgress("build", 36, fmt.Sprintf("Preparing node-specific injected files for %s", *node))
|
||||
files, err := plan.Files(inv, *node)
|
||||
if err != nil {
|
||||
fatalf("resolve files: %v", err)
|
||||
}
|
||||
rootfsProgress := map[string]service.RemoteProgressUpdate{
|
||||
image.RootFSProgressFindingPartition: {Stage: "build", ProgressPct: 40, Message: fmt.Sprintf("Finding the Linux root partition for %s", *node)},
|
||||
image.RootFSProgressExtracting: {Stage: "build", ProgressPct: 44, Message: fmt.Sprintf("Extracting the Linux root partition for %s", *node)},
|
||||
image.RootFSProgressWritingFiles: {Stage: "build", ProgressPct: 50, Message: fmt.Sprintf("Injecting node-specific files into the root filesystem for %s", *node)},
|
||||
image.RootFSProgressReplacing: {Stage: "build", ProgressPct: 56, Message: fmt.Sprintf("Replacing the root partition inside the replacement image for %s", *node)},
|
||||
}
|
||||
if err := image.InjectRootFSWithProgress(output, files, func(step string) {
|
||||
if update, ok := rootfsProgress[step]; ok {
|
||||
emitProgress(update)
|
||||
}
|
||||
}); err != nil {
|
||||
fatalf("inject rootfs: %v", err)
|
||||
}
|
||||
emitStageProgress("build", 58, fmt.Sprintf("Built the replacement image filesystem for %s", *node))
|
||||
emitStageProgress("build", 60, fmt.Sprintf("Compressing the replacement image for %s before upload", *node))
|
||||
if err := exec.Command("xz", "-T0", "-z", "-f", output).Run(); err != nil {
|
||||
fatalf("xz compress: %v", err)
|
||||
}
|
||||
@ -92,13 +130,17 @@ func remoteBuildCmd(args []string) {
|
||||
if err := os.WriteFile(metadataPath, metaBytes, 0o644); err != nil {
|
||||
fatalf("write metadata: %v", err)
|
||||
}
|
||||
emitStageProgress("build", 68, fmt.Sprintf("Compression complete for %s; preparing the Harbor upload", *node))
|
||||
emitStageProgress("build", 70, fmt.Sprintf("Authenticating to Harbor for %s", *node))
|
||||
if err := orasLogin(*harborRegistry, *harborUsername, *harborPassword); err != nil {
|
||||
fatalf("oras login: %v", err)
|
||||
}
|
||||
taggedRef := fmt.Sprintf("%s:%s", *artifactRef, *buildTag)
|
||||
emitStageProgress("build", 72, fmt.Sprintf("Uploading %s to Harbor", filepath.Base(compressedPath)))
|
||||
if err := orasPush(taggedRef, compressedPath, metadataPath); err != nil {
|
||||
fatalf("oras push: %v", err)
|
||||
}
|
||||
emitStageProgress("build", 76, fmt.Sprintf("Refreshing the latest Harbor tag for %s", *node))
|
||||
if err := orasTag(taggedRef, "latest"); err != nil {
|
||||
fatalf("oras tag latest: %v", err)
|
||||
}
|
||||
@ -133,18 +175,21 @@ func remoteFlashCmd(args []string) {
|
||||
if err := os.MkdirAll(*workDir, 0o755); err != nil {
|
||||
fatalf("mkdir workdir: %v", err)
|
||||
}
|
||||
emitStageProgress("flash", 84, fmt.Sprintf("Pulling the latest Harbor artifact for %s", *node))
|
||||
if err := orasLogin(*harborRegistry, *harborUsername, *harborPassword); err != nil {
|
||||
fatalf("oras login: %v", err)
|
||||
}
|
||||
if err := orasPull(fmt.Sprintf("%s:latest", *artifactRef), *workDir); err != nil {
|
||||
fatalf("oras pull: %v", err)
|
||||
}
|
||||
emitStageProgress("flash", 88, fmt.Sprintf("Preparing the downloaded image for %s", *node))
|
||||
imagePath, compressed, err := resolvePulledArtifact(*workDir)
|
||||
if err != nil {
|
||||
fatalf("resolve artifact: %v", err)
|
||||
}
|
||||
rawImage := imagePath
|
||||
if compressed {
|
||||
emitStageProgress("flash", 90, fmt.Sprintf("Decompressing the image for %s before writing", *node))
|
||||
rawImage = filepath.Join(*workDir, fmt.Sprintf("%s.img", *node))
|
||||
cmd := exec.Command("sh", "-lc", fmt.Sprintf("xz -dc '%s' > '%s'", imagePath, rawImage))
|
||||
if out, err := cmd.CombinedOutput(); err != nil {
|
||||
@ -159,9 +204,12 @@ func remoteFlashCmd(args []string) {
|
||||
}
|
||||
destPath = filepath.Join(*hostTmpDir, fmt.Sprintf("%s.img", *node))
|
||||
}
|
||||
if err := writer.WriteImage(context.Background(), rawImage, destPath); err != nil {
|
||||
emitStageProgress("flash", 92, fmt.Sprintf("Writing the latest image for %s to %s", *node, destPath))
|
||||
writeEmitter := newProgressEmitter("flash", 92, 98, fmt.Sprintf("Writing the latest image for %s", *node), true)
|
||||
if err := writer.WriteImageWithProgress(context.Background(), rawImage, destPath, writeEmitter); err != nil {
|
||||
fatalf("write image: %v", err)
|
||||
}
|
||||
emitStageProgress("flash", 99, fmt.Sprintf("Flushing the finished image for %s", *node))
|
||||
_ = exec.Command("sync").Run()
|
||||
if strings.HasPrefix(destPath, "/dev/") {
|
||||
_ = exec.Command("blockdev", "--flushbufs", destPath).Run()
|
||||
@ -192,6 +240,55 @@ func writeStructuredResult(payload any) {
|
||||
_ = os.WriteFile("/dev/termination-log", data, 0o644)
|
||||
}
|
||||
|
||||
func emitStageProgress(stage string, progress float64, message string) {
|
||||
emitProgress(service.RemoteProgressUpdate{
|
||||
Stage: stage,
|
||||
ProgressPct: progress,
|
||||
Message: message,
|
||||
})
|
||||
}
|
||||
|
||||
func emitProgress(update service.RemoteProgressUpdate) {
|
||||
line := service.ProgressLogLine(update)
|
||||
if strings.TrimSpace(line) == "" {
|
||||
return
|
||||
}
|
||||
fmt.Fprintln(os.Stdout, line)
|
||||
}
|
||||
|
||||
func newProgressEmitter(stage string, minPct, maxPct float64, message string, includeBytes bool) writer.ProgressFunc {
|
||||
var mu sync.Mutex
|
||||
lastPct := minPct
|
||||
lastEmit := time.Time{}
|
||||
return func(written, total int64) {
|
||||
if total <= 0 {
|
||||
return
|
||||
}
|
||||
pct := minPct + (float64(written)/float64(total))*(maxPct-minPct)
|
||||
if pct > maxPct {
|
||||
pct = maxPct
|
||||
}
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
now := time.Now()
|
||||
if pct-lastPct < 0.5 && now.Sub(lastEmit) < time.Second {
|
||||
return
|
||||
}
|
||||
update := service.RemoteProgressUpdate{
|
||||
Stage: stage,
|
||||
ProgressPct: pct,
|
||||
Message: message,
|
||||
}
|
||||
if includeBytes {
|
||||
update.WrittenBytes = written
|
||||
update.TotalBytes = total
|
||||
}
|
||||
emitProgress(update)
|
||||
lastPct = pct
|
||||
lastEmit = now
|
||||
}
|
||||
}
|
||||
|
||||
func localFlashDevices(maxBytes int64, hostTmpDir string) ([]service.Device, error) {
|
||||
cmd := exec.Command("lsblk", "-J", "-b", "-o", "NAME,PATH,RM,HOTPLUG,SIZE,MODEL,TRAN,TYPE")
|
||||
out, err := cmd.Output()
|
||||
|
||||
@ -29,9 +29,23 @@ type partitionTablePart struct {
|
||||
Type string `json:"type"`
|
||||
}
|
||||
|
||||
type RootFSProgressFunc func(step string)
|
||||
|
||||
const (
|
||||
RootFSProgressFindingPartition = "finding-partition"
|
||||
RootFSProgressExtracting = "extracting-partition"
|
||||
RootFSProgressWritingFiles = "writing-rootfs-files"
|
||||
RootFSProgressReplacing = "replacing-partition"
|
||||
)
|
||||
|
||||
// InjectRootFS rewrites the Linux root partition inside a raw image file without
|
||||
// requiring block-device mounts. Only rootfs-targeted files are written.
|
||||
func InjectRootFS(imagePath string, files []inject.FileSpec) error {
|
||||
return InjectRootFSWithProgress(imagePath, files, nil)
|
||||
}
|
||||
|
||||
// InjectRootFSWithProgress emits coarse step changes while rewriting the root partition.
|
||||
func InjectRootFSWithProgress(imagePath string, files []inject.FileSpec, progress RootFSProgressFunc) error {
|
||||
rootFiles := make([]inject.FileSpec, 0, len(files))
|
||||
for _, f := range files {
|
||||
if f.RootFS {
|
||||
@ -42,6 +56,7 @@ func InjectRootFS(imagePath string, files []inject.FileSpec) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
emitRootFSProgress(progress, RootFSProgressFindingPartition)
|
||||
part, sectorSize, err := findLinuxPartition(imagePath)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -54,15 +69,24 @@ func InjectRootFS(imagePath string, files []inject.FileSpec) error {
|
||||
defer os.RemoveAll(workDir)
|
||||
|
||||
rootImage := filepath.Join(workDir, "root.ext4")
|
||||
emitRootFSProgress(progress, RootFSProgressExtracting)
|
||||
if err := extractPartition(imagePath, rootImage, part, sectorSize); err != nil {
|
||||
return err
|
||||
}
|
||||
emitRootFSProgress(progress, RootFSProgressWritingFiles)
|
||||
if err := writeExt4Files(rootImage, rootFiles); err != nil {
|
||||
return err
|
||||
}
|
||||
emitRootFSProgress(progress, RootFSProgressReplacing)
|
||||
return replacePartition(imagePath, rootImage, part, sectorSize)
|
||||
}
|
||||
|
||||
func emitRootFSProgress(progress RootFSProgressFunc, step string) {
|
||||
if progress != nil {
|
||||
progress(step)
|
||||
}
|
||||
}
|
||||
|
||||
func findLinuxPartition(imagePath string) (partitionTablePart, uint64, error) {
|
||||
out, err := exec.Command("sfdisk", "-J", imagePath).Output()
|
||||
if err != nil {
|
||||
|
||||
@ -210,7 +210,13 @@ func (a *App) Build(node string) (*Job, error) {
|
||||
if err := a.ensureReplacementReady(node); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
job := a.newJob("build", node, "", "")
|
||||
if active := a.activeJobForNode(node); active != nil {
|
||||
return nil, &activeNodeJobError{Node: node, Kind: active.Kind, JobID: active.ID}
|
||||
}
|
||||
job, err := a.reserveJob("build", node, "", "")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
go a.runBuild(job, false)
|
||||
return job, nil
|
||||
}
|
||||
@ -223,10 +229,16 @@ func (a *App) Replace(node, host, device string) (*Job, error) {
|
||||
if err := a.ensureReplacementReady(node); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if active := a.activeJobForNode(node); active != nil {
|
||||
return nil, &activeNodeJobError{Node: node, Kind: active.Kind, JobID: active.ID}
|
||||
}
|
||||
if _, err := a.ensureDevice(host, device); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
job := a.newJob("replace", node, host, device)
|
||||
job, err := a.reserveJob("replace", node, host, device)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
go a.runBuild(job, true)
|
||||
return job, nil
|
||||
}
|
||||
|
||||
@ -34,6 +34,76 @@ func (a *App) newJob(kind, node, host, device string) *Job {
|
||||
return job
|
||||
}
|
||||
|
||||
type activeNodeJobError struct {
|
||||
Node string
|
||||
Kind string
|
||||
JobID string
|
||||
}
|
||||
|
||||
func (e *activeNodeJobError) Error() string {
|
||||
if e == nil {
|
||||
return "node already has an active metis job"
|
||||
}
|
||||
return fmt.Sprintf("node %s already has an active %s job (%s)", e.Node, e.Kind, e.JobID)
|
||||
}
|
||||
|
||||
func (a *App) activeJobForNode(node string) *Job {
|
||||
a.mu.RLock()
|
||||
defer a.mu.RUnlock()
|
||||
return a.activeJobForNodeLocked(node)
|
||||
}
|
||||
|
||||
func (a *App) activeJobForNodeLocked(node string) *Job {
|
||||
node = strings.TrimSpace(node)
|
||||
if node == "" {
|
||||
return nil
|
||||
}
|
||||
var active *Job
|
||||
for _, job := range a.jobs {
|
||||
if job == nil || strings.TrimSpace(job.Node) != node {
|
||||
continue
|
||||
}
|
||||
if job.Status != JobQueued && job.Status != JobRunning {
|
||||
continue
|
||||
}
|
||||
switch job.Kind {
|
||||
case "build", "replace":
|
||||
default:
|
||||
continue
|
||||
}
|
||||
if active == nil || job.StartedAt.Before(active.StartedAt) {
|
||||
active = job
|
||||
}
|
||||
}
|
||||
if active == nil {
|
||||
return nil
|
||||
}
|
||||
copyJob := *active
|
||||
return ©Job
|
||||
}
|
||||
|
||||
func (a *App) reserveJob(kind, node, host, device string) (*Job, error) {
|
||||
a.mu.Lock()
|
||||
defer a.mu.Unlock()
|
||||
if active := a.activeJobForNodeLocked(node); active != nil {
|
||||
return nil, &activeNodeJobError{Node: node, Kind: active.Kind, JobID: active.ID}
|
||||
}
|
||||
now := time.Now().UTC()
|
||||
job := &Job{
|
||||
ID: fmt.Sprintf("%d", now.UnixNano()),
|
||||
Kind: kind,
|
||||
Node: node,
|
||||
Host: host,
|
||||
Device: device,
|
||||
Status: JobQueued,
|
||||
ProgressPct: 0,
|
||||
StartedAt: now,
|
||||
UpdatedAt: now,
|
||||
}
|
||||
a.jobs[job.ID] = job
|
||||
return job, nil
|
||||
}
|
||||
|
||||
func (a *App) job(id string) *Job {
|
||||
a.mu.RLock()
|
||||
defer a.mu.RUnlock()
|
||||
|
||||
@ -158,6 +158,54 @@ func clusterNodes() []clusterNode {
|
||||
return nodes
|
||||
}
|
||||
|
||||
func clusterActiveRemotePodLoads(namespace, run string) map[string]int {
|
||||
kube, err := kubeClientFactory()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
ns := url.PathEscape(strings.TrimSpace(namespace))
|
||||
if ns == "" {
|
||||
return nil
|
||||
}
|
||||
selector := "app=metis-remote"
|
||||
if value := strings.TrimSpace(run); value != "" {
|
||||
selector += ",metis-run=" + value
|
||||
}
|
||||
path := fmt.Sprintf("/api/v1/namespaces/%s/pods?labelSelector=%s", ns, url.QueryEscape(selector))
|
||||
var payload struct {
|
||||
Items []struct {
|
||||
Metadata struct {
|
||||
Labels map[string]string `json:"labels"`
|
||||
} `json:"metadata"`
|
||||
Spec struct {
|
||||
NodeName string `json:"nodeName"`
|
||||
} `json:"spec"`
|
||||
Status struct {
|
||||
Phase string `json:"phase"`
|
||||
} `json:"status"`
|
||||
} `json:"items"`
|
||||
}
|
||||
if err := kube.jsonRequest(http.MethodGet, path, nil, &payload); err != nil {
|
||||
return nil
|
||||
}
|
||||
loads := map[string]int{}
|
||||
for _, item := range payload.Items {
|
||||
phase := strings.TrimSpace(item.Status.Phase)
|
||||
if phase == "Succeeded" || phase == "Failed" {
|
||||
continue
|
||||
}
|
||||
if value := strings.TrimSpace(run); value != "" && strings.TrimSpace(item.Metadata.Labels["metis-run"]) != value {
|
||||
continue
|
||||
}
|
||||
nodeName := strings.TrimSpace(item.Spec.NodeName)
|
||||
if nodeName == "" {
|
||||
continue
|
||||
}
|
||||
loads[nodeName]++
|
||||
}
|
||||
return loads
|
||||
}
|
||||
|
||||
func (a *App) podImageForArch(arch string) string {
|
||||
switch strings.TrimSpace(arch) {
|
||||
case "arm64":
|
||||
@ -183,7 +231,11 @@ func (a *App) runRemotePod(jobID, podName string, podSpec map[string]any) (strin
|
||||
return "", err
|
||||
}
|
||||
|
||||
deadline := time.Now().Add(12 * time.Minute)
|
||||
timeout := time.Duration(a.settings.RemotePodTimeout) * time.Second
|
||||
if timeout < 5*time.Minute {
|
||||
timeout = 5 * time.Minute
|
||||
}
|
||||
deadline := time.Now().Add(timeout)
|
||||
lastState := podState{Name: podName}
|
||||
for time.Now().Before(deadline) {
|
||||
state, err := a.remotePodState(kube, podName)
|
||||
@ -192,6 +244,11 @@ func (a *App) runRemotePod(jobID, podName string, podSpec map[string]any) (strin
|
||||
}
|
||||
lastState = state
|
||||
if strings.TrimSpace(jobID) != "" {
|
||||
if logs, logErr := a.remotePodLogs(kube, podName); logErr == nil {
|
||||
if update, ok := parseRemoteProgressLogs(logs); ok {
|
||||
a.applyRemoteProgress(jobID, update)
|
||||
}
|
||||
}
|
||||
a.heartbeatRemoteJob(jobID)
|
||||
}
|
||||
switch state.Phase {
|
||||
|
||||
@ -121,3 +121,50 @@ func TestDeleteNodeObjectFallback(t *testing.T) {
|
||||
t.Fatalf("deleteNodeObject fallback: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClusterActiveRemotePodLoadsCountsOnlyLivePods(t *testing.T) {
|
||||
kube := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case r.Method == http.MethodGet && r.URL.Path == "/api/v1/namespaces/maintenance/pods":
|
||||
if got := r.URL.Query().Get("labelSelector"); got != "app=metis-remote,metis-run=build" {
|
||||
t.Fatalf("unexpected labelSelector %q", got)
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"items": []any{
|
||||
map[string]any{
|
||||
"metadata": map[string]any{"labels": map[string]string{"app": "metis-remote", "metis-run": "build"}},
|
||||
"spec": map[string]any{"nodeName": "titan-04"},
|
||||
"status": map[string]any{"phase": "Running"},
|
||||
},
|
||||
map[string]any{
|
||||
"metadata": map[string]any{"labels": map[string]string{"app": "metis-remote", "metis-run": "build"}},
|
||||
"spec": map[string]any{"nodeName": "titan-04"},
|
||||
"status": map[string]any{"phase": "Pending"},
|
||||
},
|
||||
map[string]any{
|
||||
"metadata": map[string]any{"labels": map[string]string{"app": "metis-remote", "metis-run": "build"}},
|
||||
"spec": map[string]any{"nodeName": "titan-05"},
|
||||
"status": map[string]any{"phase": "Succeeded"},
|
||||
},
|
||||
},
|
||||
})
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer kube.Close()
|
||||
|
||||
origFactory := kubeClientFactory
|
||||
kubeClientFactory = func() (*kubeClient, error) {
|
||||
return kubeClientFactoryForURL(kube.URL, kube.Client()), nil
|
||||
}
|
||||
t.Cleanup(func() { kubeClientFactory = origFactory })
|
||||
|
||||
loads := clusterActiveRemotePodLoads("maintenance", "build")
|
||||
if loads["titan-04"] != 2 {
|
||||
t.Fatalf("expected titan-04 load 2, got %#v", loads)
|
||||
}
|
||||
if _, ok := loads["titan-05"]; ok {
|
||||
t.Fatalf("expected succeeded pod to be ignored, got %#v", loads)
|
||||
}
|
||||
}
|
||||
|
||||
@ -6,6 +6,7 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@ -21,6 +22,7 @@ func TestSettingsHelpersAndSmallUtilities(t *testing.T) {
|
||||
t.Setenv("METIS_MAX_DEVICE_BYTES", "12345")
|
||||
t.Setenv("METIS_DEFAULT_FLASH_HOST", "flash-1")
|
||||
t.Setenv("METIS_LOCAL_HOST", "local-1")
|
||||
t.Setenv("METIS_REMOTE_POD_TIMEOUT_SEC", "1800")
|
||||
|
||||
settings := FromEnv()
|
||||
if got, want := settings.CacheDir, filepath.Join(dataDir, "cache"); got != want {
|
||||
@ -32,6 +34,9 @@ func TestSettingsHelpersAndSmallUtilities(t *testing.T) {
|
||||
if settings.MaxDeviceBytes != 12345 {
|
||||
t.Fatalf("expected MaxDeviceBytes=12345, got %d", settings.MaxDeviceBytes)
|
||||
}
|
||||
if settings.RemotePodTimeout != 1800 {
|
||||
t.Fatalf("expected RemotePodTimeout=1800, got %d", settings.RemotePodTimeout)
|
||||
}
|
||||
if !reflect.DeepEqual(splitList("a, b,, c"), []string{"a", "b", "c"}) {
|
||||
t.Fatalf("splitList mismatch")
|
||||
}
|
||||
@ -125,6 +130,17 @@ func TestAppJobDeviceAndStateHelpers(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildAndReplaceRejectDuplicateActiveNodeJobs(t *testing.T) {
|
||||
app := newTestApp(t)
|
||||
active := app.newJob("replace", "titan-15", "titan-22", "/dev/sdz")
|
||||
if _, err := app.Build("titan-15"); err == nil || !strings.Contains(err.Error(), active.ID) {
|
||||
t.Fatalf("expected build conflict mentioning %s, got %v", active.ID, err)
|
||||
}
|
||||
if _, err := app.Replace("titan-15", "titan-22", "/dev/sdz"); err == nil || !strings.Contains(err.Error(), "active replace job") {
|
||||
t.Fatalf("expected replace conflict, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAppPersistenceAndTargets(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
invPath := filepath.Join(dir, "inventory.yaml")
|
||||
|
||||
@ -270,6 +270,17 @@ func (a *App) heartbeatRemoteJob(jobID string) {
|
||||
}
|
||||
j.Message = fmt.Sprintf("Validating %s on %s and resolving the latest Harbor artifact", prettyDeviceTarget(j.Device), j.Host)
|
||||
case "flash":
|
||||
if j.Total > 0 && j.Written > 0 {
|
||||
actual := 88 + (float64(j.Written)/float64(j.Total))*10
|
||||
if actual > 98 {
|
||||
actual = 98
|
||||
}
|
||||
if actual > j.ProgressPct {
|
||||
j.ProgressPct = actual
|
||||
}
|
||||
j.Message = fmt.Sprintf("Writing %s of %s on %s", humanBytes(j.Written), humanBytes(j.Total), j.Host)
|
||||
return
|
||||
}
|
||||
progress, message := flashStageHeartbeat(j.Host, j.Artifact, elapsed)
|
||||
if progress > j.ProgressPct {
|
||||
j.ProgressPct = progress
|
||||
|
||||
@ -81,6 +81,8 @@ func (a *App) ensureDevice(host, path string) (*Device, error) {
|
||||
|
||||
func (a *App) selectBuilderHost(arch, flashHost string) (clusterNode, error) {
|
||||
nodes := clusterNodes()
|
||||
activeBuilds := clusterActiveRemotePodLoads(a.settings.Namespace, "build")
|
||||
activeRemotePods := clusterActiveRemotePodLoads(a.settings.Namespace, "")
|
||||
storageNodes := map[string]struct{}{}
|
||||
for _, node := range a.inventory.Nodes {
|
||||
if len(node.LonghornDisks) > 0 {
|
||||
@ -119,6 +121,12 @@ func (a *App) selectBuilderHost(arch, flashHost string) (clusterNode, error) {
|
||||
if flashHost != "" && node.Name == flashHost {
|
||||
score += 5
|
||||
}
|
||||
if count := activeBuilds[node.Name]; count > 0 {
|
||||
score -= 100 * count
|
||||
}
|
||||
if count := activeRemotePods[node.Name]; count > 0 {
|
||||
score -= 15 * count
|
||||
}
|
||||
candidates = append(candidates, scored{node: node, score: score})
|
||||
}
|
||||
sort.Slice(candidates, func(i, j int) bool {
|
||||
|
||||
@ -1,6 +1,9 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
@ -66,3 +69,60 @@ func TestSelectBuilderHostErrorBranch(t *testing.T) {
|
||||
t.Fatal("expected selectBuilderHost error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSelectBuilderHostAvoidsBusyBuilderWhenPeersAreFree(t *testing.T) {
|
||||
kube := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case r.Method == http.MethodGet && r.URL.Path == "/api/v1/nodes":
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"items": []any{
|
||||
map[string]any{
|
||||
"metadata": map[string]any{
|
||||
"name": "titan-04",
|
||||
"labels": map[string]string{
|
||||
"kubernetes.io/arch": "arm64",
|
||||
"hardware": "rpi5",
|
||||
"node-role.kubernetes.io/worker": "true",
|
||||
},
|
||||
},
|
||||
"spec": map[string]any{"unschedulable": false},
|
||||
},
|
||||
map[string]any{
|
||||
"metadata": map[string]any{
|
||||
"name": "titan-05",
|
||||
"labels": map[string]string{
|
||||
"kubernetes.io/arch": "arm64",
|
||||
"hardware": "rpi5",
|
||||
"node-role.kubernetes.io/worker": "true",
|
||||
},
|
||||
},
|
||||
"spec": map[string]any{"unschedulable": false},
|
||||
},
|
||||
},
|
||||
})
|
||||
case r.Method == http.MethodGet && r.URL.Path == "/api/v1/namespaces/maintenance/pods":
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"items": []any{
|
||||
map[string]any{
|
||||
"metadata": map[string]any{"labels": map[string]string{"app": "metis-remote", "metis-run": "build"}},
|
||||
"spec": map[string]any{"nodeName": "titan-04"},
|
||||
"status": map[string]any{"phase": "Running"},
|
||||
},
|
||||
},
|
||||
})
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer kube.Close()
|
||||
installKubeFactory(t, kube)
|
||||
app := newTestApp(t)
|
||||
app.settings.Namespace = "maintenance"
|
||||
node, err := app.selectBuilderHost("arm64", "")
|
||||
if err != nil {
|
||||
t.Fatalf("selectBuilderHost: %v", err)
|
||||
}
|
||||
if node.Name != "titan-05" {
|
||||
t.Fatalf("expected titan-05 builder, got %s", node.Name)
|
||||
}
|
||||
}
|
||||
|
||||
@ -43,3 +43,19 @@ func TestFlashStageHeartbeatProgresses(t *testing.T) {
|
||||
t.Fatalf("expected flushing message, got %q", m3)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRemoteProgressLogsFindsLatestMarker(t *testing.T) {
|
||||
logs := strings.Join([]string{
|
||||
"plain log line",
|
||||
ProgressLogLine(RemoteProgressUpdate{Stage: "build", ProgressPct: 40, Message: "phase one"}),
|
||||
ProgressLogLine(RemoteProgressUpdate{Stage: "build", ProgressPct: 72, Message: "phase two"}),
|
||||
`{"local_path":"/tmp/out.img.xz"}`,
|
||||
}, "\n")
|
||||
update, ok := parseRemoteProgressLogs(logs)
|
||||
if !ok {
|
||||
t.Fatal("expected progress marker")
|
||||
}
|
||||
if update.ProgressPct != 72 || update.Message != "phase two" {
|
||||
t.Fatalf("unexpected update: %#v", update)
|
||||
}
|
||||
}
|
||||
|
||||
77
pkg/service/remote_status.go
Normal file
77
pkg/service/remote_status.go
Normal file
@ -0,0 +1,77 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
const progressLogPrefix = "METIS_PROGRESS "
|
||||
|
||||
// RemoteProgressUpdate is emitted by remote workers so the UI can show
|
||||
// concrete stage transitions instead of relying only on elapsed-time guesses.
|
||||
type RemoteProgressUpdate struct {
|
||||
Stage string `json:"stage,omitempty"`
|
||||
ProgressPct float64 `json:"progress_pct,omitempty"`
|
||||
Message string `json:"message,omitempty"`
|
||||
WrittenBytes int64 `json:"written_bytes,omitempty"`
|
||||
TotalBytes int64 `json:"total_bytes,omitempty"`
|
||||
}
|
||||
|
||||
// ProgressLogLine formats a progress update for remote worker stdout.
|
||||
func ProgressLogLine(update RemoteProgressUpdate) string {
|
||||
data, err := json.Marshal(update)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
return progressLogPrefix + string(data)
|
||||
}
|
||||
|
||||
func parseRemoteProgressLogs(logs string) (RemoteProgressUpdate, bool) {
|
||||
scanner := bufio.NewScanner(strings.NewReader(logs))
|
||||
scanner.Buffer(make([]byte, 0, 4096), 1<<20)
|
||||
var latest RemoteProgressUpdate
|
||||
found := false
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if !strings.HasPrefix(line, progressLogPrefix) {
|
||||
continue
|
||||
}
|
||||
raw := strings.TrimSpace(strings.TrimPrefix(line, progressLogPrefix))
|
||||
var update RemoteProgressUpdate
|
||||
if err := json.Unmarshal([]byte(raw), &update); err != nil {
|
||||
continue
|
||||
}
|
||||
latest = update
|
||||
found = true
|
||||
}
|
||||
return latest, found
|
||||
}
|
||||
|
||||
func (a *App) applyRemoteProgress(jobID string, update RemoteProgressUpdate) {
|
||||
if strings.TrimSpace(jobID) == "" {
|
||||
return
|
||||
}
|
||||
a.setJob(jobID, func(j *Job) {
|
||||
if j == nil || j.Status != JobRunning {
|
||||
return
|
||||
}
|
||||
if stage := strings.TrimSpace(update.Stage); stage != "" && stage != j.Stage {
|
||||
j.Stage = stage
|
||||
j.StageStartedAt = time.Now().UTC()
|
||||
}
|
||||
if update.ProgressPct > j.ProgressPct {
|
||||
j.ProgressPct = update.ProgressPct
|
||||
}
|
||||
if message := strings.TrimSpace(update.Message); message != "" {
|
||||
j.Message = message
|
||||
}
|
||||
if update.WrittenBytes > 0 {
|
||||
j.Written = update.WrittenBytes
|
||||
}
|
||||
if update.TotalBytes > 0 {
|
||||
j.Total = update.TotalBytes
|
||||
}
|
||||
})
|
||||
}
|
||||
@ -2,6 +2,7 @@ package service
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"html/template"
|
||||
"net/http"
|
||||
"strings"
|
||||
@ -99,7 +100,7 @@ func (a *App) handleBuild(w http.ResponseWriter, r *http.Request) {
|
||||
node := values["node"]
|
||||
job, err := a.Build(node)
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusBadRequest)
|
||||
http.Error(w, err.Error(), statusForJobError(err))
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusAccepted, job)
|
||||
@ -116,12 +117,20 @@ func (a *App) handleReplace(w http.ResponseWriter, r *http.Request) {
|
||||
device := values["device"]
|
||||
job, err := a.Replace(node, host, device)
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusBadRequest)
|
||||
http.Error(w, err.Error(), statusForJobError(err))
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusAccepted, job)
|
||||
}
|
||||
|
||||
func statusForJobError(err error) int {
|
||||
var conflict *activeNodeJobError
|
||||
if errors.As(err, &conflict) {
|
||||
return http.StatusConflict
|
||||
}
|
||||
return http.StatusBadRequest
|
||||
}
|
||||
|
||||
func (a *App) handleWatch(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
|
||||
|
||||
@ -229,6 +229,21 @@ func TestRequestValuesFormAndAuthHelpers(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleBuildReturnsConflictForDuplicateActiveNodeJob(t *testing.T) {
|
||||
app := newTestApp(t)
|
||||
app.newJob("build", "titan-15", "", "")
|
||||
handler := app.Handler()
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/build", strings.NewReader(`{"node":"titan-15"}`))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("X-Auth-Request-User", "brad")
|
||||
req.Header.Set("X-Auth-Request-Groups", "admin")
|
||||
resp := httptest.NewRecorder()
|
||||
handler.ServeHTTP(resp, req)
|
||||
if resp.Code != http.StatusConflict {
|
||||
t.Fatalf("expected conflict, got %d: %s", resp.Code, resp.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPHandlersExerciseErrorBranches(t *testing.T) {
|
||||
kube := fakeKubeServer(t)
|
||||
installKubeFactory(t, kube)
|
||||
|
||||
@ -33,6 +33,7 @@ type Settings struct {
|
||||
HarborUsername string
|
||||
HarborPassword string
|
||||
HostTmpDir string
|
||||
RemotePodTimeout int64
|
||||
}
|
||||
|
||||
// FromEnv builds service settings with sensible defaults for local dev and in-cluster use.
|
||||
@ -64,6 +65,7 @@ func FromEnv() Settings {
|
||||
HarborUsername: getenvDefault("METIS_HARBOR_USERNAME", ""),
|
||||
HarborPassword: getenvDefault("METIS_HARBOR_PASSWORD", ""),
|
||||
HostTmpDir: getenvDefault("METIS_HOST_TMP_DIR", "/tmp/metis-flash-test"),
|
||||
RemotePodTimeout: getenvInt64("METIS_REMOTE_POD_TIMEOUT_SEC", 1800),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -166,6 +166,8 @@ func fakeKubeServer(t *testing.T) *httptest.Server {
|
||||
},
|
||||
},
|
||||
})
|
||||
case r.Method == http.MethodGet && r.URL.Path == "/api/v1/namespaces/maintenance/pods":
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{"items": []any{}})
|
||||
case r.Method == http.MethodPost && strings.Contains(r.URL.Path, "/pods"):
|
||||
w.WriteHeader(http.StatusCreated)
|
||||
case r.Method == http.MethodDelete && strings.Contains(r.URL.Path, "/pods/"):
|
||||
|
||||
@ -9,6 +9,8 @@ from pathlib import Path
|
||||
import urllib.request
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
QUALITY_SUCCESS_STATES = {"ok", "pass", "passed", "success", "compliant"}
|
||||
|
||||
|
||||
def _escape_label(value: str) -> str:
|
||||
return value.replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"')
|
||||
@ -81,6 +83,33 @@ def _post_text(url: str, payload: str) -> None:
|
||||
raise RuntimeError(f"metrics push failed status={resp.status}")
|
||||
|
||||
|
||||
def _read_http(url: str) -> str:
|
||||
try:
|
||||
with urllib.request.urlopen(url, timeout=10) as resp:
|
||||
return resp.read().decode("utf-8", errors="replace")
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
def _fetch_existing_counter(pushgateway_url: str, metric: str, labels: dict[str, str]) -> float:
|
||||
text = _read_http(f"{pushgateway_url.rstrip('/')}/metrics")
|
||||
if not text:
|
||||
return 0.0
|
||||
for line in text.splitlines():
|
||||
if not line.startswith(metric + "{"):
|
||||
continue
|
||||
if any(f'{key}="{value}"' not in line for key, value in labels.items()):
|
||||
continue
|
||||
parts = line.split()
|
||||
if len(parts) < 2:
|
||||
continue
|
||||
try:
|
||||
return float(parts[1])
|
||||
except ValueError:
|
||||
return 0.0
|
||||
return 0.0
|
||||
|
||||
|
||||
def _count_source_files_over_limit(repo_root: Path, max_lines: int = 500) -> int:
|
||||
"""Count source files above the configured line budget."""
|
||||
|
||||
@ -100,6 +129,45 @@ def _count_source_files_over_limit(repo_root: Path, max_lines: int = 500) -> int
|
||||
return count
|
||||
|
||||
|
||||
def _load_json(path: Path) -> dict | None:
|
||||
if not path.exists():
|
||||
return None
|
||||
try:
|
||||
payload = json.loads(path.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
return None
|
||||
return payload if isinstance(payload, dict) else None
|
||||
|
||||
|
||||
def _sonarqube_check_status(build_dir: Path) -> str:
|
||||
report = _load_json(Path(os.getenv("QUALITY_GATE_SONARQUBE_REPORT", str(build_dir / "sonarqube-quality-gate.json"))))
|
||||
if not report:
|
||||
return "not_applicable"
|
||||
status_candidates = [
|
||||
report.get("status"),
|
||||
((report.get("projectStatus") or {}).get("status") if isinstance(report.get("projectStatus"), dict) else None),
|
||||
((report.get("qualityGate") or {}).get("status") if isinstance(report.get("qualityGate"), dict) else None),
|
||||
]
|
||||
for value in status_candidates:
|
||||
if isinstance(value, str):
|
||||
return "ok" if value.strip().lower() in QUALITY_SUCCESS_STATES else "failed"
|
||||
return "failed"
|
||||
|
||||
|
||||
def _supply_chain_check_status(build_dir: Path) -> str:
|
||||
report = _load_json(Path(os.getenv("QUALITY_GATE_IRONBANK_REPORT", str(build_dir / "ironbank-compliance.json"))))
|
||||
if not report:
|
||||
return "not_applicable"
|
||||
compliant = report.get("compliant")
|
||||
if isinstance(compliant, bool):
|
||||
return "ok" if compliant else "failed"
|
||||
status_candidates = [report.get("status"), report.get("result"), report.get("compliance")]
|
||||
for value in status_candidates:
|
||||
if isinstance(value, str):
|
||||
return "ok" if value.strip().lower() in QUALITY_SUCCESS_STATES else "failed"
|
||||
return "failed"
|
||||
|
||||
|
||||
def main() -> int:
|
||||
coverage_path = os.getenv("COVERAGE_JSON", "build/coverage.json")
|
||||
junit_path = os.getenv("JUNIT_XML", "build/junit.xml")
|
||||
@ -113,6 +181,7 @@ def main() -> int:
|
||||
commit = os.getenv("GIT_COMMIT", "")
|
||||
strict = os.getenv("METRICS_STRICT", "") == "1"
|
||||
repo_root = Path(__file__).resolve().parents[1]
|
||||
build_dir = repo_root / "build"
|
||||
|
||||
if not os.path.exists(coverage_path):
|
||||
raise RuntimeError(f"missing coverage file {coverage_path}")
|
||||
@ -133,6 +202,29 @@ def main() -> int:
|
||||
or totals["errors"] > 0
|
||||
):
|
||||
outcome = "failed"
|
||||
checks = {
|
||||
"tests": "ok" if outcome == "ok" else "failed",
|
||||
"coverage": "ok" if coverage >= 95.0 else "failed",
|
||||
"loc": "ok" if source_lines_over_500 == 0 else "failed",
|
||||
"docs_naming": "not_applicable",
|
||||
"gate_glue": "ok",
|
||||
"sonarqube": _sonarqube_check_status(build_dir),
|
||||
"supply_chain": _supply_chain_check_status(build_dir),
|
||||
}
|
||||
ok_count = _fetch_existing_counter(
|
||||
pushgateway_url,
|
||||
"platform_quality_gate_runs_total",
|
||||
{"job": "platform-quality-ci", "suite": suite, "status": "ok"},
|
||||
)
|
||||
failed_count = _fetch_existing_counter(
|
||||
pushgateway_url,
|
||||
"platform_quality_gate_runs_total",
|
||||
{"job": "platform-quality-ci", "suite": suite, "status": "failed"},
|
||||
)
|
||||
if outcome == "ok":
|
||||
ok_count += 1
|
||||
else:
|
||||
failed_count += 1
|
||||
|
||||
labels = {
|
||||
"job": "platform-quality-ci",
|
||||
@ -142,6 +234,9 @@ def main() -> int:
|
||||
"commit": commit,
|
||||
}
|
||||
payload_lines = [
|
||||
"# TYPE platform_quality_gate_runs_total counter",
|
||||
f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {ok_count:.0f}',
|
||||
f'platform_quality_gate_runs_total{{suite="{suite}",status="failed"}} {failed_count:.0f}',
|
||||
"# TYPE metis_quality_gate_tests_total gauge",
|
||||
f'metis_quality_gate_tests_total{{suite="{suite}",result="total"}} {totals["tests"]}',
|
||||
f'metis_quality_gate_tests_total{{suite="{suite}",result="passed"}} {passed}',
|
||||
@ -157,9 +252,14 @@ def main() -> int:
|
||||
f'platform_quality_gate_workspace_line_coverage_percent{{suite="{suite}"}} {coverage:.3f}',
|
||||
"# TYPE platform_quality_gate_source_lines_over_500_total gauge",
|
||||
f'platform_quality_gate_source_lines_over_500_total{{suite="{suite}"}} {source_lines_over_500}',
|
||||
"# TYPE metis_quality_gate_checks_total gauge",
|
||||
"# TYPE metis_quality_gate_build_info gauge",
|
||||
f"metis_quality_gate_build_info{_label_str(labels)} 1",
|
||||
]
|
||||
payload_lines.extend(
|
||||
f'metis_quality_gate_checks_total{{suite="{suite}",check="{check_name}",result="{check_status}"}} 1'
|
||||
for check_name, check_status in checks.items()
|
||||
)
|
||||
payload = "\n".join(payload_lines) + "\n"
|
||||
|
||||
try:
|
||||
|
||||
@ -6,16 +6,37 @@ build_dir="${repo_root}/build"
|
||||
gopath_bin="$(go env GOPATH)/bin"
|
||||
junit_report="${gopath_bin}/go-junit-report"
|
||||
|
||||
run_with_retry() {
|
||||
local attempts="$1"
|
||||
shift
|
||||
local try=1
|
||||
local delay=3
|
||||
local rc=0
|
||||
while true; do
|
||||
"$@" && return 0
|
||||
rc=$?
|
||||
if [[ "${try}" -ge "${attempts}" ]]; then
|
||||
return "${rc}"
|
||||
fi
|
||||
echo "[quality] retry ${try}/${attempts} after rc=${rc}: $*" >&2
|
||||
sleep "${delay}"
|
||||
delay=$((delay * 2))
|
||||
try=$((try + 1))
|
||||
done
|
||||
}
|
||||
|
||||
mkdir -p "${build_dir}"
|
||||
|
||||
if [ ! -x "${junit_report}" ]; then
|
||||
go install github.com/jstemmer/go-junit-report/v2@latest
|
||||
run_with_retry 3 go install github.com/jstemmer/go-junit-report/v2@latest
|
||||
fi
|
||||
|
||||
cd "${repo_root}"
|
||||
export GOPROXY="${GOPROXY:-https://proxy.golang.org,direct}"
|
||||
run_with_retry 4 go mod download
|
||||
|
||||
set +e
|
||||
go test -v -count=1 -coverprofile="${build_dir}/coverage.out" ./... > "${build_dir}/test.out" 2>&1
|
||||
run_with_retry 3 go test -v -count=1 -coverprofile="${build_dir}/coverage.out" ./... > "${build_dir}/test.out" 2>&1
|
||||
test_rc=$?
|
||||
set -e
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user