service: smooth metis build and flash progress
This commit is contained in:
parent
801374d184
commit
c459e5c028
@ -50,6 +50,7 @@ type Job struct {
|
|||||||
Device string `json:"device,omitempty"`
|
Device string `json:"device,omitempty"`
|
||||||
Status JobStatus `json:"status"`
|
Status JobStatus `json:"status"`
|
||||||
Stage string `json:"stage,omitempty"`
|
Stage string `json:"stage,omitempty"`
|
||||||
|
StageStartedAt time.Time `json:"stage_started_at,omitempty"`
|
||||||
Message string `json:"message,omitempty"`
|
Message string `json:"message,omitempty"`
|
||||||
Artifact string `json:"artifact,omitempty"`
|
Artifact string `json:"artifact,omitempty"`
|
||||||
ProgressPct float64 `json:"progress_pct"`
|
ProgressPct float64 `json:"progress_pct"`
|
||||||
|
|||||||
@ -186,7 +186,7 @@ func (a *App) runRemotePod(jobID, podName string, podSpec map[string]any) (strin
|
|||||||
}
|
}
|
||||||
lastState = state
|
lastState = state
|
||||||
if strings.TrimSpace(jobID) != "" {
|
if strings.TrimSpace(jobID) != "" {
|
||||||
a.setJob(jobID, func(_ *Job) {})
|
a.heartbeatRemoteJob(jobID)
|
||||||
}
|
}
|
||||||
switch state.Phase {
|
switch state.Phase {
|
||||||
case "Succeeded":
|
case "Succeeded":
|
||||||
|
|||||||
@ -3,6 +3,7 @@ package service
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
@ -79,6 +80,7 @@ func (a *App) runBuild(job *Job, flash bool) {
|
|||||||
a.setJob(job.ID, func(j *Job) {
|
a.setJob(job.ID, func(j *Job) {
|
||||||
j.Status = JobRunning
|
j.Status = JobRunning
|
||||||
j.Stage = "build"
|
j.Stage = "build"
|
||||||
|
j.StageStartedAt = time.Now().UTC()
|
||||||
j.Message = fmt.Sprintf("Building on %s (%s) and publishing to Harbor", builder.Name, builder.Arch)
|
j.Message = fmt.Sprintf("Building on %s (%s) and publishing to Harbor", builder.Name, builder.Arch)
|
||||||
j.ProgressPct = 8
|
j.ProgressPct = 8
|
||||||
j.Artifact = artifactRef + ":latest"
|
j.Artifact = artifactRef + ":latest"
|
||||||
@ -140,7 +142,8 @@ func (a *App) runBuild(job *Job, flash bool) {
|
|||||||
|
|
||||||
a.setJob(job.ID, func(j *Job) {
|
a.setJob(job.ID, func(j *Job) {
|
||||||
j.Stage = "preflight"
|
j.Stage = "preflight"
|
||||||
j.Message = fmt.Sprintf("Preparing to flash from Harbor on %s", j.Host)
|
j.StageStartedAt = time.Now().UTC()
|
||||||
|
j.Message = fmt.Sprintf("Validating %s and preparing the latest Harbor artifact for %s", prettyDeviceTarget(j.Device), j.Host)
|
||||||
j.ProgressPct = 78
|
j.ProgressPct = 78
|
||||||
j.Artifact = artifactRef + ":latest"
|
j.Artifact = artifactRef + ":latest"
|
||||||
})
|
})
|
||||||
@ -201,6 +204,7 @@ func (a *App) flashArtifact(jobID, artifactRef string) error {
|
|||||||
}
|
}
|
||||||
a.setJob(jobID, func(j *Job) {
|
a.setJob(jobID, func(j *Job) {
|
||||||
j.Stage = "flash"
|
j.Stage = "flash"
|
||||||
|
j.StageStartedAt = time.Now().UTC()
|
||||||
j.Message = fmt.Sprintf("Pulling %s and writing it on %s", artifactRef+":latest", j.Host)
|
j.Message = fmt.Sprintf("Pulling %s and writing it on %s", artifactRef+":latest", j.Host)
|
||||||
j.ProgressPct = 84
|
j.ProgressPct = 84
|
||||||
})
|
})
|
||||||
@ -220,6 +224,97 @@ func (a *App) flashArtifact(jobID, artifactRef string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (a *App) heartbeatRemoteJob(jobID string) {
|
||||||
|
if strings.TrimSpace(jobID) == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
a.setJob(jobID, func(j *Job) {
|
||||||
|
if j == nil || j.Status != JobRunning {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
stageStart := j.StageStartedAt
|
||||||
|
if stageStart.IsZero() {
|
||||||
|
stageStart = j.StartedAt
|
||||||
|
}
|
||||||
|
elapsed := time.Since(stageStart)
|
||||||
|
switch j.Stage {
|
||||||
|
case "build":
|
||||||
|
progress, message := buildStageHeartbeat(j.Node, j.Builder, elapsed)
|
||||||
|
if progress > j.ProgressPct {
|
||||||
|
j.ProgressPct = progress
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(message) != "" {
|
||||||
|
j.Message = message
|
||||||
|
}
|
||||||
|
case "preflight":
|
||||||
|
if j.ProgressPct < 80 {
|
||||||
|
j.ProgressPct = 80
|
||||||
|
}
|
||||||
|
j.Message = fmt.Sprintf("Validating %s on %s and resolving the latest Harbor artifact", prettyDeviceTarget(j.Device), j.Host)
|
||||||
|
case "flash":
|
||||||
|
progress, message := flashStageHeartbeat(j.Host, j.Artifact, elapsed)
|
||||||
|
if progress > j.ProgressPct {
|
||||||
|
j.ProgressPct = progress
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(message) != "" {
|
||||||
|
j.Message = message
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildStageHeartbeat(node, builder string, elapsed time.Duration) (float64, string) {
|
||||||
|
seconds := elapsed.Seconds()
|
||||||
|
switch {
|
||||||
|
case seconds < 20:
|
||||||
|
return ramp(seconds, 0, 20, 8, 14), fmt.Sprintf("Scheduling a remote builder on %s for %s", builder, node)
|
||||||
|
case seconds < 120:
|
||||||
|
return ramp(seconds, 20, 120, 14, 30), fmt.Sprintf("Injecting %s recovery config into the base image on %s", node, builder)
|
||||||
|
case seconds < 360:
|
||||||
|
return ramp(seconds, 120, 360, 30, 58), fmt.Sprintf("Building the replacement image filesystem for %s on %s", node, builder)
|
||||||
|
case seconds < 540:
|
||||||
|
return ramp(seconds, 360, 540, 58, 70), fmt.Sprintf("Compressing the replacement image for %s before upload", node)
|
||||||
|
default:
|
||||||
|
return math.Min(76, ramp(seconds, 540, 900, 70, 76)), fmt.Sprintf("Publishing %s to Harbor and refreshing the latest tag", node)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func flashStageHeartbeat(host, artifact string, elapsed time.Duration) (float64, string) {
|
||||||
|
seconds := elapsed.Seconds()
|
||||||
|
switch {
|
||||||
|
case seconds < 10:
|
||||||
|
return ramp(seconds, 0, 10, 84, 88), fmt.Sprintf("Pulling %s from Harbor on %s", artifact, host)
|
||||||
|
case seconds < 45:
|
||||||
|
return ramp(seconds, 10, 45, 88, 96), fmt.Sprintf("Writing the latest image to the selected target on %s", host)
|
||||||
|
default:
|
||||||
|
return math.Min(98, ramp(seconds, 45, 120, 96, 98)), fmt.Sprintf("Flushing buffers and finishing the write on %s", host)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func prettyDeviceTarget(path string) string {
|
||||||
|
switch {
|
||||||
|
case strings.HasPrefix(path, "hosttmp://"):
|
||||||
|
return "/tmp"
|
||||||
|
case strings.TrimSpace(path) == "":
|
||||||
|
return "the selected target"
|
||||||
|
default:
|
||||||
|
return path
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func ramp(value, start, end, min, max float64) float64 {
|
||||||
|
if end <= start {
|
||||||
|
return max
|
||||||
|
}
|
||||||
|
if value <= start {
|
||||||
|
return min
|
||||||
|
}
|
||||||
|
if value >= end {
|
||||||
|
return max
|
||||||
|
}
|
||||||
|
return min + ((value-start)/(end-start))*(max-min)
|
||||||
|
}
|
||||||
|
|
||||||
func (a *App) ensureDevice(host, path string) (*Device, error) {
|
func (a *App) ensureDevice(host, path string) (*Device, error) {
|
||||||
if strings.TrimSpace(path) == "" {
|
if strings.TrimSpace(path) == "" {
|
||||||
return nil, fmt.Errorf("select removable media before starting a flash run")
|
return nil, fmt.Errorf("select removable media before starting a flash run")
|
||||||
|
|||||||
45
pkg/service/remote_progress_test.go
Normal file
45
pkg/service/remote_progress_test.go
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
package service
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestBuildStageHeartbeatProgresses(t *testing.T) {
|
||||||
|
p1, m1 := buildStageHeartbeat("titan-13", "titan-04", 10*time.Second)
|
||||||
|
p2, m2 := buildStageHeartbeat("titan-13", "titan-04", 3*time.Minute)
|
||||||
|
p3, m3 := buildStageHeartbeat("titan-13", "titan-04", 11*time.Minute)
|
||||||
|
|
||||||
|
if !(p1 > 8 && p1 < p2 && p2 < p3 && p3 <= 76) {
|
||||||
|
t.Fatalf("unexpected build progress values: %v %v %v", p1, p2, p3)
|
||||||
|
}
|
||||||
|
if !strings.Contains(m1, "Scheduling") {
|
||||||
|
t.Fatalf("expected scheduling message, got %q", m1)
|
||||||
|
}
|
||||||
|
if !strings.Contains(m2, "replacement image filesystem") {
|
||||||
|
t.Fatalf("expected filesystem message, got %q", m2)
|
||||||
|
}
|
||||||
|
if !strings.Contains(m3, "Harbor") {
|
||||||
|
t.Fatalf("expected Harbor message, got %q", m3)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFlashStageHeartbeatProgresses(t *testing.T) {
|
||||||
|
p1, m1 := flashStageHeartbeat("titan-22", "registry.bstein.dev/metis/titan-13:latest", 5*time.Second)
|
||||||
|
p2, m2 := flashStageHeartbeat("titan-22", "registry.bstein.dev/metis/titan-13:latest", 20*time.Second)
|
||||||
|
p3, m3 := flashStageHeartbeat("titan-22", "registry.bstein.dev/metis/titan-13:latest", 90*time.Second)
|
||||||
|
|
||||||
|
if !(p1 > 84 && p1 < p2 && p2 < p3 && p3 <= 98) {
|
||||||
|
t.Fatalf("unexpected flash progress values: %v %v %v", p1, p2, p3)
|
||||||
|
}
|
||||||
|
if !strings.Contains(m1, "Pulling") {
|
||||||
|
t.Fatalf("expected pulling message, got %q", m1)
|
||||||
|
}
|
||||||
|
if !strings.Contains(m2, "Writing") {
|
||||||
|
t.Fatalf("expected writing message, got %q", m2)
|
||||||
|
}
|
||||||
|
if !strings.Contains(m3, "Flushing") {
|
||||||
|
t.Fatalf("expected flushing message, got %q", m3)
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -686,9 +686,13 @@ var metisPage = template.Must(template.New("metis").Parse(`<!doctype html>
|
|||||||
const started = fmtTime(job.started_at) + (job.device ? ' · ' + job.device : '') + (job.host ? ' · ' + job.host : '');
|
const started = fmtTime(job.started_at) + (job.device ? ' · ' + job.device : '') + (job.host ? ' · ' + job.host : '');
|
||||||
const timingBits = [];
|
const timingBits = [];
|
||||||
if(job.stage){ timingBits.push('stage: ' + job.stage); }
|
if(job.stage){ timingBits.push('stage: ' + job.stage); }
|
||||||
const duration = fmtDuration(job.started_at, job.finished_at);
|
const stageDuration = fmtDuration(job.stage_started_at || job.started_at, job.finished_at);
|
||||||
if(duration){
|
if(stageDuration){
|
||||||
timingBits.push((job.status === 'running' ? 'elapsed ' : 'duration ') + duration);
|
timingBits.push((job.status === 'running' ? 'stage elapsed ' : 'stage duration ') + stageDuration);
|
||||||
|
}
|
||||||
|
const totalDuration = fmtDuration(job.started_at, job.finished_at);
|
||||||
|
if(totalDuration && job.stage_started_at){
|
||||||
|
timingBits.push((job.status === 'running' ? 'total elapsed ' : 'total duration ') + totalDuration);
|
||||||
}
|
}
|
||||||
if(job.updated_at && job.status === 'running'){
|
if(job.updated_at && job.status === 'running'){
|
||||||
timingBits.push('last update ' + fmtDuration(job.updated_at, new Date().toISOString()) + ' ago');
|
timingBits.push('last update ' + fmtDuration(job.updated_at, new Date().toISOString()) + ' ago');
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user