test: make adaptive sync probe confirm calibration
This commit is contained in:
parent
8b8fbec63f
commit
3b6c049a73
19
AGENTS.md
19
AGENTS.md
@ -429,3 +429,22 @@ judge whether the correction helped.
|
||||
- [x] Update manual probe contract tests for provisional calibration controls and output.
|
||||
- [x] Run shell syntax checks, focused contract tests, and package checks.
|
||||
- [x] Push clean semver `0.17.20` for installed client/server testing.
|
||||
|
||||
## 0.17.21 Calibrate-Then-Confirm Probe Checklist
|
||||
|
||||
Context: 0.17.20 made adaptive runs capable of provisional calibration between measured
segments, but that still did not strictly guarantee the user-requested flow: run the probe,
calibrate the server while it is running, then run a post-calibration test segment. 0.17.20
also still ignored analyzer-failure captures that contained a bounded raw activity delta.
0.17.21 makes the probe behavior explicit: calibration segments mutate the active server
calibration, confirmation segments do not mutate it, and adaptive runs fail unless
confirmation passes.
|
||||
|
||||
- [x] Treat `LESAVKA_SYNC_CALIBRATION_SEGMENTS` as calibration windows in adaptive confirm mode.
|
||||
- [x] Add post-calibration confirmation windows via `LESAVKA_SYNC_CONFIRMATION_SEGMENTS`.
|
||||
- [x] Disable calibration apply during confirmation windows so they are a clean test.
|
||||
- [x] Require confirmation pass by default in adaptive confirm mode.
|
||||
- [x] Add bounded raw-activity provisional calibration for analyzer failures that still report a raw A/V delta.
|
||||
- [x] Include confirmation summaries and segment phase in adaptive artifacts.
|
||||
- [x] Update manual probe contract tests for calibrate-then-confirm behavior.
|
||||
- [x] Run shell syntax checks, focused contract tests, and package checks.
|
||||
- [x] Push clean semver `0.17.21` for installed client/server testing.
|
||||
|
||||
6
Cargo.lock
generated
6
Cargo.lock
generated
@ -1652,7 +1652,7 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
|
||||
|
||||
[[package]]
|
||||
name = "lesavka_client"
|
||||
version = "0.17.20"
|
||||
version = "0.17.21"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-stream",
|
||||
@ -1686,7 +1686,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lesavka_common"
|
||||
version = "0.17.20"
|
||||
version = "0.17.21"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"base64",
|
||||
@ -1698,7 +1698,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lesavka_server"
|
||||
version = "0.17.20"
|
||||
version = "0.17.21"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"base64",
|
||||
|
||||
@ -4,7 +4,7 @@ path = "src/main.rs"
|
||||
|
||||
[package]
|
||||
name = "lesavka_client"
|
||||
version = "0.17.20"
|
||||
version = "0.17.21"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lesavka_common"
|
||||
version = "0.17.20"
|
||||
version = "0.17.21"
|
||||
edition = "2024"
|
||||
build = "build.rs"
|
||||
|
||||
|
||||
@ -38,6 +38,11 @@ LESAVKA_SYNC_PROVISIONAL_MAX_P95_MS=${LESAVKA_SYNC_PROVISIONAL_MAX_P95_MS:-350}
|
||||
LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS=${LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS:-250}
|
||||
LESAVKA_SYNC_PROVISIONAL_GAIN=${LESAVKA_SYNC_PROVISIONAL_GAIN:-0.5}
|
||||
LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US=${LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US:-150000}
|
||||
LESAVKA_SYNC_RAW_FAILURE_CALIBRATION=${LESAVKA_SYNC_RAW_FAILURE_CALIBRATION:-${LESAVKA_SYNC_PROVISIONAL_CALIBRATION}}
|
||||
LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS=${LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS:-350}
|
||||
LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION=${LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION:-${LESAVKA_SYNC_ADAPTIVE_CALIBRATION}}
|
||||
LESAVKA_SYNC_CONFIRMATION_SEGMENTS=${LESAVKA_SYNC_CONFIRMATION_SEGMENTS:-1}
|
||||
LESAVKA_SYNC_REQUIRE_CONFIRMATION_PASS=${LESAVKA_SYNC_REQUIRE_CONFIRMATION_PASS:-${LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION}}
|
||||
STIMULUS_PORT=${STIMULUS_PORT:-18444}
|
||||
STIMULUS_SETTLE_SECONDS=${STIMULUS_SETTLE_SECONDS:-10}
|
||||
LOCAL_OUTPUT_DIR=${LOCAL_OUTPUT_DIR:-"${REPO_ROOT}/tmp"}
|
||||
@ -66,6 +71,14 @@ if ! [[ "${LESAVKA_SYNC_CALIBRATION_SEGMENTS}" =~ ^[1-9][0-9]*$ ]]; then
|
||||
echo "LESAVKA_SYNC_CALIBRATION_SEGMENTS must be a positive integer" >&2
|
||||
exit 2
|
||||
fi
|
||||
# Confirmation windows only exist when confirm-after-calibration is enabled;
# otherwise the count is forced to zero so the total equals the calibration count.
if [[ "${LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION}" != "1" ]]; then
  LESAVKA_SYNC_CONFIRMATION_SEGMENTS=0
fi
if ! [[ "${LESAVKA_SYNC_CONFIRMATION_SEGMENTS}" =~ ^[0-9]+$ ]]; then
  echo "LESAVKA_SYNC_CONFIRMATION_SEGMENTS must be a non-negative integer" >&2
  exit 2
fi
# The pattern above accepts leading zeros (for example "08" or "010"), which
# shell arithmetic would otherwise read as octal: "08" is an error and "010"
# becomes 8. Normalize to base 10 before the value is used in any arithmetic.
LESAVKA_SYNC_CONFIRMATION_SEGMENTS=$((10#${LESAVKA_SYNC_CONFIRMATION_SEGMENTS}))
LESAVKA_SYNC_TOTAL_SEGMENTS=$((LESAVKA_SYNC_CALIBRATION_SEGMENTS + LESAVKA_SYNC_CONFIRMATION_SEGMENTS))
|
||||
|
||||
cleanup() {
|
||||
set +e
|
||||
@ -280,20 +293,34 @@ latest_report_json() {
|
||||
| cut -d' ' -f2-
|
||||
}
|
||||
|
||||
#######################################
# Print the path of the most recently modified analysis-failure.json located
# exactly one directory below the report root. Prints nothing when no such
# file exists.
# Globals:   ARTIFACT_DIR (read; used as the search root when $1 is omitted)
# Arguments: $1 - directory to search (optional)
# Outputs:   the newest matching path on stdout
#######################################
latest_analysis_failure_json() {
  local report_root="${1:-${ARTIFACT_DIR}}"
  # Emit "<mtime> <path>" per match, order by mtime, keep the newest entry and
  # strip the timestamp. cut -f2- keeps paths that contain spaces intact.
  # find diagnostics (for example a missing root) are silenced on purpose.
  # LC_ALL=C keeps the numeric sort independent of the caller's locale.
  find "${report_root}" -mindepth 2 -maxdepth 2 -type f -name analysis-failure.json -printf '%T@ %p\n' 2>/dev/null \
    | LC_ALL=C sort -n \
    | tail -n 1 \
    | cut -d' ' -f2-
}
|
||||
|
||||
maybe_apply_probe_calibration() {
|
||||
local report_root="${1:-${ARTIFACT_DIR}}"
|
||||
local label="${2:-mirrored run}"
|
||||
local allow_apply="${3:-1}"
|
||||
local report_json
|
||||
report_json="$(latest_report_json "${report_root}")"
|
||||
echo "==> probe calibration decision (${label})"
|
||||
local analysis_failure_json=""
|
||||
if [[ -z "${report_json}" || ! -f "${report_json}" ]]; then
|
||||
analysis_failure_json="$(latest_analysis_failure_json "${report_root}")"
|
||||
fi
|
||||
echo "==> probe calibration decision (${label})"
|
||||
if [[ -z "${report_json}" || ! -f "${report_json}" ]] && [[ -z "${analysis_failure_json}" || ! -f "${analysis_failure_json}" ]]; then
|
||||
echo " ↪ report_json=missing"
|
||||
echo " ↪ calibration apply skipped: analyzer report was not produced"
|
||||
echo " ↪ analysis_failure_json=missing"
|
||||
echo " ↪ calibration apply skipped: analyzer evidence was not produced"
|
||||
return 0
|
||||
fi
|
||||
|
||||
local summary
|
||||
if ! summary="$(python3 - "${report_json}" "${LESAVKA_SYNC_CALIBRATION_TARGET}" <<'PY'
|
||||
if ! summary="$(python3 - "${report_json:-}" "${analysis_failure_json:-}" "${LESAVKA_SYNC_CALIBRATION_TARGET}" <<'PY'
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
@ -301,9 +328,17 @@ import shlex
|
||||
import sys
|
||||
|
||||
report_path = sys.argv[1]
|
||||
target = sys.argv[2].strip().lower()
|
||||
with open(report_path, "r", encoding="utf-8") as handle:
|
||||
failure_path = sys.argv[2]
|
||||
target = sys.argv[3].strip().lower()
|
||||
|
||||
report = {}
|
||||
failure = {}
|
||||
if report_path:
|
||||
with open(report_path, "r", encoding="utf-8") as handle:
|
||||
report = json.load(handle)
|
||||
elif failure_path:
|
||||
with open(failure_path, "r", encoding="utf-8") as handle:
|
||||
failure = json.load(handle)
|
||||
|
||||
cal = report.get("calibration", {})
|
||||
verdict = report.get("verdict", {})
|
||||
@ -351,11 +386,28 @@ provisional_max_p95_ms = env_float("LESAVKA_SYNC_PROVISIONAL_MAX_P95_MS", 350.0)
|
||||
provisional_max_drift_ms = env_float("LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS", 250.0)
|
||||
provisional_gain = env_float("LESAVKA_SYNC_PROVISIONAL_GAIN", 0.5)
|
||||
provisional_max_step_us = env_int("LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US", 150000)
|
||||
raw_failure_enabled = env_bool("LESAVKA_SYNC_RAW_FAILURE_CALIBRATION", False)
|
||||
raw_failure_max_abs_delta_ms = env_float("LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS", 350.0)
|
||||
|
||||
ready_audio_recommendation = int(cal.get("recommended_audio_offset_adjust_us") or 0)
|
||||
ready_video_recommendation = int(cal.get("recommended_video_offset_adjust_us") or 0)
|
||||
provisional_audio_recommendation = int(round(-median_skew_ms * 1000.0))
|
||||
provisional_video_recommendation = int(round(median_skew_ms * 1000.0))
|
||||
decision_source = "report"
|
||||
raw_activity_delta_ms = None
|
||||
failure_reason = ""
|
||||
if not report:
|
||||
decision_source = "analysis_failure"
|
||||
failure_reason = str(failure.get("reason", "analyzer failed"))
|
||||
paired_pulses = int(failure.get("paired_pulses", 0) or 0)
|
||||
raw_value = failure.get("raw_activity_delta_ms")
|
||||
if raw_value is not None:
|
||||
raw_activity_delta_ms = as_float(raw_value)
|
||||
median_skew_ms = raw_activity_delta_ms
|
||||
p95_abs_skew_ms = abs(raw_activity_delta_ms)
|
||||
drift_ms = 0.0
|
||||
provisional_audio_recommendation = int(round(-median_skew_ms * 1000.0))
|
||||
provisional_video_recommendation = int(round(median_skew_ms * 1000.0))
|
||||
|
||||
audio_recommendation = ready_audio_recommendation
|
||||
video_recommendation = ready_video_recommendation
|
||||
@ -364,7 +416,34 @@ video_delta = video_recommendation if target == "video" else 0
|
||||
decision_mode = "ready" if ready else "refused"
|
||||
decision_note = "analyzer marked this report calibration-ready" if ready else "analyzer did not mark this report calibration-ready"
|
||||
|
||||
if not ready and provisional_enabled:
|
||||
if not report:
|
||||
if not raw_failure_enabled:
|
||||
decision_note = "raw analyzer-failure calibration disabled"
|
||||
elif raw_activity_delta_ms is None:
|
||||
decision_note = "raw analyzer-failure calibration refused: no raw activity delta was reported"
|
||||
elif abs(raw_activity_delta_ms) > raw_failure_max_abs_delta_ms:
|
||||
decision_note = (
|
||||
"raw analyzer-failure calibration refused: "
|
||||
f"abs(raw_activity_delta_ms) {abs(raw_activity_delta_ms):.1f} > {raw_failure_max_abs_delta_ms:.1f}"
|
||||
)
|
||||
else:
|
||||
audio_recommendation = provisional_audio_recommendation
|
||||
video_recommendation = provisional_video_recommendation
|
||||
if target == "audio":
|
||||
audio_delta = clamp(audio_recommendation * provisional_gain, provisional_max_step_us)
|
||||
video_delta = 0
|
||||
else:
|
||||
audio_delta = 0
|
||||
video_delta = clamp(video_recommendation * provisional_gain, provisional_max_step_us)
|
||||
if audio_delta == 0 and video_delta == 0:
|
||||
decision_note = "raw analyzer-failure calibration skipped: rounded correction was zero"
|
||||
else:
|
||||
decision_mode = "raw_provisional"
|
||||
decision_note = (
|
||||
"bounded provisional correction from analyzer-failure raw activity; "
|
||||
"not safe to save until a confirming coded report"
|
||||
)
|
||||
elif not ready and provisional_enabled:
|
||||
refusal_reasons = []
|
||||
if paired_pulses < provisional_min_pairs:
|
||||
refusal_reasons.append(f"paired_pulses {paired_pulses} < {provisional_min_pairs}")
|
||||
@ -395,9 +474,11 @@ if not ready and provisional_enabled:
|
||||
|
||||
fields = {
|
||||
"report_json": report_path,
|
||||
"analysis_failure_json": failure_path,
|
||||
"calibration_ready": str(ready).lower(),
|
||||
"calibration_target": target,
|
||||
"calibration_decision_mode": decision_mode,
|
||||
"calibration_decision_source": decision_source,
|
||||
"calibration_decision_note": decision_note,
|
||||
"calibration_audio_recommendation_us": audio_recommendation,
|
||||
"calibration_video_recommendation_us": video_recommendation,
|
||||
@ -414,7 +495,11 @@ fields = {
|
||||
"provisional_max_drift_ms": f"{provisional_max_drift_ms:.1f}",
|
||||
"provisional_gain": f"{provisional_gain:.3f}",
|
||||
"provisional_max_step_us": provisional_max_step_us,
|
||||
"verdict_status": verdict.get("status", ""),
|
||||
"raw_failure_calibration_enabled": str(raw_failure_enabled).lower(),
|
||||
"raw_failure_max_abs_delta_ms": f"{raw_failure_max_abs_delta_ms:.1f}",
|
||||
"raw_activity_delta_ms": "" if raw_activity_delta_ms is None else f"{raw_activity_delta_ms:+.1f}",
|
||||
"analysis_failure_reason": failure_reason,
|
||||
"verdict_status": verdict.get("status", failure.get("status", "")),
|
||||
"paired_pulses": paired_pulses,
|
||||
"median_skew_ms": f"{median_skew_ms:+.1f}",
|
||||
"p95_abs_skew_ms": f"{p95_abs_skew_ms:.1f}",
|
||||
@ -431,6 +516,7 @@ PY
|
||||
eval "${summary}"
|
||||
printf '%s\n' "${summary}" >"${report_root}/calibration-decision.env"
|
||||
echo " ↪ report_json=${report_json}"
|
||||
echo " ↪ analysis_failure_json=${analysis_failure_json}"
|
||||
echo " ↪ verdict_status=${verdict_status}"
|
||||
echo " ↪ paired_pulses=${paired_pulses}"
|
||||
echo " ↪ median_skew_ms=${median_skew_ms}"
|
||||
@ -439,6 +525,7 @@ PY
|
||||
echo " ↪ calibration_ready=${calibration_ready}"
|
||||
echo " ↪ calibration_target=${calibration_target}"
|
||||
echo " ↪ calibration_decision_mode=${calibration_decision_mode}"
|
||||
echo " ↪ calibration_decision_source=${calibration_decision_source}"
|
||||
echo " ↪ recommended_audio_offset_adjust_us=${calibration_audio_recommendation_us}"
|
||||
echo " ↪ recommended_video_offset_adjust_us=${calibration_video_recommendation_us}"
|
||||
echo " ↪ ready_audio_offset_adjust_us=${calibration_ready_audio_recommendation_us}"
|
||||
@ -451,9 +538,17 @@ PY
|
||||
echo " ↪ provisional_max_drift_ms=${provisional_max_drift_ms}"
|
||||
echo " ↪ provisional_gain=${provisional_gain}"
|
||||
echo " ↪ provisional_max_step_us=${provisional_max_step_us}"
|
||||
echo " ↪ raw_failure_calibration_enabled=${raw_failure_calibration_enabled}"
|
||||
echo " ↪ raw_failure_max_abs_delta_ms=${raw_failure_max_abs_delta_ms}"
|
||||
[[ -n "${raw_activity_delta_ms}" ]] && echo " ↪ raw_activity_delta_ms=${raw_activity_delta_ms}"
|
||||
[[ -n "${analysis_failure_reason}" ]] && echo " ↪ analysis_failure_reason=${analysis_failure_reason}"
|
||||
echo " ↪ calibration_note=${calibration_note}"
|
||||
echo " ↪ calibration_decision_note=${calibration_decision_note}"
|
||||
|
||||
if [[ "${allow_apply}" != "1" ]]; then
|
||||
echo " ↪ confirmation segment: calibration apply disabled so this segment tests the active calibration"
|
||||
return 0
|
||||
fi
|
||||
if [[ "${LESAVKA_SYNC_APPLY_CALIBRATION}" != "1" ]]; then
|
||||
echo " ↪ calibration apply disabled; set LESAVKA_SYNC_APPLY_CALIBRATION=1 to apply ready or provisional recommendations"
|
||||
return 0
|
||||
@ -583,15 +678,26 @@ run_browser_capture_with_real_driver() {
|
||||
run_mirrored_segments() {
|
||||
local run_status=0
|
||||
local segment
|
||||
for segment in $(seq 1 "${LESAVKA_SYNC_CALIBRATION_SEGMENTS}"); do
|
||||
local segment_label="segment ${segment}/${LESAVKA_SYNC_CALIBRATION_SEGMENTS}"
|
||||
for segment in $(seq 1 "${LESAVKA_SYNC_TOTAL_SEGMENTS}"); do
|
||||
local phase="calibration"
|
||||
local phase_index="${segment}"
|
||||
local phase_count="${LESAVKA_SYNC_CALIBRATION_SEGMENTS}"
|
||||
local allow_calibration_apply=1
|
||||
if (( segment > LESAVKA_SYNC_CALIBRATION_SEGMENTS )); then
|
||||
phase="confirmation"
|
||||
phase_index=$((segment - LESAVKA_SYNC_CALIBRATION_SEGMENTS))
|
||||
phase_count="${LESAVKA_SYNC_CONFIRMATION_SEGMENTS}"
|
||||
allow_calibration_apply=0
|
||||
fi
|
||||
local segment_label="${phase} segment ${phase_index}/${phase_count} (overall ${segment}/${LESAVKA_SYNC_TOTAL_SEGMENTS})"
|
||||
local segment_dir="${ARTIFACT_DIR}/segment-${segment}"
|
||||
mkdir -p "${segment_dir}"
|
||||
echo "==> mirrored calibration ${segment_label}"
|
||||
printf 'segment_phase=%s\n' "${phase}" >"${segment_dir}/segment-phase.env"
|
||||
echo "==> mirrored ${segment_label}"
|
||||
print_upstream_calibration_state "before ${segment_label}" "${segment_dir}/calibration-before.env"
|
||||
print_upstream_sync_state "before ${segment_label}" "${segment_dir}/planner-before.env"
|
||||
if run_browser_capture_with_real_driver "${segment_label}" "${segment_dir}" "${segment}"; then
|
||||
maybe_apply_probe_calibration "${segment_dir}" "${segment_label}"
|
||||
maybe_apply_probe_calibration "${segment_dir}" "${segment_label}" "${allow_calibration_apply}"
|
||||
print_upstream_sync_state "after ${segment_label}" "${segment_dir}/planner-after.env"
|
||||
print_upstream_calibration_state "after ${segment_label}" "${segment_dir}/calibration-after.env"
|
||||
else
|
||||
@ -600,8 +706,8 @@ run_mirrored_segments() {
|
||||
print_upstream_calibration_state "after failed ${segment_label}" "${segment_dir}/calibration-after-failed.env"
|
||||
break
|
||||
fi
|
||||
if (( segment < LESAVKA_SYNC_CALIBRATION_SEGMENTS )); then
|
||||
echo "==> settling ${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS}s before next calibration segment"
|
||||
if (( segment < LESAVKA_SYNC_TOTAL_SEGMENTS )); then
|
||||
echo "==> settling ${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS}s before next segment"
|
||||
sleep "${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS}"
|
||||
fi
|
||||
done
|
||||
@ -610,7 +716,7 @@ run_mirrored_segments() {
|
||||
|
||||
summarize_adaptive_probe_metrics() {
|
||||
echo "==> summarizing segmented probe metrics"
|
||||
python3 - "${ARTIFACT_DIR}" "${LESAVKA_SYNC_CALIBRATION_SEGMENTS}" <<'PY'
|
||||
python3 - "${ARTIFACT_DIR}" "${LESAVKA_SYNC_TOTAL_SEGMENTS}" "${LESAVKA_SYNC_CALIBRATION_SEGMENTS}" <<'PY'
|
||||
import csv
|
||||
import json
|
||||
import os
|
||||
@ -619,6 +725,7 @@ from pathlib import Path
|
||||
|
||||
root = Path(sys.argv[1])
|
||||
segment_count = int(sys.argv[2])
|
||||
calibration_segment_count = int(sys.argv[3])
|
||||
|
||||
|
||||
def read_env(path):
|
||||
@ -688,9 +795,14 @@ for segment in range(1, segment_count + 1):
|
||||
calibration_before = read_env(segment_dir / "calibration-before.env")
|
||||
calibration_after = read_env(segment_dir / "calibration-after.env")
|
||||
decision = read_env(segment_dir / "calibration-decision.env")
|
||||
phase = read_env(segment_dir / "segment-phase.env").get(
|
||||
"segment_phase",
|
||||
"calibration" if segment <= calibration_segment_count else "confirmation",
|
||||
)
|
||||
|
||||
row = {
|
||||
"segment": segment,
|
||||
"segment_phase": phase,
|
||||
"report_json": str(report_path) if report_path else "",
|
||||
"analysis_failure_json": str(failure_path) if failure_path else "",
|
||||
"analysis_failure_reason": failure.get("reason", ""),
|
||||
@ -708,6 +820,7 @@ for segment in range(1, segment_count + 1):
|
||||
"calibration_ready": bool(calibration.get("ready", False)),
|
||||
"calibration_note": calibration.get("note", ""),
|
||||
"decision_mode": decision.get("calibration_decision_mode", ""),
|
||||
"decision_source": decision.get("calibration_decision_source", ""),
|
||||
"decision_note": decision.get("calibration_decision_note", ""),
|
||||
"decision_video_delta_us": as_float(decision.get("calibration_apply_video_delta_us")),
|
||||
"decision_audio_delta_us": as_float(decision.get("calibration_apply_audio_delta_us")),
|
||||
@ -742,6 +855,8 @@ with jsonl_path.open("w", encoding="utf-8") as handle:
|
||||
handle.write(json.dumps(row, sort_keys=True) + "\n")
|
||||
|
||||
good_rows = [row for row in rows if row.get("probe_passed")]
|
||||
confirmation_rows = [row for row in rows if row.get("segment_phase") == "confirmation"]
|
||||
passing_confirmation_rows = [row for row in confirmation_rows if row.get("probe_passed")]
|
||||
target_path = root / "blind-targets.json"
|
||||
if good_rows:
|
||||
target = {
|
||||
@ -771,10 +886,70 @@ else:
|
||||
}
|
||||
target_path.write_text(json.dumps(target, indent=2, sort_keys=True) + "\n", encoding="utf-8")
|
||||
|
||||
confirmation_path = root / "confirmation-summary.json"
|
||||
if confirmation_rows:
|
||||
best_confirmation = min(
|
||||
[
|
||||
row for row in confirmation_rows
|
||||
if isinstance(row.get("probe_p95_abs_skew_ms"), (int, float))
|
||||
],
|
||||
key=lambda row: row["probe_p95_abs_skew_ms"],
|
||||
default=None,
|
||||
)
|
||||
confirmation = {
|
||||
"required": True,
|
||||
"passed": bool(passing_confirmation_rows),
|
||||
"confirmation_segments": [row["segment"] for row in confirmation_rows],
|
||||
"passing_confirmation_segments": [row["segment"] for row in passing_confirmation_rows],
|
||||
"best_confirmation_segment": best_confirmation["segment"] if best_confirmation else None,
|
||||
"best_confirmation_status": best_confirmation["probe_status"] if best_confirmation else "missing",
|
||||
"best_confirmation_p95_abs_skew_ms": best_confirmation["probe_p95_abs_skew_ms"] if best_confirmation else None,
|
||||
}
|
||||
else:
|
||||
confirmation = {
|
||||
"required": False,
|
||||
"passed": False,
|
||||
"confirmation_segments": [],
|
||||
"passing_confirmation_segments": [],
|
||||
}
|
||||
confirmation_path.write_text(json.dumps(confirmation, indent=2, sort_keys=True) + "\n", encoding="utf-8")
|
||||
|
||||
print(f" ↪ segment_metrics_csv={csv_path}")
|
||||
print(f" ↪ segment_metrics_jsonl={jsonl_path}")
|
||||
print(f" ↪ blind_targets_json={target_path}")
|
||||
print(f" ↪ blind_targets_ready={str(bool(target.get('ready'))).lower()}")
|
||||
print(f" ↪ confirmation_summary_json={confirmation_path}")
|
||||
print(f" ↪ confirmation_passed={str(bool(confirmation.get('passed'))).lower()}")
|
||||
PY
|
||||
}
|
||||
|
||||
#######################################
# Gate the run on the post-calibration confirmation summary.
# Globals:   LESAVKA_SYNC_REQUIRE_CONFIRMATION_PASS (read)
#            ARTIFACT_DIR (read; confirmation-summary.json is looked up here)
# Arguments: none
# Outputs:   a pass/fail report on stdout
# Returns:   0 when the check is not required or the summary's "passed" field
#            is truthy; 1 when the summary is missing, unreadable, or not passed
#######################################
check_confirmation_result() {
  if [[ "${LESAVKA_SYNC_REQUIRE_CONFIRMATION_PASS}" != "1" ]]; then
    return 0
  fi
  local confirmation_json="${ARTIFACT_DIR}/confirmation-summary.json"
  if [[ ! -f "${confirmation_json}" ]]; then
    echo "==> confirmation check failed"
    echo " ↪ confirmation_summary_json=missing"
    return 1
  fi
  # The interpreter's exit status is the function's status (0 pass, 1 fail).
  python3 - "${confirmation_json}" <<'PY'
import json
import sys
from pathlib import Path

try:
    confirmation = json.loads(Path(sys.argv[1]).read_text(encoding="utf-8"))
except (OSError, ValueError) as error:
    # An unreadable or malformed summary is reported as a failed check rather
    # than surfacing as a traceback; the exit status stays 1 either way.
    print("==> confirmation check failed")
    print(f" ↪ confirmation_summary_json=unreadable ({error})")
    sys.exit(1)

if not isinstance(confirmation, dict):
    # Anything other than a JSON object carries no "passed" field.
    confirmation = {}

if confirmation.get("passed"):
    print("==> confirmation check passed")
    print(f" ↪ passing_confirmation_segments={confirmation.get('passing_confirmation_segments', [])}")
    sys.exit(0)

print("==> confirmation check failed")
print(f" ↪ confirmation_segments={confirmation.get('confirmation_segments', [])}")
print(f" ↪ best_confirmation_segment={confirmation.get('best_confirmation_segment')}")
print(f" ↪ best_confirmation_status={confirmation.get('best_confirmation_status')}")
print(f" ↪ best_confirmation_p95_abs_skew_ms={confirmation.get('best_confirmation_p95_abs_skew_ms')}")
sys.exit(1)
PY
}
|
||||
|
||||
@ -795,6 +970,9 @@ run_mirrored_segments || run_status=$?
|
||||
print_upstream_sync_state "after mirrored run" "${ARTIFACT_DIR}/planner-after.env"
|
||||
print_upstream_calibration_state "after mirrored run" "${ARTIFACT_DIR}/calibration-after.env"
|
||||
summarize_adaptive_probe_metrics
|
||||
if ! check_confirmation_result; then
|
||||
run_status=1
|
||||
fi
|
||||
|
||||
if ((run_status != 0)); then
|
||||
echo "==> mirrored probe failed"
|
||||
|
||||
@ -10,7 +10,7 @@ bench = false
|
||||
|
||||
[package]
|
||||
name = "lesavka_server"
|
||||
version = "0.17.20"
|
||||
version = "0.17.21"
|
||||
edition = "2024"
|
||||
autobins = false
|
||||
|
||||
|
||||
@ -133,6 +133,13 @@ fn mirrored_sync_script_uses_real_client_capture_path() {
|
||||
"LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS=${LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS:-250}",
|
||||
"LESAVKA_SYNC_PROVISIONAL_GAIN=${LESAVKA_SYNC_PROVISIONAL_GAIN:-0.5}",
|
||||
"LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US=${LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US:-150000}",
|
||||
"LESAVKA_SYNC_RAW_FAILURE_CALIBRATION=${LESAVKA_SYNC_RAW_FAILURE_CALIBRATION:-${LESAVKA_SYNC_PROVISIONAL_CALIBRATION}}",
|
||||
"LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS=${LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS:-350}",
|
||||
"LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION=${LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION:-${LESAVKA_SYNC_ADAPTIVE_CALIBRATION}}",
|
||||
"LESAVKA_SYNC_CONFIRMATION_SEGMENTS=${LESAVKA_SYNC_CONFIRMATION_SEGMENTS:-1}",
|
||||
"LESAVKA_SYNC_REQUIRE_CONFIRMATION_PASS=${LESAVKA_SYNC_REQUIRE_CONFIRMATION_PASS:-${LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION}}",
|
||||
"LESAVKA_SYNC_CONFIRMATION_SEGMENTS must be a non-negative integer",
|
||||
"LESAVKA_SYNC_TOTAL_SEGMENTS=$((LESAVKA_SYNC_CALIBRATION_SEGMENTS + LESAVKA_SYNC_CONFIRMATION_SEGMENTS))",
|
||||
"LESAVKA_SYNC_ADAPTIVE_CALIBRATION",
|
||||
"LESAVKA_SYNC_CALIBRATION_SEGMENTS=4",
|
||||
"browser_consumer_reuse_session=${reuse_browser_session}",
|
||||
@ -142,13 +149,19 @@ fn mirrored_sync_script_uses_real_client_capture_path() {
|
||||
"LESAVKA_SYNC_CALIBRATION_SEGMENTS must be a positive integer",
|
||||
"run_mirrored_segments",
|
||||
"summarize_adaptive_probe_metrics",
|
||||
"for segment in $(seq 1 \"${LESAVKA_SYNC_CALIBRATION_SEGMENTS}\")",
|
||||
"for segment in $(seq 1 \"${LESAVKA_SYNC_TOTAL_SEGMENTS}\")",
|
||||
"segment_phase",
|
||||
"confirmation segment: calibration apply disabled so this segment tests the active calibration",
|
||||
"segment-${segment}",
|
||||
"calibration-before.env",
|
||||
"planner-before.env",
|
||||
"calibration-decision.env",
|
||||
"segment-metrics.csv",
|
||||
"segment-metrics.jsonl",
|
||||
"confirmation-summary.json",
|
||||
"confirmation_passed",
|
||||
"check_confirmation_result",
|
||||
"confirmation check failed",
|
||||
"analysis_failure_reason",
|
||||
"probe_activity_start_delta_ms",
|
||||
"blind-targets.json",
|
||||
@ -157,12 +170,14 @@ fn mirrored_sync_script_uses_real_client_capture_path() {
|
||||
"decision_provisional_video_recommendation_us",
|
||||
"planner_live_lag_ms_after",
|
||||
"probe_p95_abs_skew_ms",
|
||||
"settling ${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS}s before next calibration segment",
|
||||
"settling ${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS}s before next segment",
|
||||
"print_upstream_calibration_state \"before mirrored run\"",
|
||||
"maybe_apply_probe_calibration",
|
||||
"calibration_ready=${calibration_ready}",
|
||||
"calibration_decision_mode=${calibration_decision_mode}",
|
||||
"bounded provisional correction from median skew",
|
||||
"bounded provisional correction from analyzer-failure raw activity",
|
||||
"raw_failure_calibration_enabled",
|
||||
"provisional calibration not saved",
|
||||
"calibration apply refused: ${calibration_decision_note}",
|
||||
"calibrate \"${calibration_apply_audio_delta_us}\" \"${calibration_apply_video_delta_us}\"",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user