test: tighten adaptive probe evidence
This commit is contained in:
parent
c82c61c652
commit
c874ddbf99
16
AGENTS.md
16
AGENTS.md
@ -479,3 +479,19 @@ low paired-pulse counts as product failure.
|
|||||||
- [x] Update manual probe contract coverage for the audio-gain control.
|
- [x] Update manual probe contract coverage for the audio-gain control.
|
||||||
- [x] Run focused analyzer/manual-probe tests and package checks.
|
- [x] Run focused analyzer/manual-probe tests and package checks.
|
||||||
- [x] Push clean semver `0.17.23` for installed client/server testing.
|
- [x] Push clean semver `0.17.23` for installed client/server testing.
|
||||||
|
|
||||||
|
## 0.17.24 Probe Truthfulness And Localization Checklist
|
||||||
|
|
||||||
|
Context: the 0.17.23 run proved adaptive calibration is now live-editing the server,
|
||||||
|
but confirmation still failed. Segment 3 passed and triggered a provisional calibration
|
||||||
|
nudge, while the confirmation segment failed with a near-centered median but high p95/drift.
|
||||||
|
This means the fastest high-quality path is localization tooling, not another static offset
|
||||||
|
guess.
|
||||||
|
|
||||||
|
- [x] Treat the latest failure as timing instability/outlier drift until the probe proves otherwise.
|
||||||
|
- [x] Fix analyzer-failure raw activity delta parsing so bounded raw-delta calibration can use the evidence it prints.
|
||||||
|
- [x] Stop marking `blind-targets.json` ready from calibration-only passes when confirmation segments exist and fail.
|
||||||
|
- [x] Emit combined `segment-events.csv` and `segment-events.jsonl` artifacts so each run exposes per-pulse skew and confidence across segments.
|
||||||
|
- [ ] Use the next run to decide whether bad p95 is caused by low-confidence analyzer pairings, camera/mic capture instability, or server planner/output jitter.
|
||||||
|
- [ ] Add stage-local timing evidence for stimulus schedule, client capture onsets, server output timing, and browser/device capture if the event table still cannot isolate the source.
|
||||||
|
- [ ] Only save calibration defaults after a confirmation segment passes.
|
||||||
|
|||||||
6
Cargo.lock
generated
6
Cargo.lock
generated
@ -1652,7 +1652,7 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lesavka_client"
|
name = "lesavka_client"
|
||||||
version = "0.17.23"
|
version = "0.17.24"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"async-stream",
|
"async-stream",
|
||||||
@ -1686,7 +1686,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lesavka_common"
|
name = "lesavka_common"
|
||||||
version = "0.17.23"
|
version = "0.17.24"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"base64",
|
"base64",
|
||||||
@ -1698,7 +1698,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lesavka_server"
|
name = "lesavka_server"
|
||||||
version = "0.17.23"
|
version = "0.17.24"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"base64",
|
"base64",
|
||||||
|
|||||||
@ -4,7 +4,7 @@ path = "src/main.rs"
|
|||||||
|
|
||||||
[package]
|
[package]
|
||||||
name = "lesavka_client"
|
name = "lesavka_client"
|
||||||
version = "0.17.23"
|
version = "0.17.24"
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lesavka_common"
|
name = "lesavka_common"
|
||||||
version = "0.17.23"
|
version = "0.17.24"
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
build = "build.rs"
|
build = "build.rs"
|
||||||
|
|
||||||
|
|||||||
@ -260,8 +260,8 @@ if not reason:
|
|||||||
reason = lines[-1].strip() if lines else "analyzer failed"
|
reason = lines[-1].strip() if lines else "analyzer failed"
|
||||||
|
|
||||||
raw_match = re.search(
|
raw_match = re.search(
|
||||||
r"raw activity delta was ([+-]?[0-9]+(?:\\.[0-9]+)?) ms "
|
r"raw activity delta was ([+-]?[0-9]+(?:\.[0-9]+)?) ms "
|
||||||
r"\\(video=([0-9]+(?:\\.[0-9]+)?)s audio=([0-9]+(?:\\.[0-9]+)?)s\\)",
|
r"\(video=([0-9]+(?:\.[0-9]+)?)s audio=([0-9]+(?:\.[0-9]+)?)s\)",
|
||||||
text,
|
text,
|
||||||
)
|
)
|
||||||
paired_match = re.search(r"saw ([0-9]+)", reason)
|
paired_match = re.search(r"saw ([0-9]+)", reason)
|
||||||
|
|||||||
@ -789,6 +789,7 @@ def range_for(rows, key):
|
|||||||
|
|
||||||
|
|
||||||
rows = []
|
rows = []
|
||||||
|
event_rows = []
|
||||||
for segment in range(1, segment_count + 1):
|
for segment in range(1, segment_count + 1):
|
||||||
segment_dir = root / f"segment-{segment}"
|
segment_dir = root / f"segment-{segment}"
|
||||||
report_path = latest_report(segment_dir)
|
report_path = latest_report(segment_dir)
|
||||||
@ -857,6 +858,21 @@ for segment in range(1, segment_count + 1):
|
|||||||
}
|
}
|
||||||
rows.append(row)
|
rows.append(row)
|
||||||
|
|
||||||
|
for event in report.get("paired_events", []):
|
||||||
|
if not isinstance(event, dict):
|
||||||
|
continue
|
||||||
|
event_rows.append({
|
||||||
|
"segment": segment,
|
||||||
|
"segment_phase": phase,
|
||||||
|
"probe_status": row["probe_status"],
|
||||||
|
"probe_passed": row["probe_passed"],
|
||||||
|
"event_id": event.get("event_id"),
|
||||||
|
"video_time_s": as_float(str(event.get("video_time_s", ""))),
|
||||||
|
"audio_time_s": as_float(str(event.get("audio_time_s", ""))),
|
||||||
|
"skew_ms": as_float(str(event.get("skew_ms", ""))),
|
||||||
|
"confidence": as_float(str(event.get("confidence", ""))),
|
||||||
|
})
|
||||||
|
|
||||||
csv_path = root / "segment-metrics.csv"
|
csv_path = root / "segment-metrics.csv"
|
||||||
jsonl_path = root / "segment-metrics.jsonl"
|
jsonl_path = root / "segment-metrics.jsonl"
|
||||||
fieldnames = list(rows[0].keys()) if rows else ["segment"]
|
fieldnames = list(rows[0].keys()) if rows else ["segment"]
|
||||||
@ -868,21 +884,50 @@ with jsonl_path.open("w", encoding="utf-8") as handle:
|
|||||||
for row in rows:
|
for row in rows:
|
||||||
handle.write(json.dumps(row, sort_keys=True) + "\n")
|
handle.write(json.dumps(row, sort_keys=True) + "\n")
|
||||||
|
|
||||||
|
events_csv_path = root / "segment-events.csv"
|
||||||
|
events_jsonl_path = root / "segment-events.jsonl"
|
||||||
|
event_fieldnames = list(event_rows[0].keys()) if event_rows else [
|
||||||
|
"segment",
|
||||||
|
"segment_phase",
|
||||||
|
"probe_status",
|
||||||
|
"probe_passed",
|
||||||
|
"event_id",
|
||||||
|
"video_time_s",
|
||||||
|
"audio_time_s",
|
||||||
|
"skew_ms",
|
||||||
|
"confidence",
|
||||||
|
]
|
||||||
|
with events_csv_path.open("w", newline="", encoding="utf-8") as handle:
|
||||||
|
writer = csv.DictWriter(handle, fieldnames=event_fieldnames)
|
||||||
|
writer.writeheader()
|
||||||
|
writer.writerows(event_rows)
|
||||||
|
with events_jsonl_path.open("w", encoding="utf-8") as handle:
|
||||||
|
for row in event_rows:
|
||||||
|
handle.write(json.dumps(row, sort_keys=True) + "\n")
|
||||||
|
|
||||||
good_rows = [row for row in rows if row.get("probe_passed")]
|
good_rows = [row for row in rows if row.get("probe_passed")]
|
||||||
confirmation_rows = [row for row in rows if row.get("segment_phase") == "confirmation"]
|
confirmation_rows = [row for row in rows if row.get("segment_phase") == "confirmation"]
|
||||||
passing_confirmation_rows = [row for row in confirmation_rows if row.get("probe_passed")]
|
passing_confirmation_rows = [row for row in confirmation_rows if row.get("probe_passed")]
|
||||||
target_path = root / "blind-targets.json"
|
target_path = root / "blind-targets.json"
|
||||||
if good_rows:
|
target_source_rows = passing_confirmation_rows if confirmation_rows else good_rows
|
||||||
|
if target_source_rows:
|
||||||
target = {
|
target = {
|
||||||
"ready": True,
|
"ready": True,
|
||||||
"source": "probe-passing segmented mirrored run",
|
"source": (
|
||||||
"good_segments": [row["segment"] for row in good_rows],
|
"passing confirmation segment"
|
||||||
"planner_live_lag_ms_after": range_for(good_rows, "planner_live_lag_ms_after"),
|
if confirmation_rows else
|
||||||
"planner_skew_ms_after": range_for(good_rows, "planner_skew_ms_after"),
|
"probe-passing segmented mirrored run"
|
||||||
"active_audio_offset_us_after": range_for(good_rows, "active_audio_offset_us_after"),
|
),
|
||||||
"active_video_offset_us_after": range_for(good_rows, "active_video_offset_us_after"),
|
"good_segments": [row["segment"] for row in target_source_rows],
|
||||||
"probe_p95_abs_skew_ms": range_for(good_rows, "probe_p95_abs_skew_ms"),
|
"candidate_good_calibration_segments": [
|
||||||
"probe_median_skew_ms": range_for(good_rows, "probe_median_skew_ms"),
|
row["segment"] for row in good_rows if row.get("segment_phase") != "confirmation"
|
||||||
|
],
|
||||||
|
"planner_live_lag_ms_after": range_for(target_source_rows, "planner_live_lag_ms_after"),
|
||||||
|
"planner_skew_ms_after": range_for(target_source_rows, "planner_skew_ms_after"),
|
||||||
|
"active_audio_offset_us_after": range_for(target_source_rows, "active_audio_offset_us_after"),
|
||||||
|
"active_video_offset_us_after": range_for(target_source_rows, "active_video_offset_us_after"),
|
||||||
|
"probe_p95_abs_skew_ms": range_for(target_source_rows, "probe_p95_abs_skew_ms"),
|
||||||
|
"probe_median_skew_ms": range_for(target_source_rows, "probe_median_skew_ms"),
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
sortable = [
|
sortable = [
|
||||||
@ -892,8 +937,15 @@ else:
|
|||||||
best = min(sortable, key=lambda row: row["probe_p95_abs_skew_ms"], default=None)
|
best = min(sortable, key=lambda row: row["probe_p95_abs_skew_ms"], default=None)
|
||||||
target = {
|
target = {
|
||||||
"ready": False,
|
"ready": False,
|
||||||
"reason": "no segment produced a passing probe verdict; refusing to invent blind targets",
|
"reason": (
|
||||||
|
"confirmation did not pass; refusing to promote calibration-only segments to blind targets"
|
||||||
|
if confirmation_rows else
|
||||||
|
"no segment produced a passing probe verdict; refusing to invent blind targets"
|
||||||
|
),
|
||||||
"segments_seen": len(rows),
|
"segments_seen": len(rows),
|
||||||
|
"candidate_good_calibration_segments": [
|
||||||
|
row["segment"] for row in good_rows if row.get("segment_phase") != "confirmation"
|
||||||
|
],
|
||||||
"best_segment": best["segment"] if best else None,
|
"best_segment": best["segment"] if best else None,
|
||||||
"best_probe_status": best["probe_status"] if best else "missing",
|
"best_probe_status": best["probe_status"] if best else "missing",
|
||||||
"best_probe_p95_abs_skew_ms": best["probe_p95_abs_skew_ms"] if best else None,
|
"best_probe_p95_abs_skew_ms": best["probe_p95_abs_skew_ms"] if best else None,
|
||||||
@ -930,6 +982,8 @@ confirmation_path.write_text(json.dumps(confirmation, indent=2, sort_keys=True)
|
|||||||
|
|
||||||
print(f" ↪ segment_metrics_csv={csv_path}")
|
print(f" ↪ segment_metrics_csv={csv_path}")
|
||||||
print(f" ↪ segment_metrics_jsonl={jsonl_path}")
|
print(f" ↪ segment_metrics_jsonl={jsonl_path}")
|
||||||
|
print(f" ↪ segment_events_csv={events_csv_path}")
|
||||||
|
print(f" ↪ segment_events_jsonl={events_jsonl_path}")
|
||||||
print(f" ↪ blind_targets_json={target_path}")
|
print(f" ↪ blind_targets_json={target_path}")
|
||||||
print(f" ↪ blind_targets_ready={str(bool(target.get('ready'))).lower()}")
|
print(f" ↪ blind_targets_ready={str(bool(target.get('ready'))).lower()}")
|
||||||
print(f" ↪ confirmation_summary_json={confirmation_path}")
|
print(f" ↪ confirmation_summary_json={confirmation_path}")
|
||||||
|
|||||||
@ -10,7 +10,7 @@ bench = false
|
|||||||
|
|
||||||
[package]
|
[package]
|
||||||
name = "lesavka_server"
|
name = "lesavka_server"
|
||||||
version = "0.17.23"
|
version = "0.17.24"
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
autobins = false
|
autobins = false
|
||||||
|
|
||||||
|
|||||||
@ -88,12 +88,18 @@ fn browser_sync_script_can_delegate_to_a_real_path_driver() {
|
|||||||
"for attempt in 1 2 3 4 5",
|
"for attempt in 1 2 3 4 5",
|
||||||
"capture fetch attempt ${attempt} failed; retrying",
|
"capture fetch attempt ${attempt} failed; retrying",
|
||||||
"failed to fetch browser capture from ${TETHYS_HOST}:${REMOTE_CAPTURE}",
|
"failed to fetch browser capture from ${TETHYS_HOST}:${REMOTE_CAPTURE}",
|
||||||
|
r"raw activity delta was ([+-]?[0-9]+(?:\.[0-9]+)?) ms ",
|
||||||
|
r"\(video=([0-9]+(?:\.[0-9]+)?)s audio=([0-9]+(?:\.[0-9]+)?)s\)",
|
||||||
] {
|
] {
|
||||||
assert!(
|
assert!(
|
||||||
BROWSER_SYNC_SCRIPT.contains(expected),
|
BROWSER_SYNC_SCRIPT.contains(expected),
|
||||||
"browser sync script should contain {expected}"
|
"browser sync script should contain {expected}"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
assert!(
|
||||||
|
!BROWSER_SYNC_SCRIPT.contains(r"(?:\\.[0-9]+)?"),
|
||||||
|
"browser sync raw-delta parser should not require a literal backslash before decimals"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -162,6 +168,8 @@ fn mirrored_sync_script_uses_real_client_capture_path() {
|
|||||||
"calibration-decision.env",
|
"calibration-decision.env",
|
||||||
"segment-metrics.csv",
|
"segment-metrics.csv",
|
||||||
"segment-metrics.jsonl",
|
"segment-metrics.jsonl",
|
||||||
|
"segment-events.csv",
|
||||||
|
"segment-events.jsonl",
|
||||||
"confirmation-summary.json",
|
"confirmation-summary.json",
|
||||||
"confirmation_passed",
|
"confirmation_passed",
|
||||||
"check_confirmation_result",
|
"check_confirmation_result",
|
||||||
@ -170,6 +178,8 @@ fn mirrored_sync_script_uses_real_client_capture_path() {
|
|||||||
"probe_activity_start_delta_ms",
|
"probe_activity_start_delta_ms",
|
||||||
"blind-targets.json",
|
"blind-targets.json",
|
||||||
"no segment produced a passing probe verdict; refusing to invent blind targets",
|
"no segment produced a passing probe verdict; refusing to invent blind targets",
|
||||||
|
"confirmation did not pass; refusing to promote calibration-only segments to blind targets",
|
||||||
|
"candidate_good_calibration_segments",
|
||||||
"decision_mode",
|
"decision_mode",
|
||||||
"decision_provisional_video_recommendation_us",
|
"decision_provisional_video_recommendation_us",
|
||||||
"planner_live_lag_ms_after",
|
"planner_live_lag_ms_after",
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user