test(server-rc): retry remote artifact transfer

This commit is contained in:
Brad Stein 2026-05-04 21:38:30 -03:00
parent b49395e927
commit dc4b5f6e8b
6 changed files with 74 additions and 12 deletions

6
Cargo.lock generated
View File

@ -1652,7 +1652,7 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
[[package]]
name = "lesavka_client"
version = "0.19.23"
version = "0.19.24"
dependencies = [
"anyhow",
"async-stream",
@ -1686,7 +1686,7 @@ dependencies = [
[[package]]
name = "lesavka_common"
version = "0.19.23"
version = "0.19.24"
dependencies = [
"anyhow",
"base64",
@ -1698,7 +1698,7 @@ dependencies = [
[[package]]
name = "lesavka_server"
version = "0.19.23"
version = "0.19.24"
dependencies = [
"anyhow",
"base64",

View File

@ -4,7 +4,7 @@ path = "src/main.rs"
[package]
name = "lesavka_client"
version = "0.19.23"
version = "0.19.24"
edition = "2024"
[dependencies]

View File

@ -1,6 +1,6 @@
[package]
name = "lesavka_common"
version = "0.19.23"
version = "0.19.24"
edition = "2024"
build = "build.rs"

View File

@ -58,6 +58,8 @@ REMOTE_ANALYZE=${REMOTE_ANALYZE:-1}
# Path on the remote host where the sync analyzer is copied to and executed from.
REMOTE_ANALYZE_BIN=${REMOTE_ANALYZE_BIN:-/tmp/lesavka-sync-analyze}
# Set to 0 to skip copying the analyzer binary to the remote host.
REMOTE_ANALYZE_COPY=${REMOTE_ANALYZE_COPY:-1}
# Set to 0 to skip fetching the capture file back to the local machine.
FETCH_CAPTURE=${FETCH_CAPTURE:-1}
# Number of attempts retry_remote_artifact_command makes for a remote command.
REMOTE_ARTIFACT_RETRIES=${REMOTE_ARTIFACT_RETRIES:-3}
# Seconds retry_remote_artifact_command sleeps between failed attempts.
REMOTE_ARTIFACT_RETRY_DELAY_SECONDS=${REMOTE_ARTIFACT_RETRY_DELAY_SECONDS:-5}
# NOTE(review): the consumers of the three settings below are not visible in
# this hunk; presumably they control a server preflight check and the camera
# output/codec it expects -- confirm against the rest of the script.
REMOTE_SERVER_PREFLIGHT=${REMOTE_SERVER_PREFLIGHT:-1}
REMOTE_EXPECT_CAM_OUTPUT=${REMOTE_EXPECT_CAM_OUTPUT:-uvc}
REMOTE_EXPECT_UVC_CODEC=${REMOTE_EXPECT_UVC_CODEC:-mjpeg}
@ -505,6 +507,36 @@ start_server_tunnel() {
wait_for_server_tunnel "${local_port}"
}
#######################################
# Run a command, retrying it while it keeps exiting non-zero.
# Globals:
#   REMOTE_ARTIFACT_RETRIES (read)             - total number of attempts; a
#     value that is not a positive integer is treated as a single attempt
#   REMOTE_ARTIFACT_RETRY_DELAY_SECONDS (read) - pause between attempts
# Arguments:
#   $1   - human-readable description used in diagnostics
#   $2.. - the command to run and its arguments
# Outputs:
#   Retry and final-failure diagnostics on stderr.
# Returns:
#   0 as soon as one attempt succeeds, otherwise the status of the last attempt.
#######################################
retry_remote_artifact_command() {
  local description=$1
  shift
  local attempt status=0 max_attempts=1

  # Always run the command at least once, even when the retry count is 0,
  # empty or not a number (10# avoids octal parsing of values such as "08").
  if [[ "${REMOTE_ARTIFACT_RETRIES:-}" =~ ^[0-9]+$ ]]; then
    max_attempts=$((10#${REMOTE_ARTIFACT_RETRIES}))
  fi
  if (( max_attempts < 1 )); then
    max_attempts=1
  fi

  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    # Capture the status with `||` instead of toggling `set +e` / `set -e`:
    # shell options are global, so re-enabling errexit here would override a
    # caller that disabled it in order to inspect our return status.
    status=0
    "$@" || status=$?
    if [[ "${status}" -eq 0 ]]; then
      return 0
    fi
    if (( attempt < max_attempts )); then
      printf '%s failed with status %s; retrying %s/%s in %ss\n' \
        "${description}" "${status}" "$((attempt + 1))" "${max_attempts}" \
        "${REMOTE_ARTIFACT_RETRY_DELAY_SECONDS}" >&2
      sleep "${REMOTE_ARTIFACT_RETRY_DELAY_SECONDS}"
    fi
  done

  printf '%s failed after %s attempt(s)\n' "${description}" "${max_attempts}" >&2
  return "${status}"
}
#######################################
# Run the sync analyzer once on the remote host over ssh and store its
# stdout in a local file.
# Globals (read):
#   SSH_OPTS, TETHYS_HOST, REMOTE_ANALYZE_BIN, remote_fetch_capture,
#   PROBE_EVENT_WIDTH_CODES, analysis_window_arg
# Arguments:
#   $1 - local path that receives the analyzer output
# Returns:
#   The exit status of the ssh invocation.
#######################################
run_remote_sync_analysis_once() {
  local output_path=$1
  local remote_cmd

  # Mark the analyzer executable, then run it against the capture.
  remote_cmd="chmod +x '${REMOTE_ANALYZE_BIN}'"
  remote_cmd+=" && '${REMOTE_ANALYZE_BIN}' '${remote_fetch_capture}'"
  remote_cmd+=" --json --event-width-codes '${PROBE_EVENT_WIDTH_CODES}'"
  remote_cmd+=" ${analysis_window_arg}"

  # SSH_OPTS is left unquoted on purpose so it splits into separate options.
  ssh ${SSH_OPTS} "${TETHYS_HOST}" "${remote_cmd}" > "${output_path}"
}
resolve_server_addr() {
if [[ "${LESAVKA_SERVER_ADDR}" != "auto" ]]; then
RESOLVED_LESAVKA_SERVER_ADDR="${LESAVKA_SERVER_ADDR}"
@ -3140,7 +3172,9 @@ if [[ -f "${LOCAL_CAPTURE_LOG}" ]] \
capture_streamon_timeout=1
fi
if ssh ${SSH_OPTS} "${TETHYS_HOST}" "test -f '${REMOTE_CAPTURE}'"; then
if retry_remote_artifact_command \
"checking remote capture on ${TETHYS_HOST}" \
ssh ${SSH_OPTS} "${TETHYS_HOST}" "test -f '${REMOTE_CAPTURE}'"; then
remote_fetch_capture="${REMOTE_CAPTURE}"
analysis_status=0
if [[ "${ANALYSIS_NORMALIZE}" != "0" ]]; then
@ -3173,6 +3207,8 @@ REMOTE_NORMALIZE_SCRIPT
if [[ "${REMOTE_ANALYZE}" != "0" ]]; then
if [[ "${REMOTE_ANALYZE_COPY}" != "0" ]]; then
echo "==> copying sync analyzer to ${TETHYS_HOST}:${REMOTE_ANALYZE_BIN}"
retry_remote_artifact_command \
"copying sync analyzer to ${TETHYS_HOST}" \
scp ${SSH_OPTS} "${ANALYZE_BIN}" "${TETHYS_HOST}:${REMOTE_ANALYZE_BIN}"
fi
analysis_window_arg="$(compute_analysis_window_arg)"
@ -3180,17 +3216,33 @@ REMOTE_NORMALIZE_SCRIPT
echo " ↪ analyzer timeline window: ${analysis_window_arg#--analysis-window-s }"
fi
echo "==> analyzing capture on ${TETHYS_HOST}"
analysis_tmp="${LOCAL_ANALYSIS_JSON}.tmp"
set +e
ssh ${SSH_OPTS} "${TETHYS_HOST}" \
"chmod +x '${REMOTE_ANALYZE_BIN}' && '${REMOTE_ANALYZE_BIN}' '${remote_fetch_capture}' --json --event-width-codes '${PROBE_EVENT_WIDTH_CODES}' ${analysis_window_arg}" \
> "${LOCAL_ANALYSIS_JSON}"
retry_remote_artifact_command \
"running remote sync analysis on ${TETHYS_HOST}" \
run_remote_sync_analysis_once "${analysis_tmp}"
analysis_status=$?
set -e
if [[ "${analysis_status}" -eq 0 ]]; then
mv "${analysis_tmp}" "${LOCAL_ANALYSIS_JSON}"
else
rm -f "${analysis_tmp}"
fi
fi
if [[ "${FETCH_CAPTURE}" != "0" ]]; then
echo "==> fetching capture back to ${LOCAL_CAPTURE}"
scp ${SSH_OPTS} "${TETHYS_HOST}:${remote_fetch_capture}" "${LOCAL_CAPTURE}"
fetch_capture_status=0
retry_remote_artifact_command \
"fetching capture from ${TETHYS_HOST}" \
scp ${SSH_OPTS} "${TETHYS_HOST}:${remote_fetch_capture}" "${LOCAL_CAPTURE}" \
|| fetch_capture_status=$?
if [[ "${fetch_capture_status}" -ne 0 ]]; then
echo "warning: failed to fetch capture artifact; continuing with remote analysis JSON when available" >&2
if [[ "${REMOTE_ANALYZE}" == "0" ]]; then
exit "${fetch_capture_status}"
fi
fi
fi
if [[ "${analysis_status}" -ne 0 ]]; then
echo "remote analysis failed with status ${analysis_status}; capture preserved at ${LOCAL_CAPTURE}" >&2

View File

@ -10,7 +10,7 @@ bench = false
[package]
name = "lesavka_server"
version = "0.19.23"
version = "0.19.24"
edition = "2024"
autobins = false

View File

@ -184,6 +184,16 @@ fn upstream_sync_script_tunnels_auto_server_addr_through_ssh() {
"\"${LESAVKA_OUTPUT_DELAY_PROBE_VIDEO_DELAY_US}\"",
"REMOTE_PULSE_CAPTURE_TOOL=${REMOTE_PULSE_CAPTURE_TOOL:-gst}",
"REMOTE_CAPTURE_ALLOW_ALSA_FALLBACK=${REMOTE_CAPTURE_ALLOW_ALSA_FALLBACK:-0}",
"REMOTE_ARTIFACT_RETRIES=${REMOTE_ARTIFACT_RETRIES:-3}",
"REMOTE_ARTIFACT_RETRY_DELAY_SECONDS=${REMOTE_ARTIFACT_RETRY_DELAY_SECONDS:-5}",
"retry_remote_artifact_command",
"run_remote_sync_analysis_once",
"checking remote capture on ${TETHYS_HOST}",
"copying sync analyzer to ${TETHYS_HOST}",
"running remote sync analysis on ${TETHYS_HOST}",
"fetching capture from ${TETHYS_HOST}",
"warning: failed to fetch capture artifact; continuing with remote analysis JSON when available",
"analysis_tmp=\"${LOCAL_ANALYSIS_JSON}.tmp\"",
"REMOTE_PULSE_AUDIO_ANCHOR_SILENCE=${REMOTE_PULSE_AUDIO_ANCHOR_SILENCE:-1}",
"anchoring Pulse capture audio timeline with generated silence",
"audiotestsrc wave=silence is-live=true do-timestamp=true",