mirror of
https://github.com/huggingface/xet-core.git
synced 2026-06-04 13:30:29 +08:00
## Summary

- Rewrites smoke tests to drive everything through the `hf` CLI rather than the huggingface_hub Python API, covering the actual user-facing surface area of hf-xet
- Moves smoke tests and diagnostic scripts into a `scripts/` directory for cleaner repo layout
- Adds storage bucket test suite exercising the full bucket lifecycle
- Adds 50 MB and 100 MB files to repo upload/download tests

## Test matrix (14 tests, all passing)

**Repository tests** (`hf upload` / `hf download`)

- Upload single file, upload folder
- Download individual files + SHA-256 verify
- Download entire repo + SHA-256 verify
- Overwrite file and verify new content served
- Delete file and confirm absent

**Bucket tests** (`hf buckets`)

- `cp` upload / download + verify
- `sync` upload / download + verify
- Recursive list confirms expected paths
- Overwrite via `cp` + verify
- `sync --delete` removes extraneous remote files
- `rm` + confirm absent from listing

## Test plan

- [x] Run `HF_TOKEN=... ./scripts/smoke_tests/run.sh` and confirm all 14 tests pass
- [x] Run `./scripts/smoke_tests/run.sh --skip-buckets` for repo-only path
- [x] Run with `--hf-xet-version <version>` to confirm PyPI cache bypass works

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
240 lines
7.3 KiB
Bash
Executable File
240 lines
7.3 KiB
Bash
Executable File
#!/usr/bin/env bash
# hf-xet-diag-macos.sh — macOS-only diagnostics runner
# Runs a target command, periodically snapshots stacks with `sample`,
# detects hangs, and can dump cores with `lldb`.
# Installs hf-xet debug symbols if available.
# Output directory defaults to include mangled command string for easy correlation.

# Strict mode: -e aborts on unhandled failures, -u rejects unset variables,
# pipefail makes a pipeline fail when any stage fails, and -E lets ERR traps
# be inherited by functions and subshells.
set -Eeuo pipefail

# Defaults
INTERVAL=120   # seconds between stack snapshots (-i/--interval)
OUTDIR=""      # output directory (-o/--outdir); derived from the command if unset
OUTDIR_SET=""  # becomes non-empty once -o/--outdir is given

# Print the command-line help text to stdout.
# The here-doc delimiter is quoted, so the text is emitted literally
# (backticks and angle brackets are not expanded).
print_usage() {
  cat <<'USAGE'
Usage: hf-xet-diag-macos.sh [options] -- <command> [args...]

Runs a target command, periodically snapshots stacks (via `sample`),
detects hangs, and can dump cores (via `lldb`).
Also installs hf-xet debug symbols if available.

Requires: sample, lldb (Xcode Command Line Tools), curl, unzip, pip

Options:
-i, --interval SECONDS Stack snapshot cadence (default: 120)
-o, --outdir DIR Output directory (default: diag_<CMD>_<timestamp>)
-h, --help Show this help

Examples:
./hf-xet-diag-macos.sh -- python hfxet-test.py "Qwen/Qwen2.5-VL-3B-Instruct"
./hf-xet-diag-macos.sh -i 60 -- ./myapp --flag
USAGE
}

# --- option parsing ---
# Options are consumed until `--` or the first unrecognised word; whatever
# remains becomes the target command.
while [[ $# -gt 0 ]]; do
  case "$1" in
    -i|--interval)
      # Must be a plain decimal integer: the monitoring loop uses it in
      # shell arithmetic, where empty or non-numeric text would misbehave.
      if [[ ! "${2:-}" =~ ^(0|[1-9][0-9]*)$ ]]; then
        echo "ERROR: $1 requires a whole number of seconds." >&2
        exit 2
      fi
      INTERVAL="$2"
      shift 2
      ;;
    -o|--outdir)
      # Reject a missing/empty value: with OUTDIR="" the later
      # `mkdir -p "$OUTDIR"/{stacks,dumps}` would target /stacks and /dumps.
      if [[ -z "${2:-}" ]]; then
        echo "ERROR: $1 requires a directory argument." >&2
        exit 2
      fi
      OUTDIR="$2"
      OUTDIR_SET=1
      shift 2
      ;;
    -h|--help)
      print_usage
      exit 0
      ;;
    --)
      shift
      break
      ;;
    *)
      break
      ;;
  esac
done

if [[ $# -lt 1 ]]; then
  echo "ERROR: No command provided."
  print_usage
  exit 2
fi
# Keep the command as an array so arguments containing spaces stay intact.
CMD=( "$@" )

# Tool availability check
# pip is included because the usage text lists it as required and the
# debug-symbol step calls `pip show`.
missing=()
needs_xcode_tools=""
for cmd in sample lldb curl unzip pip; do
  if ! command -v "$cmd" >/dev/null 2>&1; then
    missing+=("$cmd")
    # The usage text attributes sample and lldb to the Xcode Command Line
    # Tools, so only their absence warrants the install hint.
    case "$cmd" in
      sample|lldb) needs_xcode_tools=1 ;;
    esac
  fi
done
if [[ ${#missing[@]} -ne 0 ]]; then
  echo "Missing required tools: ${missing[*]}"
  if [[ -n "$needs_xcode_tools" ]]; then
    echo "Install Xcode Command Line Tools with:"
    echo " xcode-select --install"
  fi
  exit 2
fi

# If no outdir given, generate one based on command
if [[ -z "$OUTDIR_SET" ]]; then
  CMD_STR="${CMD[*]}"
  # Replace every character outside [A-Za-z0-9] with "_". printf (not echo)
  # so no trailing newline is turned into an extra underscore; LC_ALL=C keeps
  # the character ranges ASCII-only.
  SAFE_CMD=$(printf '%s' "$CMD_STR" | LC_ALL=C tr -c 'A-Za-z0-9' '_')
  # Cap the length so a long command line cannot produce an over-long
  # directory name.
  SAFE_CMD="${SAFE_CMD:0:100}"
  OUTDIR="diag_${SAFE_CMD}_$(date +%Y%m%d%H%M%S)"
fi

# Output layout: stacks/ for snapshots, dumps/ for core files, plus the
# console log, environment log and target PID file.
mkdir -p "$OUTDIR"/{stacks,dumps}
CONSOLE_LOG="$OUTDIR/console.log"
ENV_LOG="$OUTDIR/env.log"
PID_FILE="$OUTDIR/pid"

echo "Diagnostics output: $OUTDIR"
echo "Stack trace interval: ${INTERVAL}s"
echo "Command: ${CMD[*]}"

# --- collect some quick system info ---
# Best-effort snapshot of the host: the output (stdout and stderr) of the
# whole group goes to the environment log, and a failure of any probe is
# tolerated.
{
  echo "=== $(date "+%Y-%m-%dT%H:%M:%S%z") ==="
  echo "uname -a:"; uname -a
  echo
  echo "top snapshot:"; top -l 1 | grep -E "^CPU|^Phys" || true
  echo
  echo "ulimit -a:"; ulimit -a || true
  echo
  echo "python version:"; python3 -VV || true
  echo
} > "$ENV_LOG" 2>&1 || true

# --- download hf-xet dbg symbols ---
# `|| true` keeps a failing `pip show` (package not installed) from tripping
# errexit/pipefail before the explicit check below can print its message.
WHEEL_VERSION=$(pip show hf-xet | awk '/^Version:/{print $2}' || true)
if [[ -z "$WHEEL_VERSION" ]]; then
  echo "Error: hf-xet package is not installed. Please install it before running this script." >&2
  exit 1
fi
echo "hf-xet wheel version: $WHEEL_VERSION"
# The per-version directory doubles as the "already installed" marker.
SYMBOL_DIR="symbols-$WHEEL_VERSION"

if [[ -d "$SYMBOL_DIR" ]]; then
  echo "Existing symbols dir found, assuming previously installed."
else
  # Strip only the "Location:" label so install paths containing spaces
  # survive intact.
  SITE_PACKAGES="$(pip show hf-xet | awk '/^Location:/{sub(/^Location:[ \t]*/, ""); print}')"
  WHEEL_DIR="$SITE_PACKAGES/hf_xet"
  DIST_INFO="$SITE_PACKAGES/hf_xet-$WHEEL_VERSION.dist-info"
  WHEEL_FILE="$DIST_INFO/WHEEL"

  # Reconstruct wheel name from wheel version and wheel tag.
  # Only the first Tag line is used so multiple tags are not glued together.
  WHEEL_TAG=$(awk '/^Tag:/{print $2; exit}' "$WHEEL_FILE")
  SYMBOL_FILENAME="hf_xet-$WHEEL_VERSION-$WHEEL_TAG.dylib.dSYM"

  echo "Downloading debug symbols: $SYMBOL_FILENAME"
  # If the version is of format "1.1.10rc0", change it to our release tag format like "1.1.10-rc0"
  RELEASE_TAG=$(printf '%s' "$WHEEL_VERSION" | sed 's/\([0-9]\)\(rc.*\)$/\1-\2/')
  DOWNLOAD_URL="https://github.com/huggingface/xet-core/releases/download/v${RELEASE_TAG}/dbg-symbols.zip"
  # Test curl directly: under `set -e` a separate `$?` check after the
  # command would never be reached on failure.
  if ! curl -fL "$DOWNLOAD_URL" -o dbg-symbols.zip; then
    echo "Error: Failed to download debug symbols from $DOWNLOAD_URL" >&2
    exit 1
  fi

  # Unpack the archive into the per-version symbols directory. On failure the
  # directory is removed again so a later run does not mistake it for a
  # completed install.
  if ! unzip dbg-symbols.zip -d "$SYMBOL_DIR"; then
    rm -rf -- "${SYMBOL_DIR:?}"
    echo "Error: Failed to extract dbg-symbols.zip" >&2
    exit 1
  fi

  # Copy to package directory
  if ! cp -R "$SYMBOL_DIR/dbg-symbols/$SYMBOL_FILENAME" "$WHEEL_DIR/"; then
    rm -rf -- "${SYMBOL_DIR:?}"
    echo "Error: Failed to install dbg symbol $SYMBOL_FILENAME to $WHEEL_DIR" >&2
    exit 1
  fi
  echo "Installed dbg symbol $SYMBOL_FILENAME to $WHEEL_DIR"
fi

# --- launch target ---
SCRIPT_START_TIME=$(date +%s)
REF_FILE="$OUTDIR/.ref_timestamp"
touch "$REF_FILE" # Reference file for finding logs created after this point
# Ensure REF_FILE is cleaned up on exit
trap 'rm -f "$REF_FILE"' EXIT
# Remove any PID file left behind in a reused output directory so the wait
# loop below cannot pick up a stale PID.
rm -f -- "$PID_FILE"
echo "Launching target at $(date "+%Y-%m-%dT%H:%M:%S%z") ..." | tee -a "$CONSOLE_LOG"
# The target is started in the background inside a subshell so its PID can be
# written to PID_FILE, while its stdout and stderr are piped through tee into
# the console log.
(
  "${CMD[@]}" &
  echo $! > "$PID_FILE"
) 2>&1 | tee -a "$CONSOLE_LOG" &
LOGGER_BG=$!

# read PID (poll up to 50 times, 0.1 s apart, for the file to become non-empty)
for _ in {1..50}; do
  if [[ -s "$PID_FILE" ]]; then
    break
  fi
  sleep 0.1
done
if [[ ! -s "$PID_FILE" ]]; then
  echo "ERROR: Could not determine child PID." | tee -a "$CONSOLE_LOG"
  exit 1
fi
TARGET_PID="$(cat "$PID_FILE")"
echo "Started PID: $TARGET_PID" | tee -a "$CONSOLE_LOG"

# --- stack capture + hang detection ---
# Paths of the most recent stack snapshot files, oldest first.
declare -a LAST_STACKS=()

# Take one stack snapshot of the target process and run hang detection.
# Globals: TARGET_PID, OUTDIR, CONSOLE_LOG (read); LAST_STACKS (updated)
# Returns: 0, also when the snapshot itself could not be taken.
capture_stack() {
  local ts stack_file
  ts="$(date +%Y%m%d%H%M%S)"
  stack_file="$OUTDIR/stacks/stack_${ts}.txt"

  # Only a snapshot that actually produced a non-empty file is logged and
  # queued; a failed one must not take part in the hang comparison.
  if ! sample "$TARGET_PID" 5 -file "$stack_file" || [[ ! -s "$stack_file" ]]; then
    echo "$(date "+%Y-%m-%dT%H:%M:%S%z") stack capture failed (no output in $stack_file)" | tee -a "$CONSOLE_LOG"
    return 0
  fi
  echo "$(date "+%Y-%m-%dT%H:%M:%S%z") captured stack -> $stack_file" | tee -a "$CONSOLE_LOG"

  # Keep a sliding window of the three most recent snapshots.
  LAST_STACKS+=("$stack_file")
  if (( ${#LAST_STACKS[@]} > 3 )); then
    LAST_STACKS=("${LAST_STACKS[@]: -3}")
  fi
  check_hang
}

# Decide whether the target looks hung: the three snapshots in LAST_STACKS
# are compared after stripping hex addresses and blank lines, and if all three
# are identical a core dump is taken and the window is reset.
# Globals: LAST_STACKS (read, reset on hang); CONSOLE_LOG (read)
# Returns: 0
check_hang() {
  # need three snapshots to decide
  if (( ${#LAST_STACKS[@]} < 3 )); then
    return 0
  fi

  local snapshot
  local -a normalized=()
  for snapshot in "${LAST_STACKS[@]:0:3}"; do
    # A missing or empty snapshot carries no evidence; without this guard
    # three failed captures would compare equal and be reported as a hang.
    if [[ ! -s "$snapshot" ]]; then
      return 0
    fi
    # normalize: strip addresses and empty lines. Extended regex (-E) is used
    # because `\+` in a basic regex is a GNU extension. grep exits 1 when
    # nothing is left, hence `|| true`.
    normalized+=("$(sed -E 's/0x[0-9a-fA-F]+//g' "$snapshot" | grep -v '^$' || true)")
  done

  # If any pair differs => stacks changed -> NOT a hang
  if [[ "${normalized[0]}" != "${normalized[1]}" || "${normalized[1]}" != "${normalized[2]}" ]]; then
    return 0
  fi

  # Otherwise stacks are the same across 3 snapshots => HANG
  echo "⚠️ Hang detected at $(date "+%Y-%m-%dT%H:%M:%S%z") — taking core dump." | tee -a "$CONSOLE_LOG"
  take_core_dump
  # Start a fresh window so the same hang is not re-reported by the very
  # next snapshot.
  LAST_STACKS=()
}

# Attach lldb to the target and ask it to write a core file under
# $OUTDIR/dumps, then report whether the file actually appeared.
# Globals: TARGET_PID, OUTDIR, CONSOLE_LOG (read)
# Returns: 0 (best-effort; failures are logged, not propagated)
take_core_dump() {
  local ts core_file
  ts="$(date +%Y%m%d%H%M%S)"
  core_file="$OUTDIR/dumps/dump_${TARGET_PID}_${ts}.core"

  # The path is quoted inside the lldb command so an output directory
  # containing spaces is passed as a single argument. lldb's own output goes
  # to the console log only.
  lldb -p "$TARGET_PID" -o "process save-core \"$core_file\"" -o "quit" >>"$CONSOLE_LOG" 2>&1 || true
  # Report success only when a non-empty core file exists.
  if [[ -s "$core_file" ]]; then
    echo "Core dump saved: $core_file" | tee -a "$CONSOLE_LOG"
  else
    echo "WARNING: core dump was not written: $core_file" | tee -a "$CONSOLE_LOG"
  fi
}

# --- monitoring loop ---
# Poll once per second for as long as the target PID can be signalled
# (`kill -0` sends no signal, it only checks). A snapshot is taken whenever
# at least INTERVAL seconds have passed since the previous one; starting
# from 0 means the first snapshot happens on the first iteration.
LAST_SNAPSHOT_AT=0
while kill -0 "$TARGET_PID" 2>/dev/null; do
  now=$(date +%s)
  if (( now - LAST_SNAPSHOT_AT >= INTERVAL )); then
    # A failed snapshot must not stop the monitoring.
    capture_stack || true
    LAST_SNAPSHOT_AT=$now
  fi
  sleep 1
done

echo "Process $TARGET_PID has exited at $(date "+%Y-%m-%dT%H:%M:%S%z")." | tee -a "$CONSOLE_LOG"

# --- collect xet log files from this execution ---
HF_HOME="${HF_HOME:-$HOME/.cache/huggingface}"
XET_LOG_DIR="$HF_HOME/xet/logs"
if [[ -d "$XET_LOG_DIR" ]]; then
  echo "Collecting xet logs from $XET_LOG_DIR ..." | tee -a "$CONSOLE_LOG"
  mkdir -p "$OUTDIR/xet_logs"

  # Find log files modified after the reference file was touched (just before
  # the target was launched). Names are passed NUL-delimited so unusual file
  # names survive. Collection is best-effort: `if cp` and the trailing
  # `|| true` keep one failed copy or a find error from aborting the script
  # under errexit/pipefail.
  find "$XET_LOG_DIR" -name "xet_*.log" -type f -newer "$REF_FILE" -print0 2>/dev/null |
    while IFS= read -r -d '' logfile; do
      if cp "$logfile" "$OUTDIR/xet_logs/" 2>/dev/null; then
        echo " Copied: $(basename "$logfile")" | tee -a "$CONSOLE_LOG"
      fi
    done || true
else
  echo "No xet log directory found at $XET_LOG_DIR" | tee -a "$CONSOLE_LOG"
fi

echo "Logs and stacks are in: $OUTDIR"
# Detach the background logging pipeline from job control; errors (e.g. the
# job has already finished) are ignored.
disown "$LOGGER_BG" 2>/dev/null || true