From ade634a690e3b489df1f5ad8624595eaabdb06d9 Mon Sep 17 00:00:00 2001 From: Tommaso Bonato Date: Tue, 10 Feb 2026 16:46:19 +0100 Subject: [PATCH] Add comprehensive test suite for all topologies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four test scripts covering 126 tests total: - run_all_tests.sh: 22 functional tests (DF/SF × 4 routing × 1/10 flows) - run_reachability_tests.sh: 18 reachability tests (one-to-all, all-to-one, same-switch, cross-group/partition, max distance) - run_edge_case_tests.sh: 49 edge case tests (cwnd/queue variations, tiny/ large flows, incast/outcast, staggered starts, cross-topology consistency) - run_e2e_tests.sh: 37 E2E stress tests (bisection, permutation, 1024-node FT, 1134-host SF p7q9, heavy incast, mixed sizes, all routing combos) All paths are relative (SCRIPT_DIR-based) for portability. Generated TM/output files excluded via .gitignore. --- tests/.gitignore | 8 + tests/run_all_tests.sh | 254 +++++++++++++++++ tests/run_e2e_tests.sh | 415 ++++++++++++++++++++++++++++ tests/run_edge_case_tests.sh | 474 ++++++++++++++++++++++++++++++++ tests/run_reachability_tests.sh | 313 +++++++++++++++++++++ 5 files changed, 1464 insertions(+) create mode 100644 tests/.gitignore create mode 100755 tests/run_all_tests.sh create mode 100755 tests/run_e2e_tests.sh create mode 100755 tests/run_edge_case_tests.sh create mode 100755 tests/run_reachability_tests.sh diff --git a/tests/.gitignore b/tests/.gitignore new file mode 100644 index 0000000..0dea38f --- /dev/null +++ b/tests/.gitignore @@ -0,0 +1,8 @@ +# Generated test artifacts (created at runtime) +*.tm +*.txt +*.dat +edge_tms/ +edge_out/ +e2e_tms/ +e2e_out/ diff --git a/tests/run_all_tests.sh b/tests/run_all_tests.sh new file mode 100755 index 0000000..3b6e6d5 --- /dev/null +++ b/tests/run_all_tests.sh @@ -0,0 +1,254 @@ +#!/usr/bin/env bash +# Comprehensive topology & routing tests for HTSIM dragonfly and slimfly +# Verifies: (1) all flows finish 
# (2) retransmission recovery (3) expected runtime
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
HTSIM_DIR="$REPO_DIR"
UEC_BIN="$HTSIM_DIR/htsim/sim/datacenter/htsim_uec"
TOPO_DIR="$HTSIM_DIR/htsim/sim/datacenter/topologies"
TEST_DIR="$SCRIPT_DIR"

PASS=0
FAIL=0
TOTAL=0

# ============================================================
# Traffic matrix creation helpers
# ============================================================

#######################################
# Create a single-flow TM: host 0 -> host (num_hosts - 1), 1 MiB.
# Arguments: $1 - number of hosts, $2 - output file
#######################################
make_single_flow_tm() {
  local num_hosts=$1
  local outfile=$2
  local dst=$((num_hosts - 1))
  cat > "$outfile" <<EOF
Nodes $num_hosts
Connections 1
0->$dst start 0 size 1048576
EOF
}

#######################################
# Create a medium TM: 10 deterministic flows of 1 MiB each, with sources
# and destinations spread over the host range.
# Arguments: $1 - number of hosts, $2 - output file,
#            $3 - hosts per switch (accepted but not used by the generator)
#######################################
make_medium_tm() {
  local num_hosts=$1
  local outfile=$2
  local p=${3:-} # hosts per switch; kept so existing callers keep working
  local num_flows=10

  # Spread src and dst across the host range.
  local step=$((num_hosts / (num_flows + 1)))
  if [ "$step" -lt 1 ]; then
    step=1
  fi

  local i src dst
  {
    echo "Nodes $num_hosts"
    echo "Connections $num_flows"
    for ((i = 0; i < num_flows; i++)); do
      src=$(((i * step) % num_hosts))
      dst=$(((src + num_hosts / 2 + i * 7) % num_hosts))
      # Ensure src != dst
      if [ "$src" -eq "$dst" ]; then
        dst=$(((dst + 1) % num_hosts))
      fi
      echo "$src->$dst start 0 size 1048576"
    done
  } > "$outfile"
}

# ============================================================
# Test runner
# ============================================================

#######################################
# Print the integer that follows "<key>: " in a stats line.
# The key must not be preceded by a letter, digit or underscore, so that
# looking up "ACKs" never picks up the value of "NACKs".
# Arguments: $1 - stats line, $2 - key (e.g. New, Rtx, ACKs)
# Returns:   0 and prints the number if found, 1 otherwise
#######################################
_stat_field() {
  local line=$1 key=$2
  local re="(^|[^[:alnum:]_])${key}: ([0-9]+)"
  [[ "$line" =~ $re ]] || return 1
  printf '%s\n' "${BASH_REMATCH[2]}"
}

#######################################
# Run one simulation and classify the result as PASS or FAIL.
# Globals:   TEST_DIR (read); TOTAL, PASS, FAIL (updated)
# Arguments: $1 label        $2 simulator binary  $3 topology path
#            $4 routing      $5 traffic matrix    $6 number of hosts (unused)
#            $7 workload type ("small" or "medium")
#            $8 number of flows
#            $9 optional -topology value (dragonfly, slimfly)
# Outputs:   one result line on stdout; simulator output goes to
#            $TEST_DIR/out_<label>.txt
# Returns:   always 0; the verdict is recorded in PASS/FAIL
#######################################
run_test() {
  local label="$1"
  local binary="$2"
  local topo_path="$3"
  local routing="$4"
  local tm_file="$5"
  local num_hosts="$6"
  local workload_type="$7" # "small" or "medium"
  local num_flows="$8"
  local topology="${9:-}" # optional: dragonfly, slimfly

  TOTAL=$((TOTAL + 1))
  local outfile="$TEST_DIR/out_${label}.txt"

  echo -n " [$TOTAL] $label ... "

  # Build the command as an array so paths containing spaces survive.
  local -a cmd=("$binary")
  if [ -n "$topology" ]; then
    cmd+=(-topology "$topology")
  fi
  cmd+=(-topo "$topo_path" -routing "$routing" -tm "$tm_file")

  # Run with timeout (60s should be plenty for these workloads)
  local exit_code=0
  timeout 60 "${cmd[@]}" > "$outfile" 2>&1 || exit_code=$?

  # === Check 1: Did it complete? ===
  if [ "$exit_code" -ne 0 ]; then
    if [ "$exit_code" -eq 124 ]; then
      echo "FAIL (timeout after 60s)"
    else
      echo "FAIL (exit code $exit_code)"
    fi
    FAIL=$((FAIL + 1))
    tail -n 5 "$outfile" || true
    return 0
  fi

  if ! grep -q "Done" "$outfile"; then
    echo "FAIL (no 'Done' in output)"
    FAIL=$((FAIL + 1))
    tail -n 5 "$outfile" || true
    return 0
  fi

  # === Check 2: Parse packet stats ===
  # Prefer a line that starts with "New:"; fall back to any line containing
  # it. Only the last match is used so the value is always a single line.
  local stats_line
  stats_line=$(grep '^New:' "$outfile" | tail -n 1 || true)
  if [ -z "$stats_line" ]; then
    stats_line=$(grep 'New:' "$outfile" | tail -n 1 || true)
  fi
  if [ -z "$stats_line" ]; then
    echo "FAIL (no stats line)"
    FAIL=$((FAIL + 1))
    return 0
  fi

  # New and ACKs are mandatory; the remaining counters default to 0 when
  # the simulator does not print them (a failed lookup must not trip set -e).
  local new_pkts rtx_pkts ack_pkts nack_pkts bounced_pkts
  new_pkts=$(_stat_field "$stats_line" "New") || new_pkts=""
  ack_pkts=$(_stat_field "$stats_line" "ACKs") || ack_pkts=""
  rtx_pkts=$(_stat_field "$stats_line" "Rtx") || rtx_pkts=0
  nack_pkts=$(_stat_field "$stats_line" "NACKs") || nack_pkts=0
  bounced_pkts=$(_stat_field "$stats_line" "Bounced") || bounced_pkts=0

  if [ -z "$new_pkts" ] || [ -z "$ack_pkts" ]; then
    echo "FAIL (malformed stats line: $stats_line)"
    FAIL=$((FAIL + 1))
    return 0
  fi

  # === Check 3: All flows completed (New packets > 0, ACKs received) ===
  if [ "$new_pkts" -eq 0 ]; then
    echo "FAIL (0 new packets sent)"
    FAIL=$((FAIL + 1))
    return 0
  fi

  if [ "$ack_pkts" -eq 0 ]; then
    echo "FAIL (0 ACKs received - flows didn't complete)"
    FAIL=$((FAIL + 1))
    return 0
  fi

  # === Check 4: Expected packet count ===
  # 1 MiB = 1048576 bytes, packet_size = 4160, data ~ 4096 bytes
  # So ~256 packets per flow (1048576/4096). With 4160B packets, ~252.
  # Allow range 240-280 per flow to account for protocol specifics.
  local expected_min=$((num_flows * 240))
  local expected_max=$((num_flows * 280))

  local pkt_check="OK"
  if [ "$new_pkts" -lt "$expected_min" ] || [ "$new_pkts" -gt "$expected_max" ]; then
    pkt_check="WARN(expected ${expected_min}-${expected_max})"
  fi

  # === Check 5: Retransmission check ===
  local rtx_check="OK"
  if [ "$workload_type" = "small" ] && [ "$rtx_pkts" -gt 5 ]; then
    # Single flow with no contention should have minimal retransmits
    rtx_check="WARN(${rtx_pkts}rtx for single flow)"
  fi

  # === Check 6: Recovery check - if there were NACKs/bounces, Rtx should be > 0 ===
  local recovery_check="OK"
  if [ "$nack_pkts" -gt 0 ] || [ "$bounced_pkts" -gt 0 ]; then
    if [ "$rtx_pkts" -gt 0 ]; then
      recovery_check="OK(recovered: ${rtx_pkts}rtx for ${nack_pkts}nack+${bounced_pkts}bounce)"
    else
      recovery_check="WARN(losses but 0 rtx)"
    fi
  fi

  echo "PASS | New:$new_pkts Rtx:$rtx_pkts ACK:$ack_pkts NACK:$nack_pkts Bounce:$bounced_pkts | pkt:$pkt_check rtx:$rtx_check recovery:$recovery_check"
  PASS=$((PASS + 1))
  return 0
}

# ============================================================
# Generate all traffic matrices
# ============================================================
echo "=== Generating traffic matrices ==="

# Dragonfly p3a6h3: 342 hosts, p=3
make_single_flow_tm 342 "$TEST_DIR/df_p3a6h3_1flow.tm"
make_medium_tm 342 "$TEST_DIR/df_p3a6h3_10flow.tm" 3

# Dragonfly p4a8h4: 1056 hosts, p=4
make_single_flow_tm 1056 "$TEST_DIR/df_p4a8h4_1flow.tm"
make_medium_tm 1056 "$TEST_DIR/df_p4a8h4_10flow.tm" 4

# Slimfly p4q5: 200 hosts, p=4
make_single_flow_tm 200 "$TEST_DIR/sf_p4q5_1flow.tm"
make_medium_tm 200 "$TEST_DIR/sf_p4q5_10flow.tm" 4

echo "Done generating TMs"
echo ""

# ============================================================
# Dragonfly p3a6h3 tests (342 hosts, has host_table → SOURCE works)
# ============================================================
echo "=== Dragonfly p3a6h3 (342 hosts) ==="
#######################################
# Run the single-flow sweep followed by the 10-flow sweep for one topology
# instance, once per routing mode, in the order given.
# Globals:   UEC_BIN, TEST_DIR (read)
# Arguments: $1 - label/TM file prefix (e.g. df_p3a6h3)
#            $2 - topology path
#            $3 - number of hosts
#            $4 - value passed as the -topology type
#            $5.. - routing modes to exercise
#######################################
sweep_routings() {
  local tag=$1 topo_path=$2 hosts=$3 kind=$4
  shift 4
  local mode

  for mode in "$@"; do
    run_test "${tag}_${mode}_1flow" "$UEC_BIN" "$topo_path" "$mode" \
      "$TEST_DIR/${tag}_1flow.tm" "$hosts" "small" 1 "$kind"
  done
  echo ""
  echo " -- Medium workload (10 flows) --"
  for mode in "$@"; do
    run_test "${tag}_${mode}_10flow" "$UEC_BIN" "$topo_path" "$mode" \
      "$TEST_DIR/${tag}_10flow.tm" "$hosts" "medium" 10 "$kind"
  done
  echo ""
}

DF_P3_PATH="$TOPO_DIR/dragonfly/p3a6h3"
sweep_routings "df_p3a6h3" "$DF_P3_PATH" 342 "dragonfly" \
  MINIMAL VALIANT UGAL_L SOURCE

# ============================================================
# Dragonfly p4a8h4 tests (1056 hosts, NO host_table → skip SOURCE)
# ============================================================
echo "=== Dragonfly p4a8h4 (1056 hosts) ==="
DF_P4_PATH="$TOPO_DIR/dragonfly/p4a8h4"
sweep_routings "df_p4a8h4" "$DF_P4_PATH" 1056 "dragonfly" \
  MINIMAL VALIANT UGAL_L

# ============================================================
# Slimfly p4q5 tests (200 hosts, has host_table → SOURCE works)
# ============================================================
echo "=== Slimfly p4q5 (200 hosts) ==="
SF_P4_PATH="$TOPO_DIR/slimfly/p4q5"
sweep_routings "sf_p4q5" "$SF_P4_PATH" 200 "slimfly" \
  MINIMAL VALIANT UGAL_L SOURCE

# ============================================================
#
Summary +# ============================================================ +echo "=========================================" +echo " RESULTS: $PASS passed / $FAIL failed / $TOTAL total" +echo "=========================================" + +if [ $FAIL -gt 0 ]; then + echo "SOME TESTS FAILED" + exit 1 +else + echo "ALL TESTS PASSED" + exit 0 +fi diff --git a/tests/run_e2e_tests.sh b/tests/run_e2e_tests.sh new file mode 100755 index 0000000..2651da0 --- /dev/null +++ b/tests/run_e2e_tests.sh @@ -0,0 +1,415 @@ +#!/usr/bin/env bash +# End-to-end stress & integration tests for the unified htsim_uec binary +# Covers: larger topologies, heavy workloads, all routing combos, all-to-all, +# FT 1024 nodes, SF p7q9 (1134 hosts), bisection traffic, mixed sizes +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" +HTSIM_DIR="$REPO_DIR" +UEC_BIN="$HTSIM_DIR/htsim/sim/datacenter/htsim_uec" +TOPO_DIR="$HTSIM_DIR/htsim/sim/datacenter/topologies" +TEST_DIR="$SCRIPT_DIR" +TM_DIR="$TEST_DIR/e2e_tms" +OUT_DIR="$TEST_DIR/e2e_out" + +mkdir -p "$TM_DIR" "$OUT_DIR" + +PASS=0 +FAIL=0 +TOTAL=0 + +# ============================================================ +# Helper +# ============================================================ +run_e2e_test() { + local label="$1" + local cmd="$2" + local expected_flows="$3" # 0 = just check basic completion + local timeout_sec="${4:-120}" + + TOTAL=$((TOTAL + 1)) + local outfile="$OUT_DIR/out_${label}.txt" + echo -n " [$TOTAL] $label ... " + + if timeout "$timeout_sec" bash -c "$cmd" > "$outfile" 2>&1; then + local exit_code=0 + else + local exit_code=$? + fi + + if [ $exit_code -ne 0 ]; then + if [ $exit_code -eq 124 ]; then + echo "FAIL (timeout after ${timeout_sec}s)" + else + echo "FAIL (exit $exit_code)" + tail -5 "$outfile" + fi + FAIL=$((FAIL + 1)); return + fi + + if ! 
grep -q "Done" "$outfile"; then + echo "FAIL (no 'Done')" + tail -5 "$outfile" + FAIL=$((FAIL + 1)); return + fi + + local stats_line new_pkts ack_pkts rtx_pkts nack_pkts bounced_pkts + stats_line=$(grep "^New:" "$outfile" || grep "New:" "$outfile" | tail -1 || true) + if [ -z "$stats_line" ]; then + echo "FAIL (no stats)" + FAIL=$((FAIL + 1)); return + fi + new_pkts=$(echo "$stats_line" | grep -oP 'New: \K[0-9]+') + ack_pkts=$(echo "$stats_line" | grep -oP 'ACKs: \K[0-9]+') + rtx_pkts=$(echo "$stats_line" | grep -oP 'Rtx: \K[0-9]+' || echo 0) + nack_pkts=$(echo "$stats_line" | grep -oP 'NACKs: \K[0-9]+' || echo 0) + bounced_pkts=$(echo "$stats_line" | grep -oP 'Bounced: \K[0-9]+' || echo 0) + + if [ "$new_pkts" -eq 0 ] || [ "$ack_pkts" -eq 0 ]; then + echo "FAIL (New=$new_pkts ACK=$ack_pkts)" + FAIL=$((FAIL + 1)); return + fi + + if [ "$expected_flows" -gt 0 ]; then + local finished + finished=$(grep -c "finished at" "$outfile" || echo 0) + if [ "$finished" -ne "$expected_flows" ]; then + echo "FAIL (finished $finished of $expected_flows flows)" + FAIL=$((FAIL + 1)); return + fi + fi + + echo "PASS | New:$new_pkts Rtx:$rtx_pkts ACK:$ack_pkts NACK:$nack_pkts Bounce:$bounced_pkts" + PASS=$((PASS + 1)) +} + +# ============================================================ +# TM generators +# ============================================================ +make_bisection_tm() { + local num_hosts=$1 outfile=$2 + local half=$((num_hosts / 2)) + { + echo "Nodes $num_hosts" + echo "Connections $half" + for i in $(seq 0 $((half - 1))); do + echo "$i->$((i + half)) start 0 size 1048576" + done + } > "$outfile" +} + +make_all_to_all_tm() { + local num_hosts=$1 outfile=$2 max_flows=${3:-0} + local count=0 + local lines="" + for src in $(seq 0 $((num_hosts - 1))); do + for dst in $(seq 0 $((num_hosts - 1))); do + if [ "$src" -ne "$dst" ]; then + lines+="$src->$dst start 0 size 4096"$'\n' + count=$((count + 1)) + if [ "$max_flows" -gt 0 ] && [ "$count" -ge "$max_flows" ]; then + 
break 2 + fi + fi + done + done + { + echo "Nodes $num_hosts" + echo "Connections $count" + printf "%s" "$lines" + } > "$outfile" +} + +make_random_permutation_tm() { + local num_hosts=$1 outfile=$2 flow_size=${3:-1048576} + # Each host sends to (host + num_hosts/2) % num_hosts — a simple permutation + local half=$((num_hosts / 2)) + { + echo "Nodes $num_hosts" + echo "Connections $num_hosts" + for src in $(seq 0 $((num_hosts - 1))); do + local dst=$(( (src + half) % num_hosts )) + echo "$src->$dst start 0 size $flow_size" + done + } > "$outfile" +} + +make_heavy_incast_tm() { + local num_hosts=$1 outfile=$2 num_senders=$3 target=$4 + { + echo "Nodes $num_hosts" + echo "Connections $num_senders" + for src in $(seq 0 $((num_senders - 1))); do + local s=$src + [ "$s" -eq "$target" ] && s=$((num_hosts - 1)) + echo "$s->$target start 0 size 1048576" + done + } > "$outfile" +} + +make_mixed_size_tm() { + local num_hosts=$1 outfile=$2 + # Flows with varying sizes: 4KB, 64KB, 256KB, 1MB, 4MB + { + echo "Nodes $num_hosts" + echo "Connections 5" + echo "0->$((num_hosts/5)) start 0 size 4096" + echo "$((num_hosts/5))->$((2*num_hosts/5)) start 0 size 65536" + echo "$((2*num_hosts/5))->$((3*num_hosts/5)) start 0 size 262144" + echo "$((3*num_hosts/5))->$((4*num_hosts/5)) start 0 size 1048576" + echo "1->$((num_hosts-1)) start 0 size 4194304" + } > "$outfile" +} + +# ============================================================ +# Generate TMs +# ============================================================ +echo "=== Generating E2E traffic matrices ===" + +# --- Fat Tree 128 --- +make_bisection_tm 128 "$TM_DIR/ft128_bisection.tm" +make_random_permutation_tm 128 "$TM_DIR/ft128_permutation.tm" +make_heavy_incast_tm 128 "$TM_DIR/ft128_incast32.tm" 32 64 +make_mixed_size_tm 128 "$TM_DIR/ft128_mixed.tm" + +# --- Fat Tree 1024 --- +cat > "$TM_DIR/ft1024_1flow.tm" <<'EOF' +Nodes 1024 +Connections 1 +0->1023 start 0 size 1048576 +EOF + +cat > "$TM_DIR/ft1024_20flow.tm" <$((1023 - i*50)) 
start 0 size 1048576"; done) +EOF + +make_heavy_incast_tm 1024 "$TM_DIR/ft1024_incast32.tm" 32 512 +make_mixed_size_tm 1024 "$TM_DIR/ft1024_mixed.tm" + +# --- Dragonfly p3a6h3 (342 hosts) --- +make_bisection_tm 342 "$TM_DIR/df342_bisection.tm" +make_random_permutation_tm 342 "$TM_DIR/df342_permutation.tm" +make_heavy_incast_tm 342 "$TM_DIR/df342_incast16.tm" 16 170 +make_mixed_size_tm 342 "$TM_DIR/df342_mixed.tm" + +# --- Dragonfly p4a8h4 (1056 hosts) --- +cat > "$TM_DIR/df1056_20flow.tm" <$((1055 - i*50)) start 0 size 1048576"; done) +EOF +make_heavy_incast_tm 1056 "$TM_DIR/df1056_incast16.tm" 16 528 +make_mixed_size_tm 1056 "$TM_DIR/df1056_mixed.tm" + +# --- Slimfly p4q5 (200 hosts) --- +make_bisection_tm 200 "$TM_DIR/sf200_bisection.tm" +make_random_permutation_tm 200 "$TM_DIR/sf200_permutation.tm" +make_heavy_incast_tm 200 "$TM_DIR/sf200_incast16.tm" 16 100 +make_mixed_size_tm 200 "$TM_DIR/sf200_mixed.tm" + +# --- Slimfly p7q9 (1134 hosts) --- +cat > "$TM_DIR/sf1134_1flow.tm" <<'EOF' +Nodes 1134 +Connections 1 +0->1133 start 0 size 1048576 +EOF + +cat > "$TM_DIR/sf1134_20flow.tm" <$((1133 - i*50)) start 0 size 1048576"; done) +EOF +make_heavy_incast_tm 1134 "$TM_DIR/sf1134_incast16.tm" 16 567 +make_mixed_size_tm 1134 "$TM_DIR/sf1134_mixed.tm" + +echo "Done generating E2E TMs" +echo "" + +FT128="$TOPO_DIR/fat_tree_128_1os.topo" +FT1024="$TOPO_DIR/fat_tree_1024_1os.topo" +DF_P3="$TOPO_DIR/dragonfly/p3a6h3" +DF_P4="$TOPO_DIR/dragonfly/p4a8h4" +SF_P4="$TOPO_DIR/slimfly/p4q5" +SF_P7="$TOPO_DIR/slimfly/p7q9" + +# ============================================================ +# Section 1: Fat Tree E2E +# ============================================================ +echo "=== Section 1: Fat Tree 128 — E2E patterns ===" + +run_e2e_test "ft128_bisection_64flows" \ + "$UEC_BIN -topo $FT128 -tm $TM_DIR/ft128_bisection.tm -cwnd 30 -q 88" \ + 64 + +run_e2e_test "ft128_permutation_128flows" \ + "$UEC_BIN -topo $FT128 -tm $TM_DIR/ft128_permutation.tm -cwnd 30 -q 88" \ + 128 + 
+run_e2e_test "ft128_incast_32to1" \ + "$UEC_BIN -topo $FT128 -tm $TM_DIR/ft128_incast32.tm -cwnd 30 -q 88" \ + 32 + +run_e2e_test "ft128_mixed_sizes" \ + "$UEC_BIN -topo $FT128 -tm $TM_DIR/ft128_mixed.tm -cwnd 30 -q 88" \ + 5 + +echo "" +echo "=== Section 2: Fat Tree 1024 — larger scale ===" + +run_e2e_test "ft1024_1flow" \ + "$UEC_BIN -topo $FT1024 -tm $TM_DIR/ft1024_1flow.tm -cwnd 30 -q 88" \ + 1 180 + +run_e2e_test "ft1024_20flows" \ + "$UEC_BIN -topo $FT1024 -tm $TM_DIR/ft1024_20flow.tm -cwnd 30 -q 88" \ + 20 180 + +run_e2e_test "ft1024_incast_32to1" \ + "$UEC_BIN -topo $FT1024 -tm $TM_DIR/ft1024_incast32.tm -cwnd 30 -q 88" \ + 32 180 + +run_e2e_test "ft1024_mixed_sizes" \ + "$UEC_BIN -topo $FT1024 -tm $TM_DIR/ft1024_mixed.tm -cwnd 30 -q 88" \ + 5 180 + +echo "" + +# ============================================================ +# Section 3: Dragonfly E2E +# ============================================================ +echo "=== Section 3: Dragonfly p3a6h3 (342 hosts) — E2E patterns ===" + +run_e2e_test "df342_bisection_MINIMAL" \ + "$UEC_BIN -topology dragonfly -topo $DF_P3 -tm $TM_DIR/df342_bisection.tm -routing MINIMAL -q 88" \ + 171 + +run_e2e_test "df342_bisection_VALIANT" \ + "$UEC_BIN -topology dragonfly -topo $DF_P3 -tm $TM_DIR/df342_bisection.tm -routing VALIANT -q 88" \ + 171 + +run_e2e_test "df342_permutation_MINIMAL" \ + "$UEC_BIN -topology dragonfly -topo $DF_P3 -tm $TM_DIR/df342_permutation.tm -routing MINIMAL -q 88" \ + 342 + +run_e2e_test "df342_permutation_UGAL_L" \ + "$UEC_BIN -topology dragonfly -topo $DF_P3 -tm $TM_DIR/df342_permutation.tm -routing UGAL_L -q 88" \ + 342 + +run_e2e_test "df342_incast_16_MINIMAL" \ + "$UEC_BIN -topology dragonfly -topo $DF_P3 -tm $TM_DIR/df342_incast16.tm -routing MINIMAL -q 88" \ + 16 + +run_e2e_test "df342_mixed_SOURCE" \ + "$UEC_BIN -topology dragonfly -topo $DF_P3 -tm $TM_DIR/df342_mixed.tm -routing SOURCE -q 88" \ + 5 + +echo "" +echo "=== Section 4: Dragonfly p4a8h4 (1056 hosts) — larger scale ===" + 
+run_e2e_test "df1056_20flows_MINIMAL" \ + "$UEC_BIN -topology dragonfly -topo $DF_P4 -tm $TM_DIR/df1056_20flow.tm -routing MINIMAL -q 88" \ + 20 180 + +run_e2e_test "df1056_20flows_VALIANT" \ + "$UEC_BIN -topology dragonfly -topo $DF_P4 -tm $TM_DIR/df1056_20flow.tm -routing VALIANT -q 88" \ + 20 180 + +run_e2e_test "df1056_incast_16_UGAL_L" \ + "$UEC_BIN -topology dragonfly -topo $DF_P4 -tm $TM_DIR/df1056_incast16.tm -routing UGAL_L -q 88" \ + 16 180 + +run_e2e_test "df1056_mixed_MINIMAL" \ + "$UEC_BIN -topology dragonfly -topo $DF_P4 -tm $TM_DIR/df1056_mixed.tm -routing MINIMAL -q 88" \ + 5 180 + +echo "" + +# ============================================================ +# Section 5: SlimFly E2E +# ============================================================ +echo "=== Section 5: Slimfly p4q5 (200 hosts) — E2E patterns ===" + +run_e2e_test "sf200_bisection_MINIMAL" \ + "$UEC_BIN -topology slimfly -topo $SF_P4 -tm $TM_DIR/sf200_bisection.tm -routing MINIMAL -q 88" \ + 100 + +run_e2e_test "sf200_bisection_VALIANT" \ + "$UEC_BIN -topology slimfly -topo $SF_P4 -tm $TM_DIR/sf200_bisection.tm -routing VALIANT -q 88" \ + 100 + +run_e2e_test "sf200_permutation_MINIMAL" \ + "$UEC_BIN -topology slimfly -topo $SF_P4 -tm $TM_DIR/sf200_permutation.tm -routing MINIMAL -q 88" \ + 200 + +run_e2e_test "sf200_permutation_SOURCE" \ + "$UEC_BIN -topology slimfly -topo $SF_P4 -tm $TM_DIR/sf200_permutation.tm -routing SOURCE -q 88" \ + 200 + +run_e2e_test "sf200_incast_16_MINIMAL" \ + "$UEC_BIN -topology slimfly -topo $SF_P4 -tm $TM_DIR/sf200_incast16.tm -routing MINIMAL -q 88" \ + 16 + +run_e2e_test "sf200_mixed_UGAL_L" \ + "$UEC_BIN -topology slimfly -topo $SF_P4 -tm $TM_DIR/sf200_mixed.tm -routing UGAL_L -q 88" \ + 5 + +echo "" +echo "=== Section 6: Slimfly p7q9 (1134 hosts) — larger scale ===" + +run_e2e_test "sf1134_1flow_MINIMAL" \ + "$UEC_BIN -topology slimfly -topo $SF_P7 -tm $TM_DIR/sf1134_1flow.tm -routing MINIMAL -q 88" \ + 1 180 + +run_e2e_test "sf1134_20flows_MINIMAL" \ + 
"$UEC_BIN -topology slimfly -topo $SF_P7 -tm $TM_DIR/sf1134_20flow.tm -routing MINIMAL -q 88" \ + 20 180 + +run_e2e_test "sf1134_20flows_SOURCE" \ + "$UEC_BIN -topology slimfly -topo $SF_P7 -tm $TM_DIR/sf1134_20flow.tm -routing SOURCE -q 88" \ + 20 180 + +run_e2e_test "sf1134_incast_16_MINIMAL" \ + "$UEC_BIN -topology slimfly -topo $SF_P7 -tm $TM_DIR/sf1134_incast16.tm -routing MINIMAL -q 88" \ + 16 180 + +run_e2e_test "sf1134_mixed_MINIMAL" \ + "$UEC_BIN -topology slimfly -topo $SF_P7 -tm $TM_DIR/sf1134_mixed.tm -routing MINIMAL -q 88" \ + 5 180 + +echo "" + +# ============================================================ +# Section 7: All routing algorithms × all topologies (single flow) +# ============================================================ +echo "=== Section 7: Routing algorithm matrix (1 flow each) ===" + +for routing in MINIMAL VALIANT UGAL_L SOURCE; do + run_e2e_test "matrix_df342_${routing}" \ + "$UEC_BIN -topology dragonfly -topo $DF_P3 -tm $TM_DIR/df342_mixed.tm -routing $routing -q 88" \ + 5 +done + +for routing in MINIMAL VALIANT UGAL_L SOURCE; do + run_e2e_test "matrix_sf200_${routing}" \ + "$UEC_BIN -topology slimfly -topo $SF_P4 -tm $TM_DIR/sf200_mixed.tm -routing $routing -q 88" \ + 5 +done + +echo "" + +# ============================================================ +# Summary +# ============================================================ +echo "=========================================" +echo " E2E RESULTS: $PASS passed / $FAIL failed / $TOTAL total" +echo "=========================================" +if [ $FAIL -gt 0 ]; then + echo "SOME TESTS FAILED" + exit 1 +else + echo "ALL E2E TESTS PASSED" + exit 0 +fi diff --git a/tests/run_edge_case_tests.sh b/tests/run_edge_case_tests.sh new file mode 100755 index 0000000..8f82578 --- /dev/null +++ b/tests/run_edge_case_tests.sh @@ -0,0 +1,474 @@ +#!/usr/bin/env bash +# Edge case & parameter variation tests for the unified htsim_uec binary +# Covers: default topology, explicit FT flag, cwnd 
# variations, queue sizes,
# tiny/large flows, staggered starts, single-packet flows, incast
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
HTSIM_DIR="$REPO_DIR"
UEC_BIN="$HTSIM_DIR/htsim/sim/datacenter/htsim_uec"
TOPO_DIR="$HTSIM_DIR/htsim/sim/datacenter/topologies"
TEST_DIR="$SCRIPT_DIR"
TM_DIR="$TEST_DIR/edge_tms"
OUT_DIR="$TEST_DIR/edge_out"

mkdir -p "$TM_DIR" "$OUT_DIR"

PASS=0
FAIL=0
TOTAL=0

# ============================================================
# Helpers
# ============================================================

#######################################
# Print the integer that follows "<key>: " in a stats line.
# The key must not be preceded by a letter, digit or underscore, so that
# looking up "ACKs" never picks up the value of "NACKs".
# Arguments: $1 - stats line, $2 - key (e.g. New, Rtx, ACKs)
# Returns:   0 and prints the number if found, 1 otherwise
#######################################
_stat_field() {
  local line=$1 key=$2
  local re="(^|[^[:alnum:]_])${key}: ([0-9]+)"
  [[ "$line" =~ $re ]] || return 1
  printf '%s\n' "${BASH_REMATCH[2]}"
}

#######################################
# Run one edge-case command and classify the result as PASS or FAIL.
# Globals:   OUT_DIR (read); TOTAL, PASS, FAIL (updated)
# Arguments: $1 - label (also names the output file)
#            $2 - full command line, executed via `bash -c` under a 90s
#                 timeout (so paths inside it must not contain spaces)
#            $3 - check type:
#                   "basic"          exit 0, "Done" printed, New/ACKs > 0
#                   "flow_count:N"   as basic, plus exactly N "finished at"
#                   "no_crash"       anything except exit 139/136 passes
#                   "expected_abort" the command must run and exit non-zero
# Outputs:   one result line on stdout; command output goes to
#            $OUT_DIR/out_<label>.txt
# Returns:   always 0; the verdict is recorded in PASS/FAIL
#######################################
run_edge_test() {
  local label="$1"
  local cmd="$2"
  local check_type="$3"

  TOTAL=$((TOTAL + 1))
  local outfile="$OUT_DIR/out_${label}.txt"
  echo -n " [$TOTAL] $label ... "

  local exit_code=0
  timeout 90 bash -c "$cmd" > "$outfile" 2>&1 || exit_code=$?

  # --- no_crash: only checks it doesn't segfault ---
  if [ "$check_type" = "no_crash" ]; then
    if [ "$exit_code" -eq 139 ] || [ "$exit_code" -eq 136 ]; then
      echo "FAIL (segfault/FPE, exit $exit_code)"
      FAIL=$((FAIL + 1))
      return 0
    fi
    echo "PASS (exit $exit_code)"
    PASS=$((PASS + 1))
    return 0
  fi

  # --- expected_abort: expect assertion failure (exit 134) or nonzero exit ---
  # A timeout (124) or a command that could not be started (126/127) says
  # nothing about the simulator's own checks, so those are failures.
  if [ "$check_type" = "expected_abort" ]; then
    case "$exit_code" in
      0)
        echo "FAIL (expected abort but got exit 0)"
        FAIL=$((FAIL + 1))
        ;;
      124 | 126 | 127)
        echo "FAIL (command timed out or could not run, exit $exit_code)"
        FAIL=$((FAIL + 1))
        ;;
      *)
        echo "PASS (correctly aborted, exit $exit_code)"
        PASS=$((PASS + 1))
        ;;
    esac
    return 0
  fi

  # --- basic / flow_count: must complete ---
  if [ "$exit_code" -ne 0 ]; then
    echo "FAIL (exit code $exit_code)"
    tail -n 5 "$outfile" || true
    FAIL=$((FAIL + 1))
    return 0
  fi

  if ! grep -q "Done" "$outfile"; then
    echo "FAIL (no 'Done')"
    tail -n 5 "$outfile" || true
    FAIL=$((FAIL + 1))
    return 0
  fi

  # Prefer a line that starts with "New:"; fall back to any line containing
  # it. Only the last match is used so the value is always a single line.
  local stats_line
  stats_line=$(grep '^New:' "$outfile" | tail -n 1 || true)
  if [ -z "$stats_line" ]; then
    stats_line=$(grep 'New:' "$outfile" | tail -n 1 || true)
  fi
  if [ -z "$stats_line" ]; then
    echo "FAIL (no stats)"
    FAIL=$((FAIL + 1))
    return 0
  fi

  # A missing mandatory counter is a test failure, not a reason to abort
  # the whole suite under set -e.
  local new_pkts ack_pkts
  new_pkts=$(_stat_field "$stats_line" "New") || new_pkts=""
  ack_pkts=$(_stat_field "$stats_line" "ACKs") || ack_pkts=""
  if [ -z "$new_pkts" ] || [ -z "$ack_pkts" ]; then
    echo "FAIL (malformed stats line: $stats_line)"
    FAIL=$((FAIL + 1))
    return 0
  fi

  if [ "$new_pkts" -eq 0 ] || [ "$ack_pkts" -eq 0 ]; then
    echo "FAIL (New=$new_pkts, ACK=$ack_pkts)"
    FAIL=$((FAIL + 1))
    return 0
  fi

  # --- flow_count check ---
  if [[ "$check_type" == flow_count:* ]]; then
    local expected=${check_type#flow_count:}
    local finished
    # grep -c already prints 0 when nothing matches (and exits 1); adding
    # "|| echo 0" would produce two lines and break the comparison below.
    finished=$(grep -c "finished at" "$outfile") || true
    finished=${finished:-0}
    if [ "$finished" -ne "$expected" ]; then
      echo "FAIL (finished $finished of $expected flows)"
      FAIL=$((FAIL + 1))
      return 0
    fi
  fi

  local rtx_pkts nack_pkts bounced_pkts
  rtx_pkts=$(_stat_field "$stats_line" "Rtx") || rtx_pkts=0
  nack_pkts=$(_stat_field "$stats_line" "NACKs") || nack_pkts=0
  bounced_pkts=$(_stat_field "$stats_line" "Bounced") || bounced_pkts=0
  echo "PASS | New:$new_pkts Rtx:$rtx_pkts ACK:$ack_pkts NACK:$nack_pkts Bounce:$bounced_pkts"
  PASS=$((PASS + 1))
  return 0
}

# ============================================================
# Generate traffic matrices
# ============================================================
echo "=== Generating edge-case traffic matrices ==="

# --- Fat Tree 128 nodes ---
cat > "$TM_DIR/ft128_1flow.tm" <<'EOF'
Nodes 128
Connections 1
0->127 start 0 size 1048576
EOF

cat > "$TM_DIR/ft128_5flow.tm" <<'EOF'
Nodes 128
Connections 5
0->127 start 0 size 1048576
1->126 start 0 size 1048576
10->100 start 0 size 1048576
30->90 start 0 size 1048576
64->63 start 0 size 1048576
EOF

# Tiny flow: 1 packet (4096 bytes)
cat > "$TM_DIR/ft128_tiny.tm" <<'EOF'
Nodes 128
Connections 1
0->127 start 0 size 4096
EOF

+# Large flow: 10 MiB +cat > "$TM_DIR/ft128_large.tm" <<'EOF' +Nodes 128 +Connections 1 +0->127 start 0 size 10485760 +EOF + +# Staggered starts (different start times in ns) +cat > "$TM_DIR/ft128_staggered.tm" <<'EOF' +Nodes 128 +Connections 4 +0->64 start 0 size 1048576 +1->65 start 1000 size 1048576 +2->66 start 5000 size 1048576 +3->67 start 10000 size 1048576 +EOF + +# Incast: many sources → one destination +cat > "$TM_DIR/ft128_incast_16to1.tm" <<'EOF' +Nodes 128 +Connections 16 +0->127 start 0 size 1048576 +1->127 start 0 size 1048576 +2->127 start 0 size 1048576 +3->127 start 0 size 1048576 +4->127 start 0 size 1048576 +5->127 start 0 size 1048576 +6->127 start 0 size 1048576 +7->127 start 0 size 1048576 +8->127 start 0 size 1048576 +9->127 start 0 size 1048576 +10->127 start 0 size 1048576 +11->127 start 0 size 1048576 +12->127 start 0 size 1048576 +13->127 start 0 size 1048576 +14->127 start 0 size 1048576 +15->127 start 0 size 1048576 +EOF + +# Outcast: one source → many destinations +cat > "$TM_DIR/ft128_outcast_1to16.tm" <<'EOF' +Nodes 128 +Connections 16 +0->8 start 0 size 1048576 +0->16 start 0 size 1048576 +0->24 start 0 size 1048576 +0->32 start 0 size 1048576 +0->40 start 0 size 1048576 +0->48 start 0 size 1048576 +0->56 start 0 size 1048576 +0->64 start 0 size 1048576 +0->72 start 0 size 1048576 +0->80 start 0 size 1048576 +0->88 start 0 size 1048576 +0->96 start 0 size 1048576 +0->104 start 0 size 1048576 +0->112 start 0 size 1048576 +0->120 start 0 size 1048576 +0->127 start 0 size 1048576 +EOF + +# Same-pod traffic (nodes 0-15 are in the same pod for 128-node FT with podsize 16) +cat > "$TM_DIR/ft128_samepod.tm" <<'EOF' +Nodes 128 +Connections 4 +0->15 start 0 size 1048576 +1->14 start 0 size 1048576 +2->13 start 0 size 1048576 +3->12 start 0 size 1048576 +EOF + +# Cross-pod traffic (nodes in different pods) +cat > "$TM_DIR/ft128_crosspod.tm" <<'EOF' +Nodes 128 +Connections 4 +0->16 start 0 size 1048576 +1->32 start 0 size 1048576 +2->48 start 
0 size 1048576 +3->64 start 0 size 1048576 +EOF + +# --- Dragonfly p3a6h3 (342 hosts) edge TMs --- +cat > "$TM_DIR/df342_tiny.tm" <<'EOF' +Nodes 342 +Connections 1 +0->341 start 0 size 4096 +EOF + +cat > "$TM_DIR/df342_large.tm" <<'EOF' +Nodes 342 +Connections 1 +0->200 start 0 size 10485760 +EOF + +cat > "$TM_DIR/df342_staggered.tm" <<'EOF' +Nodes 342 +Connections 4 +0->100 start 0 size 1048576 +50->200 start 2000 size 1048576 +100->300 start 5000 size 1048576 +150->341 start 10000 size 1048576 +EOF + +cat > "$TM_DIR/df342_incast_8to1.tm" <341 start 0 size 1048576"; done) +EOF + +# --- Slimfly p4q5 (200 hosts) edge TMs --- +cat > "$TM_DIR/sf200_tiny.tm" <<'EOF' +Nodes 200 +Connections 1 +0->199 start 0 size 4096 +EOF + +cat > "$TM_DIR/sf200_large.tm" <<'EOF' +Nodes 200 +Connections 1 +0->150 start 0 size 10485760 +EOF + +cat > "$TM_DIR/sf200_staggered.tm" <<'EOF' +Nodes 200 +Connections 4 +0->50 start 0 size 1048576 +25->100 start 2000 size 1048576 +75->150 start 5000 size 1048576 +100->199 start 10000 size 1048576 +EOF + +cat > "$TM_DIR/sf200_incast_8to1.tm" <199 start 0 size 1048576"; done) +EOF + +echo "Done generating edge-case TMs" +echo "" + +# ============================================================ +# Section 1: Fat Tree — default topology (no -topology flag) +# ============================================================ +FT128="$TOPO_DIR/fat_tree_128_1os.topo" + +echo "=== Section 1: Fat Tree basic (default topology type) ===" + +run_edge_test "ft_default_no_flag" \ + "$UEC_BIN -topo $FT128 -tm $TM_DIR/ft128_1flow.tm -cwnd 30 -q 88" \ + "basic" + +run_edge_test "ft_explicit_fattree_flag" \ + "$UEC_BIN -topology fattree -topo $FT128 -tm $TM_DIR/ft128_1flow.tm -cwnd 30 -q 88" \ + "basic" + +echo "" + +# ============================================================ +# Section 2: Fat Tree — parameter variations +# ============================================================ +echo "=== Section 2: Fat Tree parameter variations ===" + +# Different cwnd 
values +for cwnd in 5 15 30 50 100; do + run_edge_test "ft_cwnd_${cwnd}" \ + "$UEC_BIN -topo $FT128 -tm $TM_DIR/ft128_1flow.tm -cwnd $cwnd -q 88" \ + "basic" +done +echo "" + +# Different queue sizes +echo " -- Queue size variations --" +# qsize=20 is too small for ECN thresholds — expect assertion abort +run_edge_test "ft_qsize_20_expected_abort" \ + "$UEC_BIN -topo $FT128 -tm $TM_DIR/ft128_1flow.tm -cwnd 30 -q 20" \ + "expected_abort" +for qsize in 50 88 150 300; do + run_edge_test "ft_qsize_${qsize}" \ + "$UEC_BIN -topo $FT128 -tm $TM_DIR/ft128_1flow.tm -cwnd 30 -q $qsize" \ + "basic" +done +echo "" + +# ============================================================ +# Section 3: Fat Tree — flow patterns +# ============================================================ +echo "=== Section 3: Fat Tree flow patterns ===" + +run_edge_test "ft_tiny_flow" \ + "$UEC_BIN -topo $FT128 -tm $TM_DIR/ft128_tiny.tm -cwnd 30 -q 88" \ + "flow_count:1" + +run_edge_test "ft_large_flow_10MiB" \ + "$UEC_BIN -topo $FT128 -tm $TM_DIR/ft128_large.tm -cwnd 30 -q 88" \ + "basic" + +run_edge_test "ft_5flows" \ + "$UEC_BIN -topo $FT128 -tm $TM_DIR/ft128_5flow.tm -cwnd 30 -q 88" \ + "flow_count:5" + +run_edge_test "ft_staggered_starts" \ + "$UEC_BIN -topo $FT128 -tm $TM_DIR/ft128_staggered.tm -cwnd 30 -q 88" \ + "flow_count:4" + +run_edge_test "ft_incast_16to1" \ + "$UEC_BIN -topo $FT128 -tm $TM_DIR/ft128_incast_16to1.tm -cwnd 30 -q 88" \ + "flow_count:16" + +run_edge_test "ft_outcast_1to16" \ + "$UEC_BIN -topo $FT128 -tm $TM_DIR/ft128_outcast_1to16.tm -cwnd 30 -q 88" \ + "flow_count:16" + +run_edge_test "ft_samepod" \ + "$UEC_BIN -topo $FT128 -tm $TM_DIR/ft128_samepod.tm -cwnd 30 -q 88" \ + "flow_count:4" + +run_edge_test "ft_crosspod" \ + "$UEC_BIN -topo $FT128 -tm $TM_DIR/ft128_crosspod.tm -cwnd 30 -q 88" \ + "flow_count:4" + +echo "" + +# ============================================================ +# Section 4: Dragonfly — edge cases +# 
============================================================ +echo "=== Section 4: Dragonfly edge cases ===" +DF_P3="$TOPO_DIR/dragonfly/p3a6h3" + +run_edge_test "df_tiny_flow" \ + "$UEC_BIN -topology dragonfly -topo $DF_P3 -tm $TM_DIR/df342_tiny.tm -routing MINIMAL -q 88" \ + "flow_count:1" + +run_edge_test "df_large_flow_10MiB" \ + "$UEC_BIN -topology dragonfly -topo $DF_P3 -tm $TM_DIR/df342_large.tm -routing MINIMAL -q 88" \ + "basic" + +run_edge_test "df_staggered_starts" \ + "$UEC_BIN -topology dragonfly -topo $DF_P3 -tm $TM_DIR/df342_staggered.tm -routing MINIMAL -q 88" \ + "flow_count:4" + +run_edge_test "df_incast_8to1" \ + "$UEC_BIN -topology dragonfly -topo $DF_P3 -tm $TM_DIR/df342_incast_8to1.tm -routing MINIMAL -q 88" \ + "flow_count:8" + +# cwnd variations on DF (DF uses cwnd as raw bytes, not packet count) +# Values must be large enough to fit at least 1 packet (~4160 bytes) +for cwnd in 20000 100000 500000; do + run_edge_test "df_cwnd_${cwnd}" \ + "$UEC_BIN -topology dragonfly -topo $DF_P3 -tm $TM_DIR/df342_tiny.tm -routing MINIMAL -q 88 -cwnd $cwnd" \ + "basic" +done + +# queue size variations on DF +for qsize in 30 88 200; do + run_edge_test "df_qsize_${qsize}" \ + "$UEC_BIN -topology dragonfly -topo $DF_P3 -tm $TM_DIR/df342_tiny.tm -routing MINIMAL -q $qsize" \ + "basic" +done + +echo "" + +# ============================================================ +# Section 5: SlimFly — edge cases +# ============================================================ +echo "=== Section 5: SlimFly edge cases ===" +SF_P4="$TOPO_DIR/slimfly/p4q5" + +run_edge_test "sf_tiny_flow" \ + "$UEC_BIN -topology slimfly -topo $SF_P4 -tm $TM_DIR/sf200_tiny.tm -routing MINIMAL -q 88" \ + "flow_count:1" + +run_edge_test "sf_large_flow_10MiB" \ + "$UEC_BIN -topology slimfly -topo $SF_P4 -tm $TM_DIR/sf200_large.tm -routing MINIMAL -q 88" \ + "basic" + +run_edge_test "sf_staggered_starts" \ + "$UEC_BIN -topology slimfly -topo $SF_P4 -tm $TM_DIR/sf200_staggered.tm -routing MINIMAL -q 
88" \ + "flow_count:4" + +run_edge_test "sf_incast_8to1" \ + "$UEC_BIN -topology slimfly -topo $SF_P4 -tm $TM_DIR/sf200_incast_8to1.tm -routing MINIMAL -q 88" \ + "flow_count:8" + +# cwnd variations on SF (SF uses cwnd as raw bytes, not packet count) +# Values must be large enough to fit at least 1 packet (~4160 bytes) +for cwnd in 20000 100000 500000; do + run_edge_test "sf_cwnd_${cwnd}" \ + "$UEC_BIN -topology slimfly -topo $SF_P4 -tm $TM_DIR/sf200_tiny.tm -routing MINIMAL -q 88 -cwnd $cwnd" \ + "basic" +done + +# queue size variations on SF +for qsize in 30 88 200; do + run_edge_test "sf_qsize_${qsize}" \ + "$UEC_BIN -topology slimfly -topo $SF_P4 -tm $TM_DIR/sf200_tiny.tm -routing MINIMAL -q $qsize" \ + "basic" +done + +echo "" + +# ============================================================ +# Section 6: Cross-topology consistency +# Same workload pattern on all 3 topologies should all complete +# ============================================================ +echo "=== Section 6: Cross-topology consistency ===" + +# 1-flow, 1 MiB on each topology +run_edge_test "consistency_ft_1flow" \ + "$UEC_BIN -topology fattree -topo $FT128 -tm $TM_DIR/ft128_1flow.tm -cwnd 30 -q 88" \ + "flow_count:1" + +# DF 1-flow with each routing algo +for routing in MINIMAL VALIANT UGAL_L SOURCE; do + run_edge_test "consistency_df_${routing}" \ + "$UEC_BIN -topology dragonfly -topo $DF_P3 -tm $TM_DIR/df342_tiny.tm -routing $routing -q 88" \ + "flow_count:1" +done + +# SF 1-flow with each routing algo +for routing in MINIMAL VALIANT UGAL_L SOURCE; do + run_edge_test "consistency_sf_${routing}" \ + "$UEC_BIN -topology slimfly -topo $SF_P4 -tm $TM_DIR/sf200_tiny.tm -routing $routing -q 88" \ + "flow_count:1" +done + +echo "" + +# ============================================================ +# Summary +# ============================================================ +echo "=========================================" +echo " EDGE CASE RESULTS: $PASS passed / $FAIL failed / $TOTAL total" +echo 
"========================================="
+if [ $FAIL -gt 0 ]; then
+  echo "SOME TESTS FAILED"
+  exit 1
+else
+  echo "ALL EDGE CASE TESTS PASSED"
+  exit 0
+fi
diff --git a/tests/run_reachability_tests.sh b/tests/run_reachability_tests.sh
new file mode 100755
index 0000000..adc901d
--- /dev/null
+++ b/tests/run_reachability_tests.sh
@@ -0,0 +1,337 @@
+#!/usr/bin/env bash
+# Extended reachability & connectivity tests for HTSIM dragonfly and slimfly
+# Tests: one-to-all, all-to-one, same-switch, cross-group, edge cases
+#
+# Generates the traffic matrices (TMs) next to this script, runs the
+# simulator binary on each one with MINIMAL and SOURCE routing, and verifies
+# that every flow finishes with the expected byte and packet counts.
+# Exit status: 0 if all tests pass, 1 if any test fails.
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+HTSIM_DIR="$REPO_DIR"
+UEC_BIN="$HTSIM_DIR/htsim/sim/datacenter/htsim_uec"
+TOPO_DIR="$HTSIM_DIR/htsim/sim/datacenter/topologies"
+TEST_DIR="$SCRIPT_DIR"
+
+PASS=0
+FAIL=0
+TOTAL=0
+
+FLOW_SIZE=4096 # 1 packet — minimal for reachability check
+
+# ============================================================
+# TM generators
+# ============================================================
+
+# Write a TM file: "Nodes"/"Connections" header followed by the flow lines.
+# Arguments: $1 - host count, $2 - connection count,
+#            $3 - newline-terminated flow lines, $4 - output file
+write_tm() {
+  local num_hosts=$1 count=$2 lines=$3 outfile=$4
+  {
+    echo "Nodes $num_hosts"
+    echo "Connections $count"
+    printf '%s' "$lines"
+  } > "$outfile"
+}
+
+# One-to-all: node 0 → every other node
+# Arguments: $1 - host count, $2 - output file
+make_one_to_all_tm() {
+  local num_hosts=$1 outfile=$2
+  local dst
+  {
+    echo "Nodes $num_hosts"
+    echo "Connections $((num_hosts - 1))"
+    # C-style loop instead of seq: emits nothing when there is no other node
+    for ((dst = 1; dst < num_hosts; dst++)); do
+      echo "0->$dst start 0 size $FLOW_SIZE"
+    done
+  } > "$outfile"
+}
+
+# All-to-one: every node → node 0
+# Arguments: $1 - host count, $2 - output file
+make_all_to_one_tm() {
+  local num_hosts=$1 outfile=$2
+  local src
+  {
+    echo "Nodes $num_hosts"
+    echo "Connections $((num_hosts - 1))"
+    for ((src = 1; src < num_hosts; src++)); do
+      echo "$src->0 start 0 size $FLOW_SIZE"
+    done
+  } > "$outfile"
+}
+
+# Same-switch: flows between hosts on the same switch (intra-switch)
+# Arguments: $1 - host count, $2 - hosts per switch (p), $3 - output file
+make_same_switch_tm() {
+  local num_hosts=$1 p=$2 outfile=$3
+  local num_switches=$((num_hosts / p))
+  local count=0 lines="" sw src dst
+  # For each switch, first host → last host on that switch
+  for ((sw = 0; sw < num_switches; sw++)); do
+    src=$((sw * p))
+    dst=$((sw * p + p - 1))
+    if ((src != dst)); then # p == 1 leaves no distinct pair on a switch
+      lines+="$src->$dst start 0 size $FLOW_SIZE"$'\n'
+      count=$((count + 1))
+    fi
+  done
+  write_tm "$num_hosts" "$count" "$lines" "$outfile"
+}
+
+# Cross-group: for dragonfly, pick one host from each group → host in a different group
+# groups = a*h+1, each group has a switches, each switch has p hosts
+# Arguments: $1 - host count, $2 - p, $3 - a, $4 - h, $5 - output file
+make_cross_group_tm() {
+  local num_hosts=$1 p=$2 a=$3 h=$4 outfile=$5
+  local num_groups=$((a * h + 1))
+  local count=0 lines="" g src dst_group dst
+  for ((g = 0; g < num_groups; g++)); do
+    src=$((g * a * p)) # first host in group g
+    dst_group=$(( (g + num_groups / 2) % num_groups ))
+    dst=$((dst_group * a * p)) # first host in opposite group
+    if ((src != dst && src < num_hosts && dst < num_hosts)); then
+      lines+="$src->$dst start 0 size $FLOW_SIZE"$'\n'
+      count=$((count + 1))
+    fi
+  done
+  write_tm "$num_hosts" "$count" "$lines" "$outfile"
+}
+
+# Cross-partition: for slimfly, pick hosts across the two partitions (switch 0..q²-1 and q²..2q²-1)
+# Arguments: $1 - host count, $2 - p, $3 - q, $4 - output file
+make_cross_partition_tm() {
+  local num_hosts=$1 p=$2 q=$3 outfile=$4
+  local q2=$((q * q))
+  local count=0 lines="" sw src dst
+  # For each switch in partition 0, connect to corresponding switch in partition 1
+  for ((sw = 0; sw < q2; sw++)); do
+    src=$((sw * p)) # first host on switch sw (partition 0)
+    dst=$(( (q2 + sw) * p )) # first host on switch q²+sw (partition 1)
+    if ((src < num_hosts && dst < num_hosts)); then
+      lines+="$src->$dst start 0 size $FLOW_SIZE"$'\n'
+      count=$((count + 1))
+    fi
+  done
+  write_tm "$num_hosts" "$count" "$lines" "$outfile"
+}
+
+# Maximum-distance pairs: for dragonfly, hosts at group 0 ↔ last group
+# Arguments: $1 - host count, $2 - p, $3 - a, $4 - h, $5 - output file
+make_max_distance_df_tm() {
+  local num_hosts=$1 p=$2 a=$3 h=$4 outfile=$5
+  local num_groups=$((a * h + 1))
+  local last_group=$((num_groups - 1))
+  local count=0 lines="" i dst
+  # p flows from group 0 → last group
+  for ((i = 0; i < p; i++)); do
+    dst=$((last_group * a * p + i))
+    if ((dst < num_hosts)); then
+      lines+="$i->$dst start 0 size $FLOW_SIZE"$'\n'
+      count=$((count + 1))
+    fi
+  done
+  write_tm "$num_hosts" "$count" "$lines" "$outfile"
+}
+
+# ============================================================
+# Test runner
+# ============================================================
+
+# Run one simulation and check that every expected flow completed correctly.
+# Globals:   TEST_DIR, FLOW_SIZE (read); TOTAL, PASS, FAIL (updated)
+# Arguments: $1 - test label (also names the captured output file)
+#            $2 - simulator binary
+#            $3 - topology path
+#            $4 - routing algorithm
+#            $5 - traffic-matrix file
+#            $6 - number of flows expected to finish
+#            $7 - optional -topology value (dragonfly, slimfly)
+# Outputs:   one PASS/FAIL line on stdout; the output tail on early failures
+run_reachability_test() {
+  local label="$1"
+  local binary="$2"
+  local topo_path="$3"
+  local routing="$4"
+  local tm_file="$5"
+  local expected_flows="$6"
+  local topology="${7:-}" # optional: dragonfly, slimfly
+
+  TOTAL=$((TOTAL + 1))
+  local outfile="$TEST_DIR/out_reach_${label}.txt"
+
+  echo -n " [$TOTAL] $label ($expected_flows flows) ... "
+
+  # Build the command as an array so paths containing spaces stay intact
+  local -a cmd=("$binary")
+  if [ -n "$topology" ]; then
+    cmd+=(-topology "$topology")
+  fi
+  cmd+=(-topo "$topo_path" -routing "$routing" -tm "$tm_file")
+
+  # Run with timeout; "|| exit_code=$?" records the status without tripping set -e
+  local exit_code=0
+  timeout 120 "${cmd[@]}" > "$outfile" 2>&1 || exit_code=$?
+
+  # Check 1: exited cleanly
+  if [ "$exit_code" -ne 0 ]; then
+    echo "FAIL (exit code $exit_code)"
+    tail -n 3 "$outfile"
+    FAIL=$((FAIL + 1))
+    return
+  fi
+
+  # Check 2: "Done" in output
+  if ! grep -q "Done" "$outfile"; then
+    echo "FAIL (no 'Done')"
+    tail -n 3 "$outfile"
+    FAIL=$((FAIL + 1))
+    return
+  fi
+
+  # Check 3: count finished flows
+  # grep -c already prints 0 (and exits 1) when nothing matches, so only its
+  # status is neutralised; "|| echo 0" would yield "0<newline>0" and break -ne.
+  local finished_count
+  finished_count=$(grep -c "finished at" "$outfile" || true)
+
+  if [ "$finished_count" -ne "$expected_flows" ]; then
+    echo "FAIL (only $finished_count of $expected_flows flows finished)"
+    FAIL=$((FAIL + 1))
+    return
+  fi
+
+  # Check 4: all flows got correct bytes
+  local wrong_bytes
+  wrong_bytes=$(grep "finished at" "$outfile" | grep -cv "total bytes $FLOW_SIZE" || true)
+  if [ "$wrong_bytes" -gt 0 ]; then
+    echo "FAIL ($wrong_bytes flows got wrong byte count)"
+    FAIL=$((FAIL + 1))
+    return
+  fi
+
+  # Check 5: New packets count (1 flow = 1 packet for 4096B)
+  # A missing stats line must fail this test, not abort the script via pipefail.
+  local stats_line new_pkts="" rtx_pkts="n/a" ack_pkts="n/a"
+  stats_line=$(grep "New:" "$outfile" | tail -n 1 || true)
+  local re_new='New: ([0-9]+)' re_rtx='Rtx: ([0-9]+)' re_ack='ACKs: ([0-9]+)'
+  if [[ "$stats_line" =~ $re_new ]]; then
+    new_pkts=${BASH_REMATCH[1]}
+  fi
+  if [ -z "$new_pkts" ]; then
+    echo "FAIL (no 'New:' packet statistics in output)"
+    FAIL=$((FAIL + 1))
+    return
+  fi
+
+  if [ "$new_pkts" -ne "$expected_flows" ]; then
+    echo "FAIL (New=$new_pkts, expected $expected_flows)"
+    FAIL=$((FAIL + 1))
+    return
+  fi
+
+  if [[ "$stats_line" =~ $re_rtx ]]; then
+    rtx_pkts=${BASH_REMATCH[1]}
+  fi
+  if [[ "$stats_line" =~ $re_ack ]]; then
+    ack_pkts=${BASH_REMATCH[1]}
+  fi
+
+  echo "PASS (all $finished_count flows finished, New=$new_pkts Rtx=$rtx_pkts ACK=$ack_pkts)"
+  PASS=$((PASS + 1))
+}
+
+# ============================================================
+# Generate TMs
+# ============================================================
+echo "=== Generating reachability traffic matrices ==="
+
+# --- Dragonfly p3a6h3: 342 hosts, p=3, a=6, h=3 ---
+make_one_to_all_tm 342 "$TEST_DIR/df_p3_one2all.tm"
+make_all_to_one_tm 342 "$TEST_DIR/df_p3_all2one.tm"
+make_same_switch_tm 342 3 "$TEST_DIR/df_p3_sameswitch.tm"
+make_cross_group_tm 342 3 6 3 "$TEST_DIR/df_p3_crossgroup.tm"
+make_max_distance_df_tm 342 3 6 3 "$TEST_DIR/df_p3_maxdist.tm"
+
+# --- Slimfly p4q5: 200 hosts, p=4, q=5 ---
+make_one_to_all_tm 200 "$TEST_DIR/sf_p4_one2all.tm"
+make_all_to_one_tm 200 "$TEST_DIR/sf_p4_all2one.tm"
+make_same_switch_tm 200 4 "$TEST_DIR/sf_p4_sameswitch.tm"
+make_cross_partition_tm 200 4 5 "$TEST_DIR/sf_p4_crosspart.tm"
+
+echo "Done"
+echo ""
+
+# ============================================================
+# Dragonfly p3a6h3 — MINIMAL & SOURCE routing
+# ============================================================
+DF_P3="$TOPO_DIR/dragonfly/p3a6h3"
+
+echo "=== Dragonfly p3a6h3: One-to-All Reachability (node 0 → all 341 others) ==="
+run_reachability_test "df_p3_one2all_MINIMAL" "$UEC_BIN" "$DF_P3" "MINIMAL" "$TEST_DIR/df_p3_one2all.tm" 341 "dragonfly"
+run_reachability_test "df_p3_one2all_SOURCE" "$UEC_BIN" "$DF_P3" "SOURCE" "$TEST_DIR/df_p3_one2all.tm" 341 "dragonfly"
+echo ""
+
+echo "=== Dragonfly p3a6h3: All-to-One Reachability (all 341 others → node 0) ==="
+run_reachability_test "df_p3_all2one_MINIMAL" "$UEC_BIN" "$DF_P3" "MINIMAL" "$TEST_DIR/df_p3_all2one.tm" 341 "dragonfly"
+run_reachability_test "df_p3_all2one_SOURCE" "$UEC_BIN" "$DF_P3" "SOURCE" "$TEST_DIR/df_p3_all2one.tm" 341 "dragonfly"
+echo ""
+
+echo "=== Dragonfly p3a6h3: Same-Switch (intra-switch) ==="
+run_reachability_test "df_p3_sameswitch_MINIMAL" "$UEC_BIN" "$DF_P3" "MINIMAL" "$TEST_DIR/df_p3_sameswitch.tm" 114 "dragonfly"
+run_reachability_test "df_p3_sameswitch_SOURCE" "$UEC_BIN" "$DF_P3" "SOURCE" "$TEST_DIR/df_p3_sameswitch.tm" 114 "dragonfly"
+echo ""
+
+echo "=== Dragonfly p3a6h3: Cross-Group (each group → opposite group) ==="
+run_reachability_test "df_p3_crossgroup_MINIMAL" "$UEC_BIN" "$DF_P3" "MINIMAL" "$TEST_DIR/df_p3_crossgroup.tm" 19 "dragonfly"
+run_reachability_test "df_p3_crossgroup_SOURCE" "$UEC_BIN" "$DF_P3" "SOURCE" "$TEST_DIR/df_p3_crossgroup.tm" 19 "dragonfly"
+echo ""
+
+echo "=== Dragonfly p3a6h3: Max Distance (group 0 ↔ last group) ==="
+run_reachability_test "df_p3_maxdist_MINIMAL" "$UEC_BIN" "$DF_P3" "MINIMAL" "$TEST_DIR/df_p3_maxdist.tm" 3 "dragonfly"
+run_reachability_test "df_p3_maxdist_SOURCE" "$UEC_BIN" "$DF_P3" "SOURCE" "$TEST_DIR/df_p3_maxdist.tm" 3 "dragonfly"
+echo ""
+
+# ============================================================
+# Slimfly p4q5 — MINIMAL & SOURCE routing
+# ============================================================
+SF_P4="$TOPO_DIR/slimfly/p4q5"
+
+echo "=== Slimfly p4q5: One-to-All Reachability (node 0 → all 199 others) ==="
+run_reachability_test "sf_p4_one2all_MINIMAL" "$UEC_BIN" "$SF_P4" "MINIMAL" "$TEST_DIR/sf_p4_one2all.tm" 199 "slimfly"
+run_reachability_test "sf_p4_one2all_SOURCE" "$UEC_BIN" "$SF_P4" "SOURCE" "$TEST_DIR/sf_p4_one2all.tm" 199 "slimfly"
+echo ""
+
+echo "=== Slimfly p4q5: All-to-One Reachability (all 199 others → node 0) ==="
+run_reachability_test "sf_p4_all2one_MINIMAL" "$UEC_BIN" "$SF_P4" "MINIMAL" "$TEST_DIR/sf_p4_all2one.tm" 199 "slimfly"
+run_reachability_test "sf_p4_all2one_SOURCE" "$UEC_BIN" "$SF_P4" "SOURCE" "$TEST_DIR/sf_p4_all2one.tm" 199 "slimfly"
+echo ""
+
+echo "=== Slimfly p4q5: Same-Switch (intra-switch) ==="
+run_reachability_test "sf_p4_sameswitch_MINIMAL" "$UEC_BIN" "$SF_P4" "MINIMAL" "$TEST_DIR/sf_p4_sameswitch.tm" 50 "slimfly"
+run_reachability_test "sf_p4_sameswitch_SOURCE" "$UEC_BIN" "$SF_P4" "SOURCE" "$TEST_DIR/sf_p4_sameswitch.tm" 50 "slimfly"
+echo ""
+
+echo "=== Slimfly p4q5: Cross-Partition (partition 0 → partition 1) ==="
+run_reachability_test "sf_p4_crosspart_MINIMAL" "$UEC_BIN" "$SF_P4" "MINIMAL" "$TEST_DIR/sf_p4_crosspart.tm" 25 "slimfly"
+run_reachability_test "sf_p4_crosspart_SOURCE" "$UEC_BIN" "$SF_P4" "SOURCE" "$TEST_DIR/sf_p4_crosspart.tm" 25 "slimfly"
+echo ""
+
+# ============================================================
+# Summary
+# ============================================================
+echo "========================================="
+echo " REACHABILITY: $PASS passed / $FAIL failed / $TOTAL total"
+echo "========================================="
+if [ "$FAIL" -gt 0 ]; then
+  echo "SOME TESTS FAILED"
+  exit 1
+else
+  echo "ALL REACHABILITY TESTS PASSED"
+  exit 0
+fi