Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 122 additions & 85 deletions tests/functional-test-microceph.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,134 +13,152 @@ PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
echo "=== MicroCeph Functional Test for osdtrace and radostrace ==="
echo "Project root: $PROJECT_ROOT"

# Print an informational message to stdout, prefixed with "INFO: ".
# Arguments: the message words; they are joined into one string using the
#            first character of IFS (a space by default).
info() {
  # "$*" instead of "$@": embedding "$@" inside a larger string splits the
  # message into several words (ShellCheck SC2145).
  printf 'INFO: %s\n' "$*"
}

# Print an error message to stderr, prefixed with "ERROR: ".
# Arguments: the message words, joined into a single string.
err() {
  # Diagnostics go to stderr so they stay visible when stdout is redirected.
  printf 'ERROR: %s\n' "$*" >&2
}

# Print a warning to stderr, prefixed with "WARN: ".
# The DWARF lookup step of this script calls warn, so it must be defined
# alongside info and err.
# Arguments: the message words, joined into a single string.
warn() {
  printf 'WARN: %s\n' "$*" >&2
}

# Cleanup function
# Registered as the EXIT trap handler. Stops the tracers and the benchmark,
# prints whatever trace logs were captured (useful when a check failed), then
# removes the logs and the RBD test resources. The kill and RBD removal steps
# ignore failures so that cleanup can run after an early exit.
cleanup() {
  info "=== Cleanup ==="
  # Kill any running trace processes
  pkill -f osdtrace || true
  pkill -f radostrace || true
  pkill -f "rbd bench" || true

  # The trap also fires on early exits (e.g. the root check), when the logs
  # do not exist yet; only dump the ones that are actually there.
  if [ -f /tmp/osdtrace.log ]; then
    info "OSD trace output:"
    cat /tmp/osdtrace.log
    info " === END of OSD trace === "
  fi
  if [ -f /tmp/radostrace.log ]; then
    info "RADOS trace output:"
    cat /tmp/radostrace.log
    info " === END of RADOS trace === "
  fi

  # Remove test files
  rm -f /tmp/osdtrace.log /tmp/radostrace.log

  # Remove test RBD resources
  microceph.rbd rm test_pool/testimage 2>/dev/null || true
  microceph.ceph osd pool delete test_pool test_pool --yes-i-really-really-mean-it 2>/dev/null || true

  info "Cleanup completed"
}

trap cleanup EXIT

OSDTRACE_LOG="/tmp/osdtrace.log"
RADOSTRACE_LOG="/tmp/radostrace.log"

# Check if running as root or with sudo
if [ "$EUID" -ne 0 ]; then
echo "Error: This test must be run as root or with sudo"
err "This test must be run as root or with sudo"
exit 1
fi

# Check if osdtrace and radostrace binaries exist
if [ ! -f "$PROJECT_ROOT/osdtrace" ]; then
echo "Error: osdtrace binary not found at $PROJECT_ROOT/osdtrace"
echo "Please build the project first with 'make osdtrace'"
err "osdtrace binary not found at $PROJECT_ROOT/osdtrace"
err "Please build the project first with 'make osdtrace'"
exit 1
fi

if [ ! -f "$PROJECT_ROOT/radostrace" ]; then
echo "Error: radostrace binary not found at $PROJECT_ROOT/radostrace"
echo "Please build the project first with 'make radostrace'"
err "radostrace binary not found at $PROJECT_ROOT/radostrace"
err "Please build the project first with 'make radostrace'"
exit 1
fi

echo "=== Step 1: Install MicroCeph ==="
info "=== Step 1: Install MicroCeph ==="
if ! snap list | grep -q microceph; then
echo "Installing MicroCeph snap..."
info "Installing MicroCeph snap..."
snap install microceph
snap refresh --hold microceph
else
echo "MicroCeph already installed"
info "MicroCeph already installed"
fi

echo "=== Step 2: Bootstrap MicroCeph cluster ==="
info "=== Step 2: Bootstrap MicroCeph cluster ==="
if ! microceph cluster list 2>/dev/null | grep -q "$(hostname)"; then
echo "Bootstrapping MicroCeph cluster..."
info "Bootstrapping MicroCeph cluster..."
microceph cluster bootstrap
else
echo "MicroCeph cluster already bootstrapped"
info "MicroCeph cluster already bootstrapped"
fi

echo "=== Step 3: Add OSDs ==="
info "=== Step 3: Add OSDs ==="
# Check if we already have OSDs
OSD_COUNT=$(microceph.ceph osd stat | grep -oP '\d+(?= osds:)' || echo "0")
if [ "$OSD_COUNT" -lt 3 ]; then
echo "Adding 3 loop-backed OSDs (1GB each)..."
info "Adding 3 loop-backed OSDs (1GB each)..."
microceph disk add loop,1G,3
else
echo "Already have $OSD_COUNT OSDs"
info "Already have $OSD_COUNT OSDs"
fi

echo "=== Step 4: Wait for cluster to be healthy ==="
info "=== Step 4: Wait for cluster to be healthy ==="
TIMEOUT=120
ELAPSED=0
while [ $ELAPSED -lt $TIMEOUT ]; do
if microceph.ceph status | grep -q "HEALTH_OK\|HEALTH_WARN"; then
echo "Cluster is ready"
info "Cluster is ready"
break
fi
echo "Waiting for cluster to be ready... ($ELAPSED/$TIMEOUT seconds)"
info "Waiting for cluster to be ready... ($ELAPSED/$TIMEOUT seconds)"
sleep 5
ELAPSED=$((ELAPSED + 5))
done

microceph.ceph status
microceph --version

echo "=== Step 5: Get Ceph version from snap manifest ==="
CEPH_VERSION=$(cat /snap/microceph/current/snap/manifest.yaml | grep "^ceph-osd:" | awk '{print $2}')
LIBRADOS_VERSION=$(cat /snap/microceph/current/snap/manifest.yaml | grep "^librados2:" | awk '{print $2}')
info "=== Step 5: Get Ceph version from snap manifest ==="
CEPH_VERSION=$(microceph --version | awk '{gsub(";", "", $2); print $2}')

echo "Ceph OSD version: $CEPH_VERSION"
echo "Librados version: $LIBRADOS_VERSION"
info "Ceph version: $CEPH_VERSION"

echo "=== Step 6: Locate DWARF JSON files in repository ==="
info "=== Step 6: Locate DWARF JSON files in repository ==="
# Look for matching DWARF files in the repository
OSD_DWARF="$PROJECT_ROOT/files/ubuntu/osdtrace/osd-${CEPH_VERSION}_dwarf.json"
RADOS_DWARF="$PROJECT_ROOT/files/ubuntu/radostrace/${LIBRADOS_VERSION}_dwarf.json"
RADOS_DWARF="$PROJECT_ROOT/files/ubuntu/radostrace/${CEPH_VERSION}_dwarf.json"

if [ ! -f "$OSD_DWARF" ]; then
echo "Warning: OSD DWARF file not found at $OSD_DWARF"
echo "Looking for any available OSD DWARF files..."
warn "OSD DWARF file not found at $OSD_DWARF"
info "Looking for any available OSD DWARF files..."
OSD_DWARF=$(find "$PROJECT_ROOT/files/ubuntu/osdtrace/" -name "*_dwarf.json" | head -1)
if [ -z "$OSD_DWARF" ]; then
echo "Error: No OSD DWARF files found in repository"
err "No OSD DWARF files found in repository"
exit 1
fi
echo "Using: $OSD_DWARF"
info "Using: $OSD_DWARF"
fi

if [ ! -f "$RADOS_DWARF" ]; then
echo "Warning: Rados DWARF file not found at $RADOS_DWARF"
echo "Looking for any available radostrace DWARF files..."
warn "Rados DWARF file not found at $RADOS_DWARF"
info "Looking for any available radostrace DWARF files..."
RADOS_DWARF=$(find "$PROJECT_ROOT/files/ubuntu/radostrace/" -name "*_dwarf.json" | head -1)
if [ -z "$RADOS_DWARF" ]; then
echo "Error: No radostrace DWARF files found in repository"
err "No radostrace DWARF files found in repository"
exit 1
fi
echo "Using: $RADOS_DWARF"
info "Using: $RADOS_DWARF"
fi

echo "Using OSD DWARF file: $OSD_DWARF"
echo "Using Rados DWARF file: $RADOS_DWARF"
info "Using OSD DWARF file: $OSD_DWARF"
info "Using Rados DWARF file: $RADOS_DWARF"

echo "=== Step 7: Find OSD process PID ==="
info "=== Step 7: Find OSD process PID ==="
OSD_PID=$(pgrep -f "ceph-osd.*--id 1" | head -1)
if [ -z "$OSD_PID" ]; then
echo "Error: Could not find ceph-osd process"
err "Could not find ceph-osd process"
ps aux | grep ceph-osd
exit 1
fi
echo "Found OSD process: PID $OSD_PID"
info "Found OSD process: PID $OSD_PID"

echo "=== Step 8: Create RBD pool and image for testing ==="
info "=== Step 8: Create RBD pool and image for testing ==="
# Create RBD pool if it doesn't exist
if ! microceph.ceph osd pool ls | grep -q "^test_pool$"; then
microceph.ceph osd pool create test_pool 32
Expand All @@ -150,38 +168,40 @@ fi
# Create RBD image
microceph.rbd create test_pool/testimage --size 1G || true

echo "=== Step 9: Start osdtrace in background ==="
timeout 30 $PROJECT_ROOT/osdtrace -i $OSD_DWARF -p $OSD_PID --skip-version-check -x > /tmp/osdtrace.log 2>&1 &
OSDTRACE_PID=$!
echo "Started osdtrace with PID $OSDTRACE_PID"
info "=== Step 9: Start osdtrace in background ==="
timeout 30 $PROJECT_ROOT/osdtrace -i $OSD_DWARF -p $OSD_PID --skip-version-check -x >$OSDTRACE_LOG 2>&1 &
sleep 2 # ensure osdtrace starts before we get its PID
OSDTRACE_PID=$(pidof osdtrace)
info "Started osdtrace with PID $OSDTRACE_PID"
sleep 3

echo "=== Step 10: Start radostrace in background ==="
info "=== Step 10: Start radostrace in background ==="
# radostrace will trace all librados clients, including the rbd bench command
timeout 30 $PROJECT_ROOT/radostrace -i $RADOS_DWARF --skip-version-check > /tmp/radostrace.log 2>&1 &
RADOSTRACE_PID=$!
echo "Started radostrace with PID $RADOSTRACE_PID"
timeout 30 $PROJECT_ROOT/radostrace -i $RADOS_DWARF --skip-version-check >$RADOSTRACE_LOG 2>&1 &
sleep 2 # ensure radostrace starts before we get its PID
RADOSTRACE_PID=$(pidof radostrace)
info "Started radostrace with PID $RADOSTRACE_PID"
sleep 3

echo "=== Step 11: Generate I/O traffic using rbd bench ==="
info "=== Step 11: Generate I/O traffic using rbd bench ==="
# Run rbd bench for write operations
echo "Running rbd bench write..."
info "Running rbd bench write..."
microceph.rbd bench --io-type write --io-size 4M --io-threads 4 --io-total 100M test_pool/testimage &
RBD_BENCH_PID=$!

# Wait a bit for some I/O to occur
sleep 10

# Run some rados operations to generate more librados traffic
echo "Performing rados operations..."
info "Performing rados operations..."
microceph.rados -p test_pool put testobj /etc/hostname || true
microceph.rados -p test_pool get testobj /tmp/testobj || true
microceph.rados -p test_pool rm testobj || true

echo "=== Step 12: Wait for rbd bench to complete ==="
info "=== Step 12: Wait for rbd bench to complete ==="
wait $RBD_BENCH_PID 2>/dev/null || true

echo "=== Step 13: Wait for traces to complete ==="
info "=== Step 13: Wait for traces to complete ==="
sleep 5

# Kill trace processes gracefully
Expand All @@ -190,53 +210,70 @@ kill $RADOSTRACE_PID 2>/dev/null || true
wait $OSDTRACE_PID 2>/dev/null || true
wait $RADOSTRACE_PID 2>/dev/null || true

echo "=== Step 14: Verify osdtrace output ==="
if [ ! -f /tmp/osdtrace.log ]; then
echo "Error: osdtrace log file not found"
info "=== Step 14: Verify osdtrace output ==="

# 14.1 Check trace exists
OSD_LINE_COUNT=$(wc -l < $OSDTRACE_LOG)
info "osdtrace captured $OSD_LINE_COUNT lines"
if [ $OSD_LINE_COUNT -lt 5 ]; then
err "osdtrace did not capture enough trace data (expected at least 5 lines)"
exit 1
fi

OSD_LINE_COUNT=$(wc -l < /tmp/osdtrace.log)
echo "osdtrace captured $OSD_LINE_COUNT lines"
# 14.2 Check OSD IDs range is within the expected limit
MAX_OSD_ID=$(microceph.ceph osd stat | grep -oP '\d+(?= osds:)' || echo "0")
MAX_OSD_ID=$((MAX_OSD_ID - 1)) # Convert count to max ID (0-indexed)
info "Max OSD ID in cluster: $MAX_OSD_ID"

if [ $OSD_LINE_COUNT -lt 5 ]; then
echo "Error: osdtrace did not capture enough trace data (expected at least 5 lines)"
echo "osdtrace output:"
cat /tmp/osdtrace.log
osd_id_err=$(awk -v max_osd=$MAX_OSD_ID '$1=="osd" && ($2 < 0 || $2 > max_osd) {print $2; exit}' $OSDTRACE_LOG)
if [ -n "$osd_id_err" ]; then
err "Found OSD id outside the expected range, $osd_id_err"
exit 1
fi

# 14.3 Check the correct pool id is used
TEST_POOL_ID=$(microceph.ceph osd pool ls detail | grep "^pool.*'test_pool'" | grep -oP "pool \K\d+")
# Print the first pool id found in an osdtrace log that differs from the
# expected one; print nothing when every traced PG belongs to that pool.
#   $1 - expected pool id
#   $2 - path to the osdtrace log
unexpected_pool_id() {
  # split() fills its array starting at index 1, so for a "<pool>.<pg>" field
  # the pool id is a[1]; a[0] is never set.
  # The filter keys on $3 == "pg": the OSD-id check in step 14.2 reads $2 as
  # the numeric OSD id, so "pg" cannot also sit in $2 on a log that passed it.
  # NOTE(review): assumes lines look like "osd <id> pg <pool>.<pg> ..." -
  # confirm against real osdtrace output.
  awk -v p_id="$1" '
    $1 == "osd" && $3 == "pg" {
      split($4, a, ".")
      if (a[1] != p_id) {
        print a[1]
        exit
      }
    }
  ' <"$2"
}
pool_id_err=$(unexpected_pool_id "$TEST_POOL_ID" "$OSDTRACE_LOG")
Copy link
Owner

@taodd taodd Jan 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The array created by awk's `split` function starts at index 1, so `a[0]` is always empty: `a[1]` is the pool id and `a[2]` is the PG id.

if [ -n "$pool_id_err" ]; then
err "Unexpected pool id found in osdtrace, $pool_id_err"
exit 1
fi

echo "✓ osdtrace successfully captured trace data"
# 14.4 Check PG ranges in the test pool
TOT_PG=$(microceph.ceph osd pool get test_pool pg_num | awk '{print $2}')
# Print the PG id of every traced op whose PG number falls outside
# [0, total); print nothing when all of them are in range.
#   $1 - number of PGs in the pool (pg_num)
#   $2 - path to the osdtrace log
pgs_outside_range() {
  # split() arrays are 1-based: for "<pool>.<pg>" the PG id is a[2].
  # "a[2] + 0" replaces strtonum(), which is a gawk extension and is not
  # available in mawk or POSIX awk.
  # The filter keys on $3 == "pg" because the OSD-id check in step 14.2
  # reads $2 as the numeric OSD id.
  # NOTE(review): Ceph prints the PG part of a pgid in hex (e.g. 3.1f); if
  # osdtrace does the same, "+ 0" only reads the leading decimal digits -
  # confirm the log format.
  awk -v tot="$1" '
    $1 == "osd" && $3 == "pg" {
      split($4, a, ".")
      pg = a[2] + 0
      if (pg < 0 || pg >= tot) print a[2]
    }
  ' <"$2"
}
pg_range_err=$(pgs_outside_range "$TOT_PG" "$OSDTRACE_LOG")
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above: the PG id should be `a[2]`.

if [[ -n $pg_range_err ]]; then
err "Found PGs outside the expected range: $pg_range_err"
exit 1
fi

echo "=== Step 15: Verify radostrace output ==="
if [ ! -f /tmp/radostrace.log ]; then
echo "Error: radostrace log file not found"
# 14.5 Check for high latencies
# Maximum acceptable latency value (in microseconds) = 100s
MAX_LATENCY=100000000

# Print every traced op line whose last field exceeds a latency limit.
#   $1 - latency limit (same unit as the last field of the trace lines)
#   $2 - path to the osdtrace log
osd_ops_over_latency() {
  # "+ 0" forces a numeric comparison even if a field does not look numeric.
  # The filter keys on $3 == "pg" because the OSD-id check in step 14.2
  # reads $2 as the numeric OSD id.
  # NOTE(review): assumes the last field of each op line is the total
  # latency - confirm against real osdtrace output.
  awk -v lmax="$1" '$1 == "osd" && $3 == "pg" && $NF + 0 > lmax + 0' <"$2"
}
high_lat=$(osd_ops_over_latency "$MAX_LATENCY" "$OSDTRACE_LOG")
if [[ -n $high_lat ]]; then
err "Found latencies over $MAX_LATENCY μs"
exit 1
fi

RADOS_LINE_COUNT=$(wc -l < /tmp/radostrace.log)
echo "radostrace captured $RADOS_LINE_COUNT lines"
info "✓ All osdtrace output fields validated successfully"

info "===Step 15: Verify radostrace output ==="
RADOS_LINE_COUNT=$(wc -l < $RADOSTRACE_LOG)
info "radostrace captured $RADOS_LINE_COUNT lines"

if [ $RADOS_LINE_COUNT -lt 3 ]; then
echo "Error: radostrace did not capture enough trace data (expected at least 3 lines)"
echo "radostrace output:"
cat /tmp/radostrace.log
err "radostrace did not capture enough trace data (expected at least 3 lines)"
exit 1
fi

echo "✓ radostrace successfully captured trace data"

echo ""
echo "=== Test Summary ==="
echo "✓ MicroCeph cluster deployed successfully"
echo "✓ osdtrace captured $OSD_LINE_COUNT lines of trace data"
echo "✓ radostrace captured $RADOS_LINE_COUNT lines of trace data"
echo "✓ All functional tests passed!"
echo ""
echo "Sample osdtrace output (first 10 lines):"
head -10 /tmp/osdtrace.log
echo ""
echo "Sample radostrace output (first 10 lines):"
head -10 /tmp/radostrace.log
info "✓ radostrace successfully captured trace data"

info "=== Test Summary ==="
info "✓ MicroCeph cluster deployed successfully"
info "✓ osdtrace captured $OSD_LINE_COUNT lines of trace data"
# LINES_VALIDATED was never assigned anywhere; the validation passes scan the
# whole osdtrace log, so report the captured line count instead.
info "✓ osdtrace output validated: $OSD_LINE_COUNT lines checked, 0 errors"
Copy link
Owner

@taodd taodd Jan 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`LINES_VALIDATED` is undefined. Did you mean to set it to the number of traced lines in the osdtrace log?

info "✓ radostrace captured $RADOS_LINE_COUNT lines of trace data"
info "✓ All functional tests passed!"

exit 0