From 8139a486a6a0045450e54b473675a3162f1e8fdb Mon Sep 17 00:00:00 2001 From: Chris Kirby Date: Wed, 14 Jan 2026 14:36:57 -0600 Subject: [PATCH 1/5] Fix trigger firing race in srch-safe-merge-pos Because the srch triggers are inherently async to the test, we can't be sure they won't fire prematurely just because a compact worker started running at an inconvenient time. Make the trigger arming silent to avoid spurious test failures. Move the trigger arming closer to the point of interest to increase the chances that we're actually testing what we want. Signed-off-by: Chris Kirby --- tests/golden/srch-safe-merge-pos | 30 ------------------------------ tests/tests/srch-safe-merge-pos.sh | 11 ++++++----- 2 files changed, 6 insertions(+), 35 deletions(-) diff --git a/tests/golden/srch-safe-merge-pos b/tests/golden/srch-safe-merge-pos index e801d5582..48934d7eb 100644 --- a/tests/golden/srch-safe-merge-pos +++ b/tests/golden/srch-safe-merge-pos @@ -1,37 +1,7 @@ == initialize per-mount values == arm compaction triggers -trigger srch_compact_logs_pad_safe armed: 1 -trigger srch_merge_stop_safe armed: 1 -trigger srch_compact_logs_pad_safe armed: 1 -trigger srch_merge_stop_safe armed: 1 -trigger srch_compact_logs_pad_safe armed: 1 -trigger srch_merge_stop_safe armed: 1 -trigger srch_compact_logs_pad_safe armed: 1 -trigger srch_merge_stop_safe armed: 1 -trigger srch_compact_logs_pad_safe armed: 1 -trigger srch_merge_stop_safe armed: 1 == compact more often == create padded sorted inputs by forcing log rotation -trigger srch_force_log_rotate armed: 1 -trigger srch_force_log_rotate armed: 1 -trigger srch_force_log_rotate armed: 1 -trigger srch_force_log_rotate armed: 1 -trigger srch_compact_logs_pad_safe armed: 1 -trigger srch_force_log_rotate armed: 1 -trigger srch_force_log_rotate armed: 1 -trigger srch_force_log_rotate armed: 1 -trigger srch_force_log_rotate armed: 1 -trigger srch_compact_logs_pad_safe armed: 1 -trigger srch_force_log_rotate armed: 1 -trigger 
srch_force_log_rotate armed: 1 -trigger srch_force_log_rotate armed: 1 -trigger srch_force_log_rotate armed: 1 -trigger srch_compact_logs_pad_safe armed: 1 -trigger srch_force_log_rotate armed: 1 -trigger srch_force_log_rotate armed: 1 -trigger srch_force_log_rotate armed: 1 -trigger srch_force_log_rotate armed: 1 -trigger srch_compact_logs_pad_safe armed: 1 == compaction of padded should stop at safe == verify no compaction errors == cleanup diff --git a/tests/tests/srch-safe-merge-pos.sh b/tests/tests/srch-safe-merge-pos.sh index bd388a18b..b5cd340cd 100644 --- a/tests/tests/srch-safe-merge-pos.sh +++ b/tests/tests/srch-safe-merge-pos.sh @@ -31,8 +31,8 @@ trap restore_compact_delay EXIT echo "== arm compaction triggers" for nr in $(t_fs_nrs); do - t_trigger_arm srch_compact_logs_pad_safe $nr - t_trigger_arm srch_merge_stop_safe $nr + t_trigger_arm_silent srch_compact_logs_pad_safe $nr + t_trigger_arm_silent srch_merge_stop_safe $nr done echo "== compact more often" @@ -44,11 +44,12 @@ echo "== create padded sorted inputs by forcing log rotation" sv=$(t_server_nr) for i in $(seq 1 $COMPACT_NR); do for j in $(seq 1 $COMPACT_NR); do - t_trigger_arm srch_force_log_rotate $sv - seq -f "f-$i-$j-$SEQF" 1 10 | \ bulk_create_paths -X "scoutfs.srch.t-srch-safe-merge-pos" -d "$T_D0" > \ /dev/null + + t_trigger_arm_silent srch_force_log_rotate $sv + sync test "$(t_trigger_get srch_force_log_rotate $sv)" == "0" || \ @@ -59,7 +60,7 @@ for i in $(seq 1 $COMPACT_NR); do while test $padded == 0 && sleep .5; do for nr in $(t_fs_nrs); do if [ "$(t_trigger_get srch_compact_logs_pad_safe $nr)" == "0" ]; then - t_trigger_arm srch_compact_logs_pad_safe $nr + t_trigger_arm_silent srch_compact_logs_pad_safe $nr padded=1 break fi From e24567d12db6880a24cea4ddf0d782565bec8c46 Mon Sep 17 00:00:00 2001 From: Chris Kirby Date: Wed, 14 Jan 2026 14:44:15 -0600 Subject: [PATCH 2/5] Improve tracing for get_file_block() Print the first and last entries, the entry_nr and entry_bytes. 
Signed-off-by: Chris Kirby --- kmod/src/scoutfs_trace.h | 32 ++++++++++++++++++++++++++------ kmod/src/srch.c | 2 +- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/kmod/src/scoutfs_trace.h b/kmod/src/scoutfs_trace.h index c71573529..7da72738c 100644 --- a/kmod/src/scoutfs_trace.h +++ b/kmod/src/scoutfs_trace.h @@ -2620,24 +2620,44 @@ TRACE_EVENT(scoutfs_block_dirty_ref, ); TRACE_EVENT(scoutfs_get_file_block, - TP_PROTO(struct super_block *sb, u64 blkno, int flags), + TP_PROTO(struct super_block *sb, u64 blkno, int flags, + struct scoutfs_srch_block *srb), - TP_ARGS(sb, blkno, flags), + TP_ARGS(sb, blkno, flags, srb), TP_STRUCT__entry( SCSB_TRACE_FIELDS __field(__u64, blkno) + __field(__u32, entry_nr) + __field(__u32, entry_bytes) __field(int, flags) + __field(__u64, first_hash) + __field(__u64, first_ino) + __field(__u64, first_id) + __field(__u64, last_hash) + __field(__u64, last_ino) + __field(__u64, last_id) ), TP_fast_assign( SCSB_TRACE_ASSIGN(sb); __entry->blkno = blkno; + __entry->entry_nr = __le32_to_cpu(srb->entry_nr); + __entry->entry_bytes = __le32_to_cpu(srb->entry_bytes); __entry->flags = flags; - ), - - TP_printk(SCSBF" blkno %llu flags 0x%x", - SCSB_TRACE_ARGS, __entry->blkno, __entry->flags) + __entry->first_hash = __le64_to_cpu(srb->first.hash); + __entry->first_ino = __le64_to_cpu(srb->first.ino); + __entry->first_id = __le64_to_cpu(srb->first.id); + __entry->last_hash = __le64_to_cpu(srb->last.hash); + __entry->last_ino = __le64_to_cpu(srb->last.ino); + __entry->last_id = __le64_to_cpu(srb->last.id); + ), + + TP_printk(SCSBF" blkno %llu nr %u bytes %u flags 0x%x first_hash 0x%llx first_ino %llu first_id 0x%llx last_hash 0x%llx last_ino %llu last_id 0x%llx", + SCSB_TRACE_ARGS, __entry->blkno, __entry->entry_nr, + __entry->entry_bytes, __entry->flags, + __entry->first_hash, __entry->first_ino, __entry->first_id, + __entry->last_hash, __entry->last_ino, __entry->last_id) ); TRACE_EVENT(scoutfs_block_stale, diff --git a/kmod/src/srch.c 
b/kmod/src/srch.c index e1b5fb90c..c106027e0 100644 --- a/kmod/src/srch.c +++ b/kmod/src/srch.c @@ -443,7 +443,7 @@ static int get_file_block(struct super_block *sb, sfl->blocks = cpu_to_le64(blk + 1); if (bl) { - trace_scoutfs_get_file_block(sb, bl->blkno, flags); + trace_scoutfs_get_file_block(sb, bl->blkno, flags, bl->data); } *bl_ret = bl; From ac1ef2fdd8df9133579c0121d6431d432764e624 Mon Sep 17 00:00:00 2001 From: Chris Kirby Date: Wed, 14 Jan 2026 14:47:34 -0600 Subject: [PATCH 3/5] Change the looping logic in run-tests.sh If a set of tests is provided, loop over the entire set the requested number of times. Start and stop any requested tracing across the set boundary. Signed-off-by: Chris Kirby --- tests/run-tests.sh | 164 +++++++++++++++++++++++---------------------- 1 file changed, 85 insertions(+), 79 deletions(-) diff --git a/tests/run-tests.sh b/tests/run-tests.sh index f6c506ec6..8a6be950e 100755 --- a/tests/run-tests.sh +++ b/tests/run-tests.sh @@ -90,7 +90,7 @@ done # set some T_ defaults T_TRACE_DUMP="0" -T_TRACE_PRINTK="0" +T_TRACE_PRINTK="" T_PORT_START="19700" T_LOOP_ITER="1" @@ -137,6 +137,9 @@ while true; do test -n "$2" || die "-l must have a nr iterations argument" test "$2" -eq "$2" 2>/dev/null || die "-l argument must be an integer" T_LOOP_ITER="$2" + + # when looping, break after first failure + T_ABORT="1" shift ;; -M) @@ -399,31 +402,44 @@ if [ -n "$T_INSMOD" ]; then cmd insmod "$T_MODULE" fi -if [ -n "$T_TRACE_MULT" ]; then -# orig_trace_size=$(cat /sys/kernel/debug/tracing/buffer_size_kb) - orig_trace_size=1408 - mult_trace_size=$((orig_trace_size * T_TRACE_MULT)) - msg "increasing trace buffer size from $orig_trace_size KiB to $mult_trace_size KiB" - echo $mult_trace_size > /sys/kernel/debug/tracing/buffer_size_kb -fi - -nr_globs=${#T_TRACE_GLOB[@]} -if [ $nr_globs -gt 0 ]; then - echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable +start_tracing() { + if [ -n "$T_TRACE_MULT" ]; then + orig_trace_size=1408 + 
mult_trace_size=$((orig_trace_size * T_TRACE_MULT)) + msg "increasing trace buffer size from $orig_trace_size KiB to $mult_trace_size KiB" + echo $mult_trace_size > /sys/kernel/debug/tracing/buffer_size_kb + fi - for g in "${T_TRACE_GLOB[@]}"; do - for e in /sys/kernel/debug/tracing/events/scoutfs/$g/enable; do - if test -w "$e"; then - echo 1 > "$e" - else - die "-t glob '$g' matched no scoutfs events" - fi + nr_globs=${#T_TRACE_GLOB[@]} + if [ $nr_globs -gt 0 ]; then + echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable + + for g in "${T_TRACE_GLOB[@]}"; do + for e in /sys/kernel/debug/tracing/events/scoutfs/$g/enable; do + if test -w "$e"; then + echo 1 > "$e" + else + die "-t glob '$g' matched no scoutfs events" + fi + done done - done - nr_events=$(cat /sys/kernel/debug/tracing/set_event | wc -l) - msg "enabled $nr_events trace events from $nr_globs -t globs" -fi + nr_events=$(cat /sys/kernel/debug/tracing/set_event | wc -l) + msg "enabled $nr_events trace events from $nr_globs -t globs" + fi +} + +stop_tracing() { + if [ -n "$T_TRACE_GLOB" -o -n "$T_TRACE_PRINTK" ]; then + msg "saving traces and disabling tracing" + echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable + echo 0 > /sys/kernel/debug/tracing/options/trace_printk + cat /sys/kernel/debug/tracing/trace | gzip > "$T_RESULTS/traces.gz" + if [ -n "$orig_trace_size" ]; then + echo $orig_trace_size > /sys/kernel/debug/tracing/buffer_size_kb + fi + fi +} if [ -n "$T_TRACE_PRINTK" ]; then echo "$T_TRACE_PRINTK" > /sys/kernel/debug/tracing/options/trace_printk @@ -603,24 +619,26 @@ passed=0 skipped=0 failed=0 skipped_permitted=0 -for t in $tests; do - # tests has basenames from sequence, get path and name - t="tests/$t" - test_name=$(basename "$t" | sed -e 's/.sh$//') +for iter in $(seq 1 $T_LOOP_ITER); do + + start_tracing - # get stats from previous pass - last="$T_RESULTS/last-passed-test-stats" - stats=$(grep -s "^$test_name " "$last" | cut -d " " -f 2-) - test -n "$stats" && stats="last: 
$stats" - printf " %-30s $stats" "$test_name" + for t in $tests; do + # tests has basenames from sequence, get path and name + t="tests/$t" + test_name=$(basename "$t" | sed -e 's/.sh$//') - # mark in dmesg as to what test we are running - echo "run scoutfs test $test_name" > /dev/kmsg + # get stats from previous pass + last="$T_RESULTS/last-passed-test-stats" + stats=$(grep -s "^$test_name " "$last" | cut -d " " -f 2-) + test -n "$stats" && stats="last: $stats" + printf " %-30s $stats" "$test_name" - # let the test get at its extra files - T_EXTRA="$T_TESTS/extra/$test_name" + # mark in dmesg as to what test we are running + echo "run scoutfs test $test_name" > /dev/kmsg - for iter in $(seq 1 $T_LOOP_ITER); do + # let the test get at its extra files + T_EXTRA="$T_TESTS/extra/$test_name" # create a temporary dir and file path for the test T_TMPDIR="$T_RESULTS/tmp/$test_name" @@ -710,55 +728,43 @@ for t in $tests; do sts=$T_FAIL_STATUS fi - # stop looping if we didn't pass - if [ "$sts" != "$T_PASS_STATUS" ]; then - break; + # show and record the result of the test + if [ "$sts" == "$T_PASS_STATUS" ]; then + echo " passed: $stats" + ((passed++)) + # save stats for passed test + grep -s -v "^$test_name " "$last" > "$last.tmp" + echo "$test_name $stats" >> "$last.tmp" + mv -f "$last.tmp" "$last" + elif [ "$sts" == "$T_SKIP_PERMITTED_STATUS" ]; then + echo " [ skipped (permitted): $message ]" + echo "$test_name skipped (permitted) $message " >> "$T_RESULTS/skip.log" + ((skipped_permitted++)) + elif [ "$sts" == "$T_SKIP_STATUS" ]; then + echo " [ skipped: $message ]" + echo "$test_name $message" >> "$T_RESULTS/skip.log" + ((skipped++)) + elif [ "$sts" == "$T_FAIL_STATUS" ]; then + echo " [ failed: $message ]" + echo "$test_name $message" >> "$T_RESULTS/fail.log" + ((failed++)) + + if [ -n "$T_ABORT" ]; then + stop_tracing + die "aborting after first failure" + fi fi - done - - # show and record the result of the test - if [ "$sts" == "$T_PASS_STATUS" ]; then - echo " 
passed: $stats" - ((passed++)) - # save stats for passed test - grep -s -v "^$test_name " "$last" > "$last.tmp" - echo "$test_name $stats" >> "$last.tmp" - mv -f "$last.tmp" "$last" - elif [ "$sts" == "$T_SKIP_PERMITTED_STATUS" ]; then - echo " [ skipped (permitted): $message ]" - echo "$test_name skipped (permitted) $message " >> "$T_RESULTS/skip.log" - ((skipped_permitted++)) - elif [ "$sts" == "$T_SKIP_STATUS" ]; then - echo " [ skipped: $message ]" - echo "$test_name $message" >> "$T_RESULTS/skip.log" - ((skipped++)) - elif [ "$sts" == "$T_FAIL_STATUS" ]; then - echo " [ failed: $message ]" - echo "$test_name $message" >> "$T_RESULTS/fail.log" - ((failed++)) - - test -n "$T_ABORT" && die "aborting after first failure" - fi - # record results for TAP format output - t_tap_progress $test_name $sts - ((testcount++)) + # record results for TAP format output + t_tap_progress $test_name $sts + ((testcount++)) + done + stop_tracing done msg "all tests run: $passed passed, $skipped skipped, $skipped_permitted skipped (permitted), $failed failed" - -if [ -n "$T_TRACE_GLOB" -o -n "$T_TRACE_PRINTK" ]; then - msg "saving traces and disabling tracing" - echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable - echo 0 > /sys/kernel/debug/tracing/options/trace_printk - cat /sys/kernel/debug/tracing/trace > "$T_RESULTS/traces" - if [ -n "$orig_trace_size" ]; then - echo $orig_trace_size > /sys/kernel/debug/tracing/buffer_size_kb - fi -fi - if [ "$skipped" == 0 -a "$failed" == 0 ]; then msg "all tests passed" unmount_all From 23fab64157cc8342af7f3d54ed661ad90156b707 Mon Sep 17 00:00:00 2001 From: Chris Kirby Date: Wed, 14 Jan 2026 15:03:02 -0600 Subject: [PATCH 4/5] Don't emit empty blocks in kway_merge() It's possible for a srch compaction to collapse down to nothing if given evenly paired create/delete entries. In this case, we were emitting an empty block. 
This could cause problems for search_sorted_file(), which assumes that every block it sees has a valid first and last entry. Fix this by keeping a temp entry and only emitting it if it differs from the next entry in the block. Be sure to flush out a straggling temp entry if we have one when we're done with the last block of the merge. Signed-off-by: Chris Kirby --- kmod/src/scoutfs_trace.h | 104 +++++++++++++++++++ kmod/src/srch.c | 215 +++++++++++++++++++++++++-------------- 2 files changed, 240 insertions(+), 79 deletions(-) diff --git a/kmod/src/scoutfs_trace.h b/kmod/src/scoutfs_trace.h index 7da72738c..c378b2ee3 100644 --- a/kmod/src/scoutfs_trace.h +++ b/kmod/src/scoutfs_trace.h @@ -2660,6 +2660,110 @@ TRACE_EVENT(scoutfs_get_file_block, __entry->last_hash, __entry->last_ino, __entry->last_id) ); +TRACE_EVENT(scoutfs_srch_new_merge, + TP_PROTO(struct super_block *sb), + + TP_ARGS(sb), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + ), + + TP_printk(SCSBF, SCSB_TRACE_ARGS) +); + +TRACE_EVENT(scoutfs_srch_emit_entry, + TP_PROTO(struct super_block *sb, struct scoutfs_srch_entry *sre, + struct scoutfs_srch_block *srb, u64 blkno), + + TP_ARGS(sb, sre, srb, blkno), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + __field(__u32, entry_nr) + __field(__u64, blkno) + __field(__u64, hash) + __field(__u64, ino) + __field(__u64, id) + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + __entry->entry_nr = __le32_to_cpu(srb->entry_nr); + __entry->blkno = blkno; + __entry->hash = __le64_to_cpu(sre->hash); + __entry->ino = __le64_to_cpu(sre->ino); + __entry->id = __le64_to_cpu(sre->id); + ), + + TP_printk(SCSBF" nr %u blkno %llu hash 0x%llx ino %llu id 0x%llx", + SCSB_TRACE_ARGS, __entry->entry_nr, __entry->blkno, + __entry->hash, __entry->ino, __entry->id) +); + +TRACE_EVENT(scoutfs_srch_clr_tmp, + TP_PROTO(struct super_block *sb, struct scoutfs_srch_entry *tmp), + + TP_ARGS(sb, tmp), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + 
__field(__u64, tmp_hash) + __field(__u64, tmp_ino) + __field(__u64, tmp_id) + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + __entry->tmp_hash = __le64_to_cpu(tmp->hash); + __entry->tmp_ino = __le64_to_cpu(tmp->ino); + __entry->tmp_id = __le64_to_cpu(tmp->id); + ), + + TP_printk(SCSBF" tmp hash 0x%llx tmp ino %llu tmp id 0x%llx", + SCSB_TRACE_ARGS, + __entry->tmp_hash, __entry->tmp_ino, __entry->tmp_id) +); + +TRACE_EVENT(scoutfs_srch_cmp, + TP_PROTO(struct super_block *sb, struct scoutfs_srch_entry *root, + struct scoutfs_srch_entry *tmp, void *bl), + + TP_ARGS(sb, root, tmp, bl), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + __field(__u64, root_hash) + __field(__u64, root_ino) + __field(__u64, root_id) + __field(__u64, tmp_hash) + __field(__u64, tmp_ino) + __field(__u64, tmp_id) + __field(void *, bl) + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + __entry->root_hash = __le64_to_cpu(root->hash); + __entry->root_ino = __le64_to_cpu(root->ino); + __entry->root_id = __le64_to_cpu(root->id); + __entry->tmp_hash = __le64_to_cpu(tmp->hash); + __entry->tmp_ino = __le64_to_cpu(tmp->ino); + __entry->tmp_id = __le64_to_cpu(tmp->id); + __entry->bl = bl; + ), + + TP_printk(SCSBF" root hash 0x%llx root ino %llu root id 0x%llx tmp hash 0x%llx tmp ino %llu tmp id 0x%llx, bl %p", + SCSB_TRACE_ARGS, + __entry->root_hash, __entry->root_ino, __entry->root_id, + __entry->tmp_hash, __entry->tmp_ino, __entry->tmp_id, + __entry->bl) +); + TRACE_EVENT(scoutfs_block_stale, TP_PROTO(struct super_block *sb, struct scoutfs_block_ref *ref, struct scoutfs_block_header *hdr, u32 magic, u32 crc), diff --git a/kmod/src/srch.c b/kmod/src/srch.c index c106027e0..942bd969e 100644 --- a/kmod/src/srch.c +++ b/kmod/src/srch.c @@ -1525,6 +1525,66 @@ static bool should_commit(struct super_block *sb, struct scoutfs_alloc *alloc, scoutfs_alloc_meta_low(sb, alloc, nr); } +static int alloc_srch_block(struct super_block *sb, struct scoutfs_alloc *alloc, + struct scoutfs_block_writer *wri, + struct 
scoutfs_srch_file *sfl, + struct scoutfs_block **bl, + u64 blk) +{ + DECLARE_SRCH_INFO(sb, srinf); + int ret; + + if (atomic_read(&srinf->shutdown)) + return -ESHUTDOWN; + + /* could grow and dirty to a leaf */ + if (should_commit(sb, alloc, wri, sfl->height + 1)) + return -EAGAIN; + + ret = get_file_block(sb, alloc, wri, sfl, GFB_INSERT | GFB_DIRTY, + blk, bl); + if (ret < 0) + return ret; + + scoutfs_inc_counter(sb, srch_compact_dirty_block); + + return 0; +} + +static int emit_srch_entry(struct super_block *sb, + struct scoutfs_srch_file *sfl, + struct scoutfs_srch_block *srb, + struct scoutfs_srch_entry *sre, + u64 blk) +{ + int ret; + + ret = encode_entry(srb->entries + le32_to_cpu(srb->entry_bytes), + sre, &srb->tail); + if (WARN_ON_ONCE(ret <= 0)) { + /* shouldn't happen */ + return -EIO; + } + + if (srb->entry_bytes == 0) { + if (blk == 0) + sfl->first = *sre; + srb->first = *sre; + } + + le32_add_cpu(&srb->entry_nr, 1); + le32_add_cpu(&srb->entry_bytes, ret); + srb->last = *sre; + srb->tail = *sre; + sfl->last = *sre; + le64_add_cpu(&sfl->entries, 1); + + scoutfs_inc_counter(sb, srch_compact_entry); + trace_scoutfs_srch_emit_entry(sb, sre, srb, blk); + + return 0; +} + struct tourn_node { struct scoutfs_srch_entry sre; int ind; @@ -1559,20 +1619,18 @@ static int kway_merge(struct super_block *sb, kway_get_t kway_get, kway_advance_t kway_adv, void **args, int nr, bool logs_input) { - DECLARE_SRCH_INFO(sb, srinf); struct scoutfs_srch_block *srb = NULL; - struct scoutfs_srch_entry last_tail; + struct scoutfs_srch_entry tmp_entry = {0}; struct scoutfs_block *bl = NULL; struct tourn_node *tnodes; struct tourn_node *leaves; struct tourn_node *root; struct tourn_node *tn; - int last_bytes = 0; + bool have_tmp = false; int nr_parents; int nr_nodes; int empty = 0; int ret = 0; - int diff; u64 blk; int ind; int i; @@ -1606,97 +1664,73 @@ static int kway_merge(struct super_block *sb, } } + trace_scoutfs_srch_new_merge(sb); + /* always append new blocks */ blk = 
le64_to_cpu(sfl->blocks); while (empty < nr) { - if (bl == NULL) { - if (atomic_read(&srinf->shutdown)) { - ret = -ESHUTDOWN; - goto out; - } - - /* could grow and dirty to a leaf */ - if (should_commit(sb, alloc, wri, sfl->height + 1)) { - ret = 0; - goto out; - } - - ret = get_file_block(sb, alloc, wri, sfl, - GFB_INSERT | GFB_DIRTY, blk, &bl); - if (ret < 0) - goto out; - srb = bl->data; - scoutfs_inc_counter(sb, srch_compact_dirty_block); - } - - if (sre_cmp(&root->sre, &srb->last) != 0) { - last_bytes = le32_to_cpu(srb->entry_bytes); - last_tail = srb->last; - ret = encode_entry(srb->entries + - le32_to_cpu(srb->entry_bytes), - &root->sre, &srb->tail); - if (WARN_ON_ONCE(ret <= 0)) { - /* shouldn't happen */ - ret = -EIO; - goto out; - } + trace_scoutfs_srch_cmp(sb, &root->sre, &tmp_entry, bl); + + if (sre_cmp(&root->sre, &tmp_entry) != 0) { + if (have_tmp) { + if (bl == NULL) { + ret = alloc_srch_block(sb, alloc, wri, + sfl, &bl, blk); + if (ret < 0) { + if (ret == -EAGAIN) + ret = 0; + goto out; + } + srb = bl->data; + } - if (srb->entry_bytes == 0) { - if (blk == 0) - sfl->first = root->sre; - srb->first = root->sre; - } - le32_add_cpu(&srb->entry_nr, 1); - le32_add_cpu(&srb->entry_bytes, ret); - srb->last = root->sre; - srb->tail = root->sre; - sfl->last = root->sre; - le64_add_cpu(&sfl->entries, 1); - ret = 0; + ret = emit_srch_entry(sb, sfl, srb, &tmp_entry, + blk); + if (ret < 0) + goto out; - if (le32_to_cpu(srb->entry_bytes) > - SCOUTFS_SRCH_BLOCK_SAFE_BYTES) { - scoutfs_block_put(sb, bl); - bl = NULL; - blk++; - } + if (le32_to_cpu(srb->entry_bytes) > + SCOUTFS_SRCH_BLOCK_SAFE_BYTES) { + scoutfs_block_put(sb, bl); + bl = NULL; + blk++; + memset(&tmp_entry, 0, sizeof(tmp_entry)); + have_tmp = false; + continue; + } - /* end sorted block on _SAFE offset for testing */ - if (bl && le32_to_cpu(srb->entry_nr) == 1 && logs_input && - scoutfs_trigger(sb, SRCH_COMPACT_LOGS_PAD_SAFE)) { - pad_entries_at_safe(sfl, srb); - scoutfs_block_put(sb, bl); - bl = 
NULL; - blk++; + /* + * end sorted block on _SAFE offset for + * testing + */ + if (bl && le32_to_cpu(srb->entry_nr) == 1 && + logs_input && + scoutfs_trigger(sb, SRCH_COMPACT_LOGS_PAD_SAFE)) { + pad_entries_at_safe(sfl, srb); + scoutfs_block_put(sb, bl); + bl = NULL; + blk++; + + memset(&tmp_entry, 0, sizeof(tmp_entry)); + have_tmp = false; + continue; + } } - scoutfs_inc_counter(sb, srch_compact_entry); - + tmp_entry = root->sre; + have_tmp = true; } else { /* * Duplicate entries indicate deletion so we - * undo the previously encoded entry and ignore + * undo the previously cached tmp entry and ignore * this entry. This only happens within each * block. Deletions can span block boundaries * and will be filtered out by search and * hopefully removed in future compactions. */ - diff = le32_to_cpu(srb->entry_bytes) - last_bytes; - if (diff) { - memset(srb->entries + last_bytes, 0, diff); - if (srb->entry_bytes == 0) { - /* last_tail will be 0 */ - if (blk == 0) - sfl->first = last_tail; - srb->first = last_tail; - } - le32_add_cpu(&srb->entry_nr, -1); - srb->entry_bytes = cpu_to_le32(last_bytes); - srb->last = last_tail; - srb->tail = last_tail; - sfl->last = last_tail; - le64_add_cpu(&sfl->entries, -1); - } + trace_scoutfs_srch_clr_tmp(sb, &tmp_entry); + memset(&tmp_entry, 0, sizeof(tmp_entry)); + have_tmp = false; scoutfs_inc_counter(sb, srch_compact_removed_entry); } @@ -1739,6 +1773,24 @@ static int kway_merge(struct super_block *sb, /* could stream a final index.. 
arguably a small portion of work */ out: + if (have_tmp && ret >= 0) { + bool emit = true; + + if (bl == NULL) { + ret = alloc_srch_block(sb, alloc, wri, sfl, &bl, blk); + if (ret) { + emit = false; + if (ret == -EAGAIN) + ret = 0; + } else { + srb = bl->data; + } + } + + if (emit) + ret = emit_srch_entry(sb, sfl, srb, &tmp_entry, blk); + } + scoutfs_block_put(sb, bl); vfree(tnodes); return ret; @@ -1982,6 +2034,11 @@ static int kway_get_reader(struct super_block *sb, rdr->skip > SCOUTFS_SRCH_BLOCK_SAFE_BYTES || rdr->skip >= le32_to_cpu(srb->entry_bytes)) { /* XXX inconsistency */ + scoutfs_err(sb, "blkno %llu pos %u vs %ld, skip %u, bytes %u", + __le64_to_cpu(srb->hdr.blkno), + rdr->pos, SCOUTFS_SRCH_BLOCK_SAFE_BYTES, + rdr->skip, + le32_to_cpu(srb->entry_bytes)); return -EIO; } From 9b27e94c2f6b900a13c7af33036d375e28834000 Mon Sep 17 00:00:00 2001 From: Chris Kirby Date: Thu, 29 Jan 2026 08:25:52 -0600 Subject: [PATCH 5/5] Suppress another forced shutdown error message The "server error emptying freed" error was causing a fence-and-reclaim test failure. In this case, the error was -ENOLINK, which we should ignore for messaging purposes. Signed-off-by: Chris Kirby --- kmod/src/server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kmod/src/server.c b/kmod/src/server.c index 7f979df72..42992887d 100644 --- a/kmod/src/server.c +++ b/kmod/src/server.c @@ -630,7 +630,7 @@ static void scoutfs_server_commit_func(struct work_struct *work) ret = scoutfs_alloc_empty_list(sb, &server->alloc, &server->wri, server->meta_freed, server->other_freed); - if (ret) { + if (ret && ret != -ENOLINK) { scoutfs_err(sb, "server error emptying freed: %d", ret); goto out; }