diff --git a/kmod/src/btree.c b/kmod/src/btree.c index 7a21afc7..1a2d85e9 100644 --- a/kmod/src/btree.c +++ b/kmod/src/btree.c @@ -2183,6 +2183,8 @@ static int merge_read_item(struct super_block *sb, struct scoutfs_key *key, u64 if (ret > 0) { if (ret == SCOUTFS_DELTA_COMBINED) { scoutfs_inc_counter(sb, btree_merge_delta_combined); + if (seq > found->seq) + found->seq = seq; } else if (ret == SCOUTFS_DELTA_COMBINED_NULL) { scoutfs_inc_counter(sb, btree_merge_delta_null); free_mitem(rng, found); diff --git a/kmod/src/forest.c b/kmod/src/forest.c index 306f4841..41e99008 100644 --- a/kmod/src/forest.c +++ b/kmod/src/forest.c @@ -26,6 +26,7 @@ #include "hash.h" #include "srch.h" #include "counters.h" +#include "triggers.h" #include "xattr.h" #include "scoutfs_trace.h" @@ -731,6 +732,8 @@ static void scoutfs_forest_log_merge_worker(struct work_struct *work) ret = scoutfs_btree_merge(sb, &alloc, &wri, &req.start, &req.end, &next, &comp.root, &inputs, !!(req.flags & cpu_to_le64(SCOUTFS_LOG_MERGE_REQUEST_SUBTREE)), + scoutfs_trigger(sb, LOG_MERGE_FORCE_PARTIAL) ? 
+ SCOUTFS_BLOCK_LG_SIZE : SCOUTFS_LOG_MERGE_DIRTY_BYTE_LIMIT, 10, (2 * 1024 * 1024)); if (ret == -ERANGE) { diff --git a/kmod/src/triggers.c b/kmod/src/triggers.c index 317f0911..10ebfd4d 100644 --- a/kmod/src/triggers.c +++ b/kmod/src/triggers.c @@ -45,6 +45,7 @@ static char *names[] = { [SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE] = "srch_force_log_rotate", [SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE] = "srch_merge_stop_safe", [SCOUTFS_TRIGGER_STATFS_LOCK_PURGE] = "statfs_lock_purge", + [SCOUTFS_TRIGGER_LOG_MERGE_FORCE_PARTIAL] = "log_merge_force_partial", }; bool scoutfs_trigger_test_and_clear(struct super_block *sb, unsigned int t) diff --git a/kmod/src/triggers.h b/kmod/src/triggers.h index eeb33b49..0b60f74e 100644 --- a/kmod/src/triggers.h +++ b/kmod/src/triggers.h @@ -8,6 +8,7 @@ enum scoutfs_trigger { SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE, SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE, SCOUTFS_TRIGGER_STATFS_LOCK_PURGE, + SCOUTFS_TRIGGER_LOG_MERGE_FORCE_PARTIAL, SCOUTFS_TRIGGER_NR, }; diff --git a/tests/golden/totl-merge-read b/tests/golden/totl-merge-read new file mode 100644 index 00000000..931671e6 --- /dev/null +++ b/tests/golden/totl-merge-read @@ -0,0 +1,3 @@ +== setup +expected 4681 +== cleanup diff --git a/tests/sequence b/tests/sequence index 8091b1b1..c2ec96c8 100644 --- a/tests/sequence +++ b/tests/sequence @@ -26,6 +26,7 @@ simple-xattr-unit.sh retention-basic.sh totl-xattr-tag.sh quota.sh +totl-merge-read.sh lock-refleak.sh lock-shrink-consistency.sh lock-shrink-read-race.sh diff --git a/tests/tests/totl-merge-read.sh b/tests/tests/totl-merge-read.sh new file mode 100644 index 00000000..3cd40644 --- /dev/null +++ b/tests/tests/totl-merge-read.sh @@ -0,0 +1,132 @@ +# +# Test that merge_read_item() correctly updates the sequence number when +# combining delta items from multiple finalized log trees. +# +# A bug in merge_read_item() fails to update found->seq to the max of +# both items' seqs when combining deltas. 
The combined item retains +# a stale seq from the lower-RID tree processed first. +# +# Multiple write/merge/read passes increase the chance of hitting the +# double-counting window. +# +# The log_merge_force_partial trigger forces one-block dirty limits on +# each merge iteration, causing many partial merges that splice +# stale-seq items into fs_root while finalized logs still exist. +# +# The read-xattr-totals ioctl uses a sliding cursor that can return +# pre-merge values (< expected) or duplicate entries when btree data +# changes between batches. To avoid test false positives we've assigned +# a 3-bit window for each mount so that any double counting can be +# identified by overflow in the bit window. +# + +t_require_commands setfattr scoutfs +t_require_mounts 5 + +NR_KEYS=2500 + +echo "== setup" +for nr in $(t_fs_nrs); do + d=$(eval echo \$T_D$nr) + for i in $(seq 1 $NR_KEYS); do + : > "$d/file-${nr}-${i}" + done +done +sync +t_force_log_merge + +sv=$(t_server_nr) + +vals=( 4096 512 64 8 1 ) +expected=$(( vals[0] + vals[1] + vals[2] + vals[3] + vals[4] )) + +n=0 +for nr in $(t_fs_nrs); do + d=$(eval echo \$T_D$nr) + val=${vals[$n]} + (( n++ )) + for i in $(seq 1 $NR_KEYS); do + setfattr -n "scoutfs.totl.test.${i}.0.0" -v $val \ + "$d/file-${nr}-${i}" + done +done + +sync + +last_complete=$(t_counter log_merge_complete $sv) + +t_trigger_arm_silent log_merge_force_finalize_ours $sv +t_sync_seq_index +while test "$(t_trigger_get log_merge_force_finalize_ours $sv)" == "1"; do + sleep .1 +done + +# Spam the log_merge_force_partial trigger from a tight Python loop, +# avoiding the fork/exec overhead of doing this in a shell "echo" +# loop, which is too slow. 
+trigger_paths="" +for i in $(t_fs_nrs); do + trigger_paths="$trigger_paths $(t_trigger_path $i)/log_merge_force_partial" +done +python3 -c " +import sys +paths = sys.argv[1:] +while True: + for p in paths: + with open(p, 'w') as f: + f.write('1') +" $trigger_paths & +spam_pid=$! + +bad_dir="$T_TMPDIR/bad" +mkdir -p "$bad_dir" + +read_totals() { + local nr=$1 + local mnt=$(eval echo \$T_M$nr) + while true; do + echo 1 > $(t_debugfs_path $nr)/drop_weak_item_cache + # This is probably too elaborate, but, it's pretty neat we can + # illustrate the double reads this way with some awk magic. + scoutfs read-xattr-totals -p "$mnt" | \ + awk -F'[ =,]+' -v expect=$expected \ + 'or($2+0, expect) != expect { + v = $2+0; s = "" + split("0 1 2 3 4", m) + split("12 9 6 3 0", sh) + for (i = 1; i <= 5; i++) { + c = and(rshift(v, sh[i]+0), 7) + if (c > 1) s = s " m" m[i] ":" c + } + printf "%s (%s)\n", $0, substr(s, 2) + }' >> "$bad_dir/$nr" + done +} + +echo "expected $expected" +reader_pids="" +for nr in $(t_fs_nrs); do + read_totals $nr & + reader_pids="$reader_pids $!" +done + +while (( $(t_counter log_merge_complete $sv) == last_complete )); do + sleep .1 +done + +t_silent_kill $spam_pid $reader_pids + +for nr in $(t_fs_nrs); do + if [ -s "$bad_dir/$nr" ]; then + echo "double-counted totals on mount $nr:" + cat "$bad_dir/$nr" + fi +done + +echo "== cleanup" +for nr in $(t_fs_nrs); do + d=$(eval echo \$T_D$nr) + find "$d" -maxdepth 1 -name "file-${nr}-*" -delete +done + +t_pass