#! /bin/bash
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2022 Oracle.  All Rights Reserved.
#
# FS QA Test No. 559
#
# This is a regression test for a data corruption bug that existed in iomap's
# buffered write routines.
#
. ./common/preamble
_begin_fstest auto quick rw mmap

# Import common functions.
. ./common/inject
. ./common/tracing

# Test teardown hook.
#
# Stops the background helpers, tears down ftrace state and removes every
# temporary file this test may have created.  Reads the globals $tmp,
# $sentryfile and $tracefile; the latter two may still be unset if the test
# bailed out before assigning them.
_cleanup()
{
	# wait_for_errortag polls for as long as the sentry file exists, so
	# remove it first; otherwise the wait below could block on that loop.
	test -n "$sentryfile" && rm -f -- "$sentryfile"
	wait
	_ftrace_cleanup
	cd /
	# Quote every path so that whitespace in a directory name cannot split
	# it into several rm arguments.  The glob suffix stays outside the
	# quotes so it still expands, and the ${var:+...} form drops the
	# argument entirely when the variable is unset or empty.
	rm -r -f -- "$tmp".* ${sentryfile:+"$sentryfile"} ${tracefile:+"$tracefile"}
}

# Prerequisites: ftrace, the xfs_io falloc command, the write_delay_ms error
# injection knob, and a scratch device.
_require_ftrace
_require_xfs_io_command "falloc"
_require_xfs_io_error_injection "write_delay_ms"
_require_scratch

_fixed_by_kernel_commit 304a68b9c63b \
	"xfs: use iomap_valid method to detect stale cached iomaps"

_scratch_mkfs >> $seqres.full
_scratch_mount >> $seqres.full

# This is a pagecache test, so try to disable fsdax mode.  Append to the full
# log (&>>) instead of truncating it (&>), otherwise the mkfs and mount output
# captured just above would be thrown away.
$XFS_IO_PROG -c 'chattr -x' $SCRATCH_MNT &>> $seqres.full
_require_pagecache_access $SCRATCH_MNT

# Number of blocks (one folio's worth of data each) in the test file.
blocks=10

# Derive the file size, the offset of the file's very last byte, and the
# length of the big write (everything up to and including the first byte of
# the final block) from the current values of $blocks and $blksz.
set_write_geometry() {
	filesz=$((blocks * blksz))
	dirty_offset=$((filesz - 1))
	write_len=$(( (blocks - 1) * blksz + 1 ))
}

# If this kernel advertises huge page support, it's possible that it could be
# using large folios for the page cache writes.  It is necessary to write
# multiple folios (large or regular) to trigger the write invalidation, so
# scale the test write size accordingly.  Without a huge page size, fall back
# to the base page size.
blksz=$(_get_hugepagesize)
base_pagesize=$(_get_page_size)
: "${blksz:=$base_pagesize}"
set_write_geometry

# The write invalidation that we're testing below can only occur as part of
# a single large write.  The kernel limits writes to one base page less than
# 2GiB to prevent lengthy IOs and integer overflows.  If the block size is so
# huge (e.g. 512M huge pages on arm64) that we'd exceed that, reduce the number
# of blocks to get us under the limit.
max_writesize=$((2147483647 - base_pagesize))
if ((write_len > max_writesize)); then
	# Largest block count whose write length still fits in the limit.
	blocks=$(( (max_writesize - 1) / blksz + 1 ))

	# We need at least three blocks in the file to test invalidation
	# between writes to multiple folios.  If we drop below that,
	# reconfigure ourselves with base pages and hope for the best.
	if ((blocks < 3)); then
		blksz=$base_pagesize
		blocks=10
	fi
	set_write_geometry
fi

# Create a large file with a large unwritten range: falloc covers the whole
# [0, $filesz) range of the test file.
$XFS_IO_PROG -f -c "falloc 0 $filesz" $SCRATCH_MNT/file >> $seqres.full

# Write the same data to file.compare as we're about to do to file.  Do this
# before slowing down writeback to avoid unnecessary delay.  The order mirrors
# what happens to the real file below: first the single 0x58 byte at the very
# end of the file, then 0x57 over the first $write_len bytes.
_pwrite_byte 0x58 $dirty_offset 1 $SCRATCH_MNT/file.compare >> $seqres.full
_pwrite_byte 0x57 0 $write_len $SCRATCH_MNT/file.compare >> $seqres.full

# Reinitialize the page cache for this file.
_scratch_cycle_mount

# Dirty the last page in the range and immediately set the write delay so that
# any subsequent writes have to wait.  The injected value is 500, which going
# by the knob's name is a delay in milliseconds.
$XFS_IO_PROG -c "pwrite -S 0x58 $dirty_offset 1" $SCRATCH_MNT/file >> $seqres.full
_scratch_inject_error "write_delay_ms" 500

# Set up ftrace and ask it to record the xfs_iomap_invalid event; the
# wait_for_errortag loop greps the trace dump for that event.
_ftrace_setup
_ftrace_record_events 'xfs_iomap_invalid'

# Start a sentry to look for evidence of invalidation tracepoint tripping.  If
# we see that, we know we've forced writeback to revalidate a mapping.  The
# test has been successful, so turn off the delay.
sentryfile=$TEST_DIR/$seq.sentry
tracefile=$TEST_DIR/$seq.ftrace

# The sentry appends to $tracefile and then greps the whole file, and the
# final verdict greps it again.  If an earlier run was interrupted before
# _cleanup could remove the file, its old iomap_invalid lines would make the
# sentry cancel the write delay immediately and would fake a successful
# detection, so always start from a clean slate.
rm -f "$tracefile"
# Background sentry: poll the ftrace buffer while $sentryfile exists.
#
# Every pass appends any iomap_invalid lines from the trace dump to
# $tracefile.  As soon as that file contains such a line, the write delay is
# set back to 0, event recording is told to stop, and the loop ends.
# Otherwise the loop sleeps half a second and tries again, until the sentry
# file disappears.
wait_for_errortag() {
	until [ ! -e "$sentryfile" ]; do
		_ftrace_dump | grep iomap_invalid >> "$tracefile"

		# Nothing captured yet: back off briefly and poll again.
		if ! grep -q iomap_invalid "$tracefile"; then
			sleep 0.5
			continue
		fi

		# Invalidation observed; the delay has done its job.
		_scratch_inject_error "write_delay_ms" 0
		_ftrace_ignore_events
		break
	done
}
# Create the sentry file and start the polling loop in the background; the
# loop keeps running for as long as the sentry file exists.
touch $sentryfile
wait_for_errortag &

# Start thread 1 + writeback above.  This is the big 0x57 write over the first
# $write_len bytes, issued with a buffer size equal to the write length.  Once
# xfs_io returns, the subshell removes the sentry file so that the polling
# loop stops.
($XFS_IO_PROG -c "pwrite -S 0x57 -b $write_len 0 $write_len" \
	$SCRATCH_MNT/file >> $seqres.full; rm -f $sentryfile) &
# Give thread 1 a second to get going before starting thread 2.
sleep 1

# Start thread 2 to simulate reclaim writeback via sync_file_range and fadvise
# to drop the page cache.
# NOTE: the fadvise variant below is left commented out; what actually runs is
# a sync_range over the last block followed by a read-only mmap and a
# "madvise -d" covering the whole file.
#	-c "fadvise -d $dirty_offset 1" \
# Start of the last block, i.e. the block containing the byte at $dirty_offset.
dirty_pageoff=$((filesz - blksz))
$XFS_IO_PROG -c "sync_range -a -w $dirty_pageoff $blksz" \
	-c "mmap -r 0 $filesz" \
	-c "madvise -d 0 $filesz" \
	$SCRATCH_MNT/file >> $seqres.full
# Wait for both background jobs (thread 1 and the sentry loop) to finish.
wait
# Thread 1 normally removes the sentry file itself; remove it again here in
# case it is still around.
rm -f $sentryfile

# Save whatever the sentry captured from the trace buffer in the full log and
# note whether an invalidation event was among it (0 means it was seen).
cat "$tracefile" >> $seqres.full
if grep -q iomap_invalid "$tracefile"; then
	saw_invalidation=0
else
	saw_invalidation=1
fi

# Flush everything to disk.  If the bug manifests, then after the cycle,
# file should have stale 0x58 in block 0 because we silently dropped a write.
_scratch_cycle_mount

if cmp -s $SCRATCH_MNT/file $SCRATCH_MNT/file.compare; then
	if [ $saw_invalidation -ne 0 ]; then
		# The files matched, but nothing got logged about the
		# revalidation?
		echo "Expected to hear about write iomap invalidation?"
	fi
else
	# Contents differ: log the extent maps and dump both files so the
	# mismatch shows up in the test output.
	echo file and file.compare do not match
	$XFS_IO_PROG -c 'bmap -celpv' -c 'bmap -elpv' $SCRATCH_MNT/file &>> $seqres.full
	echo file.compare
	od -tx1 -Ad -c $SCRATCH_MNT/file.compare
	echo file
	od -tx1 -Ad -c $SCRATCH_MNT/file
fi

echo Silence is golden
status=0
exit
