Add some automated tests
This commit is contained in:
parent
35ae98cc65
commit
a6fe2a199c
|
@ -1,4 +1,4 @@
|
|||
*.o
|
||||
*~
|
||||
sortbin
|
||||
recgen
|
||||
/build
|
||||
/tests/testdata
|
||||
|
|
14
Makefile
14
Makefile
|
@ -4,17 +4,22 @@
|
|||
|
||||
CXX = g++
|
||||
CXXFLAGS = -Wall -O2 -pthread
|
||||
# -fsanitize=address -fsanitize=undefined
|
||||
CXXFLAGS_DEBUG = -g -fsanitize=address -fsanitize=undefined -fsanitize=leak
|
||||
|
||||
SRCDIR = src
|
||||
BUILDDIR = build
|
||||
|
||||
TOOLS = sortbin recgen
|
||||
BINFILES = $(patsubst %,$(BUILDDIR)/%,$(TOOLS))
|
||||
BINFILES_DEBUG = $(patsubst %,$(BUILDDIR)/%_dbg,$(TOOLS))
|
||||
|
||||
.PHONY: all
|
||||
all: $(BINFILES)
|
||||
|
||||
# Run the automated test suite against the debug builds of the tools
# (compiled with $(CXXFLAGS_DEBUG), i.e. with the sanitizers enabled).
# The test script expects to be started from inside tests/; "&&" makes sure
# it is never launched from the wrong directory when the cd fails.
.PHONY: test
test: $(BINFILES_DEBUG)
	cd tests && ./run_tests.sh
|
||||
|
||||
$(BUILDDIR)/sortbin: $(SRCDIR)/sortbin.cpp
|
||||
$(BUILDDIR)/recgen: $(SRCDIR)/recgen.cpp
|
||||
|
||||
|
@ -22,7 +27,12 @@ $(BUILDDIR)/%: $(SRCDIR)/%.cpp
|
|||
@mkdir -p $(BUILDDIR)
|
||||
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) $< $(LDLIBS) -o $@
|
||||
|
||||
# Debug variant of each tool: built from the same source file as the regular
# binary, with $(CXXFLAGS_DEBUG) appended to the normal compiler flags, and
# written to the build directory under the tool name plus a "_dbg" suffix.
$(BUILDDIR)/%_dbg: $(SRCDIR)/%.cpp
	@mkdir -p $(@D)
	$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(CXXFLAGS_DEBUG) $(LDFLAGS) $< $(LDLIBS) -o $@
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
$(RM) $(BINFILES)
|
||||
$(RM) $(BINFILES) $(BINFILES_DEBUG)
|
||||
$(RM) -r tests/testdata
|
||||
|
||||
|
|
|
@ -0,0 +1,279 @@
|
|||
#!/bin/bash
|
||||
|
||||
#
|
||||
# This script runs a series of tests on the SortBin tool.
|
||||
#
|
||||
# The tests are relatively small.
|
||||
# The whole test suite requires less than 1 GB disk space and 1 GB memory.
|
||||
#
|
||||
# This script assumes it runs from the "tests" subdirectory of the repository
|
||||
# and expects the SortBin tools already built in the "build" subdirectory.
|
||||
#
|
||||
# This script writes temporary data files to a subdirectory "testdata"
|
||||
# in the current working directory.
|
||||
#
|
||||
|
||||
SCRIPTDIR=$(dirname "$0")
|
||||
|
||||
# Tools under test.
|
||||
RECGEN="${SCRIPTDIR}/../build/recgen_dbg"
|
||||
SORTBIN="${SCRIPTDIR}/../build/sortbin_dbg"
|
||||
|
||||
# GNU sort command.
|
||||
SORT="sort"
|
||||
|
||||
# Directory for temporary test data.
|
||||
TESTDATA="testdata"
|
||||
|
||||
|
||||
# Wrapper for the "recgen" tool under test.
# Prints the command line that is about to be executed, then runs
# ${RECGEN} with all arguments passed through unchanged.
recgen () {
    printf '%s\n' "${RECGEN} $*"
    "${RECGEN}" "$@"
}
|
||||
|
||||
|
||||
# Wrapper for the Unix "sort" command used to produce reference output.
# Prints the command line, then runs ${SORT} with its temporary directory
# pointed at ${TESTDATA}, followed by the caller's arguments.
unixsort () {
    local tmpdir_opt="--temporary-directory=${TESTDATA}"
    printf '%s\n' "${SORT} ${tmpdir_opt} $*"
    "${SORT}" "${tmpdir_opt}" "$@"
}
|
||||
|
||||
|
||||
# Wrapper for the "sortbin" tool under test.
# Prints the command line, then runs ${SORTBIN} in verbose mode with its
# temporary directory pointed at ${TESTDATA}, followed by the caller's
# arguments.
sortbin () {
    local tmpdir_opt="--temporary-directory=${TESTDATA}"
    printf '%s\n' "${SORTBIN} ${tmpdir_opt} --verbose $*"
    "${SORTBIN}" "${tmpdir_opt}" --verbose "$@"
}
|
||||
|
||||
|
||||
# Verify the SHA-256 checksum of a file in the test data directory.
# $1 = file name, relative to ${TESTDATA}
# $2 = expected SHA-256 sum
# Returns non-zero when ${TESTDATA} cannot be entered or the check fails.
verify_sha256 () {
    # Runs in a subshell so the caller's working directory is untouched.
    # The checksum line uses the canonical "<sum>  <name>" layout (two
    # separator characters) that "sha256sum -c" reads, and "&&" keeps the
    # check from running in the wrong directory if the cd fails.
    ( cd "${TESTDATA}" && printf '%s  %s\n' "${2}" "${1}" | sha256sum -c )
}
|
||||
|
||||
|
||||
# Report how many lines an output file in ${TESTDATA} contains.
# The count is printed as the number of unique records.
# $1 = output file name, relative to ${TESTDATA}
count_unique () {
    local total
    total=$(wc --lines < "${TESTDATA}/$1")
    printf '%s: %s unique records\n' "$1" "${total}"
}
|
||||
|
||||
|
||||
# Stop on the first error.
|
||||
set -e
|
||||
|
||||
echo "Running tests"
|
||||
echo
|
||||
|
||||
# Wipe and create test data directory, if needed.
|
||||
[ ! -d "${TESTDATA}" ] || rm -r "${TESTDATA}"
|
||||
mkdir -p "${TESTDATA}"
|
||||
|
||||
|
||||
#
|
||||
# Generate test input files
|
||||
#
|
||||
|
||||
echo "Generating in01: 10 MB, 10 bytes/record, ascii, 50% duplicates"
|
||||
|
||||
hash_in01="fe72553a046d9e268c8c9da62902f63f1ba4b02901cd2413f9f1df7792e26304"
|
||||
hash_out01="59df36e7f7895b56e59ef94cbad269cd5bec908d8a8e32aa2ca5ea624481431f"
|
||||
hash_out01u="e46e33fbf34ca575652806a51d2042b64da073d76ed52e5e74c4becb85db974f"
|
||||
|
||||
recgen -S 1001 -s 10 -n 1000000 -a -d 0.5 "${TESTDATA}/in01"
|
||||
verify_sha256 in01 ${hash_in01}
|
||||
echo
|
||||
|
||||
echo "Generating in02: 10 MB, 10 bytes/record, ascii, 95% duplicates"
|
||||
|
||||
hash_in02="c5cdd3faeaf11b1a5508e3f781148a8b899514999f61917c4897bef8dc65aacf"
|
||||
hash_out02u="a95f80b2400a1e14645f1d186a065a7483c103229fc94f11bb0d62e4da6edd7d"
|
||||
|
||||
recgen -S 1002 -s 10 -n 1000000 -a -d 0.95 "${TESTDATA}/in02"
|
||||
verify_sha256 in02 ${hash_in02}
|
||||
echo
|
||||
|
||||
echo "Generating in03: 100 MB, 10 bytes/record, ascii, 50% duplicates"
|
||||
|
||||
hash_in03="1a3c00a317f39e9dc336c2a46d31a3e3189a41285723f32a09be316d5273580c"
|
||||
hash_out03="f17a2e566bbccecdab04373f92ba87784af77cb36c510fb7949d7fd719528a01"
|
||||
hash_out03u="cbb975d1ce136e00668f14feb5880ebce474366ac5cb5c74287b7b4018c62b03"
|
||||
|
||||
recgen -S 1003 -s 10 -n 10000000 -a -d 0.5 "${TESTDATA}/in03"
|
||||
verify_sha256 in03 ${hash_in03}
|
||||
echo
|
||||
|
||||
echo "Generating in04: 100 MB, 100 bytes/record, ascii, 50% duplicates"
|
||||
|
||||
hash_in04="75fee029fbaa762176ec4f72ecf356674dda04cefebfb01af48b17ee560f9c25"
|
||||
hash_out04u="4df05a28cdf2af29c4e8c5bb22930ee68bea12cca362ae378ab5dc6c6049fa9f"
|
||||
|
||||
recgen -S 1004 -s 100 -n 1000000 -a -d 0.5 "${TESTDATA}/in04"
|
||||
verify_sha256 in04 ${hash_in04}
|
||||
echo
|
||||
|
||||
echo "Generating in05: 10 MB, 10 bytes/record, binary, 50% duplicates"
|
||||
|
||||
hash_in05="aacd9d9b1955a88397004a8ca6192b20fb9bcfa60633ac735cb6297a997f38ad"
|
||||
hash_out05u="f8b5da29076b36f7a97c9fc15da6a1754e4cc85aead368bf0540b9940f8f8cdb"
|
||||
|
||||
recgen -S 1005 -s 10 -n 1310720 -d 0.5 "${TESTDATA}/in05"
|
||||
verify_sha256 in05 ${hash_in05}
|
||||
echo
|
||||
|
||||
|
||||
#
|
||||
# Run GNU sort to check reference output.
|
||||
#
|
||||
|
||||
echo "Running GNU sort to check reference output"
|
||||
echo
|
||||
|
||||
unixsort -o "${TESTDATA}/out01" "${TESTDATA}/in01"
|
||||
verify_sha256 out01 ${hash_out01}
|
||||
|
||||
unixsort -o "${TESTDATA}/out01u" --unique "${TESTDATA}/in01"
|
||||
verify_sha256 out01u ${hash_out01u}
|
||||
count_unique out01u
|
||||
|
||||
unixsort -o "${TESTDATA}/out02u" --unique "${TESTDATA}/in02"
|
||||
verify_sha256 out02u ${hash_out02u}
|
||||
count_unique out02u
|
||||
|
||||
unixsort -o "${TESTDATA}/out03" "${TESTDATA}/in03"
|
||||
verify_sha256 out03 ${hash_out03}
|
||||
|
||||
unixsort -o "${TESTDATA}/out03u" --unique "${TESTDATA}/in03"
|
||||
verify_sha256 out03u ${hash_out03u}
|
||||
count_unique out03u
|
||||
|
||||
unixsort -o "${TESTDATA}/out04u" --unique "${TESTDATA}/in04"
|
||||
verify_sha256 out04u ${hash_out04u}
|
||||
count_unique out04u
|
||||
|
||||
echo
|
||||
|
||||
|
||||
#
|
||||
# Test in-memory sorting.
|
||||
#
|
||||
|
||||
echo "in01: 10 MB, in-memory sort, non-parallel, without background I/O"
|
||||
sortbin --size=10 --memory=100M --parallel=1 --no-iothread "${TESTDATA}/in01" "${TESTDATA}/out01_x"
|
||||
verify_sha256 out01_x ${hash_out01}
|
||||
rm "${TESTDATA}/out01_x"
|
||||
echo
|
||||
|
||||
echo "in01: 10 MB, in-memory sort, non-parallel"
|
||||
sortbin --size=10 --memory=100M --parallel=1 --iothread "${TESTDATA}/in01" "${TESTDATA}/out01_x"
|
||||
verify_sha256 out01_x ${hash_out01}
|
||||
rm "${TESTDATA}/out01_x"
|
||||
echo
|
||||
|
||||
echo "in01: 10 MB, in-memory sort, parallel"
|
||||
sortbin --size=10 --memory=100M --parallel=4 --iothread "${TESTDATA}/in01" "${TESTDATA}/out01_x"
|
||||
verify_sha256 out01_x ${hash_out01}
|
||||
rm "${TESTDATA}/out01_x"
|
||||
echo
|
||||
|
||||
echo "in01: 10 MB, in-memory sort, unique, parallel"
|
||||
sortbin --size=10 --memory=100M --parallel=4 --iothread --unique "${TESTDATA}/in01" "${TESTDATA}/out01u_x"
|
||||
verify_sha256 out01u_x ${hash_out01u}
|
||||
rm "${TESTDATA}/out01u_x"
|
||||
echo
|
||||
|
||||
echo "in02: 10 MB, many duplicates, in-memory sort, unique, parallel"
|
||||
sortbin --size=10 --memory=100M --parallel=4 --iothread --unique "${TESTDATA}/in02" "${TESTDATA}/out02u_x"
|
||||
verify_sha256 out02u_x ${hash_out02u}
|
||||
rm "${TESTDATA}/out02u_x"
|
||||
echo
|
||||
|
||||
echo "in03: 100 MB, in-memory sort, unique, parallel"
|
||||
sortbin --size=10 --memory=100M --parallel=4 --iothread --unique "${TESTDATA}/in03" "${TESTDATA}/out03u_x"
|
||||
verify_sha256 out03u_x ${hash_out03u}
|
||||
rm "${TESTDATA}/out03u_x"
|
||||
echo
|
||||
|
||||
echo "in04: 100 MB, big records, in-memory sort, unique, parallel"
|
||||
sortbin --size=100 --memory=100M --parallel=4 --iothread --unique "${TESTDATA}/in04" "${TESTDATA}/out04u_x"
|
||||
verify_sha256 out04u_x ${hash_out04u}
|
||||
rm "${TESTDATA}/out04u_x"
|
||||
echo
|
||||
|
||||
echo "in05: 10 MB, binary data, in-memory sort, unique, parallel"
|
||||
sortbin --size=10 --memory=100M --parallel=4 --iothread --unique "${TESTDATA}/in05" "${TESTDATA}/out05u_x"
|
||||
verify_sha256 out05u_x ${hash_out05u}
|
||||
rm "${TESTDATA}/out05u_x"
|
||||
echo
|
||||
|
||||
|
||||
#
|
||||
# Test external sorting.
|
||||
#
|
||||
|
||||
echo "in03: 100 MB, external sort, 1 merge pass"
|
||||
sortbin --size=10 --memory=20M --parallel=4 --iothread "${TESTDATA}/in03" "${TESTDATA}/out03_x"
|
||||
verify_sha256 out03_x ${hash_out03}
|
||||
rm "${TESTDATA}/out03_x"
|
||||
echo
|
||||
|
||||
echo "in03: 100 MB, external sort, 1 merge pass, unique"
|
||||
sortbin --size=10 --memory=20M --parallel=4 --iothread --unique "${TESTDATA}/in03" "${TESTDATA}/out03u_x"
|
||||
verify_sha256 out03u_x ${hash_out03u}
|
||||
rm "${TESTDATA}/out03u_x"
|
||||
echo
|
||||
|
||||
echo "in05: 10 MB, binary data, external sort, 1 merge pass, equal block sizes"
|
||||
sortbin --size=10 --memory=5M --parallel=4 --iothread --unique "${TESTDATA}/in05" "${TESTDATA}/out05u_x"
|
||||
verify_sha256 out05u_x ${hash_out05u}
|
||||
rm "${TESTDATA}/out05u_x"
|
||||
echo
|
||||
|
||||
echo "in03: 100 MB, external sort, 2 merge passes, unique, without background I/O"
|
||||
sortbin --size=10 --memory=10M --parallel=4 --no-iothread --branch=4 --unique "${TESTDATA}/in03" "${TESTDATA}/out03u_x"
|
||||
verify_sha256 out03u_x ${hash_out03u}
|
||||
rm "${TESTDATA}/out03u_x"
|
||||
echo
|
||||
|
||||
echo "in03: 100 MB, external sort, 2 merge passes, unique"
|
||||
sortbin --size=10 --memory=20M --parallel=4 --iothread --branch=4 --unique "${TESTDATA}/in03" "${TESTDATA}/out03u_x"
|
||||
verify_sha256 out03u_x ${hash_out03u}
|
||||
rm "${TESTDATA}/out03u_x"
|
||||
echo
|
||||
|
||||
echo "in03: 100 MB, external sort, 2 merge passes, balanced merge tree"
|
||||
sortbin --size=10 --memory=22M --parallel=4 --iothread --branch=3 "${TESTDATA}/in03" "${TESTDATA}/out03_x"
|
||||
verify_sha256 out03_x ${hash_out03}
|
||||
rm "${TESTDATA}/out03_x"
|
||||
echo
|
||||
|
||||
echo "in03: 100 MB, external sort, 2 merge passes, slightly unbalanced merge"
|
||||
sortbin --size=10 --memory=20M --parallel=4 --iothread --branch=9 "${TESTDATA}/in03" "${TESTDATA}/out03_x"
|
||||
verify_sha256 out03_x ${hash_out03}
|
||||
rm "${TESTDATA}/out03_x"
|
||||
echo
|
||||
|
||||
echo "in03: 100 MB, external sort, 2 merge passes, slightly unbalanced merge"
|
||||
sortbin --size=10 --memory=13M --parallel=4 --iothread --branch=4 "${TESTDATA}/in03" "${TESTDATA}/out03_x"
|
||||
verify_sha256 out03_x ${hash_out03}
|
||||
rm "${TESTDATA}/out03_x"
|
||||
echo
|
||||
|
||||
echo "in03: 100 MB, external sort, 2-way merging, 5 merge passes"
|
||||
sortbin --size=10 --memory=10M --parallel=4 --iothread --branch=2 "${TESTDATA}/in03" "${TESTDATA}/out03_x"
|
||||
verify_sha256 out03_x ${hash_out03}
|
||||
rm "${TESTDATA}/out03_x"
|
||||
echo
|
||||
|
||||
echo "in03: 100 MB, external sort, 24-way merge, 1 merge pass"
|
||||
sortbin --size=10 --memory=8M --parallel=4 --iothread --branch=24 "${TESTDATA}/in03" "${TESTDATA}/out03_x"
|
||||
verify_sha256 out03_x ${hash_out03}
|
||||
rm "${TESTDATA}/out03_x"
|
||||
echo
|
||||
|
||||
echo "Tests finished"
|
Loading…
Reference in New Issue