Add command-line option for I/O thread
Actual background I/O not yet implemented.
This commit is contained in:
parent
eb3b06eede
commit
57c20dbca3
114
src/sortbin.cpp
114
src/sortbin.cpp
|
@ -57,6 +57,9 @@
|
|||
#define DEFAULT_THREADS 1
|
||||
#define MAX_THREADS 128
|
||||
|
||||
/* Nonzero to do I/O in background threads by default; the --iothread and --no-iothread options override this. */
|
||||
#define DEFAULT_IO_THREAD 1
|
||||
|
||||
/* Align buffer sizes and I/O transfers to multiples of this number of records.
|
||||
For efficiency, I/O should be done in multiples of 4096 bytes; a multiple of 4096 records satisfies this for any record size. */
|
||||
#define TRANSFER_ALIGNMENT 4096
|
||||
|
@ -86,6 +89,9 @@ struct SortContext
|
|||
/** True to eliminate duplicate records. */
|
||||
bool flag_unique;
|
||||
|
||||
/** True to do I/O in background threads. */
|
||||
bool flag_io_thread;
|
||||
|
||||
/** True to write progress messages to stderr. */
|
||||
bool flag_verbose;
|
||||
|
||||
|
@ -2206,22 +2212,42 @@ void usage()
|
|||
"\n"
|
||||
"Options:\n"
|
||||
"\n"
|
||||
" -s, --size=N specify record size of N bytes (required)\n"
|
||||
" -u, --unique eliminate duplicates after sorting\n"
|
||||
" --memory=<n>M use at most <n> MiByte RAM (default: %d)\n"
|
||||
" --memory=<n>G use at most <n> GiByte RAM\n"
|
||||
" --branch=N merge N subarrays in one step (default: %d)\n"
|
||||
" --threads=N use N threads for parallel sorting (default: %d)\n"
|
||||
" --temporary-directory=DIR write temporary file to the specified\n"
|
||||
" directory (default: $TMPDIR)\n"
|
||||
" -s, --size=N\n"
|
||||
" Specify record size of N bytes (required)\n"
|
||||
"\n"
|
||||
" -u, --unique\n"
|
||||
" Eliminate duplicates after sorting\n"
|
||||
"\n"
|
||||
" --memory=<n>M, --memory=<n>G\n"
|
||||
" Specify the amount of RAM that may be used.\n"
|
||||
" Use suffix 'M' for MiByte, or 'G' for GiByte."
|
||||
" (default: %d MiB)\n"
|
||||
"\n"
|
||||
" --branch=N\n"
|
||||
" Merge at most N subarrays in one step. (default: %d)\n"
|
||||
"\n"
|
||||
" --parallel=N\n"
|
||||
" Use N threads for parallel sorting. (default: %d)\n"
|
||||
"\n"
|
||||
" --iothread / --no-iothread\n"
|
||||
" Enable or disable use of background threads for I/O.\n"
|
||||
" (default: %s)\n"
|
||||
"\n"
|
||||
" -T, --temporary-directory=DIR\n"
|
||||
" Write temporary file to the specified directory."
|
||||
" (default: $TMPDIR)\n"
|
||||
"\n"
|
||||
" -v, --verbose\n"
|
||||
" Write progress messages to STDERR.\n"
|
||||
"\n"
|
||||
"The output file must not yet exist.\n"
|
||||
"If the data does not fit in memory, a temporary file will be\n"
|
||||
"If the file does not fit in memory, a temporary file will be\n"
|
||||
"created with the same size as the input/output files.\n"
|
||||
"\n",
|
||||
DEFAULT_MEMORY_SIZE_MBYTE,
|
||||
DEFAULT_BRANCH_FACTOR,
|
||||
DEFAULT_THREADS);
|
||||
DEFAULT_THREADS,
|
||||
DEFAULT_IO_THREAD ? "enable" : "disable");
|
||||
}
|
||||
|
||||
|
||||
|
@ -2235,59 +2261,72 @@ int main(int argc, char **argv)
|
|||
{ "unique", 0, NULL, 'u' },
|
||||
{ "memory", 1, NULL, 'M' },
|
||||
{ "branch", 1, NULL, 'B' },
|
||||
{ "threads", 1, NULL, 'J' },
|
||||
{ "parallel", 1, NULL, 'P' },
|
||||
{ "temporary-directory", 1, NULL, 'T' },
|
||||
{ "iothread", 0, NULL, 'X' },
|
||||
{ "no-iothread", 0, NULL, 'x' },
|
||||
{ "verbose", 0, NULL, 'v' },
|
||||
{ "help", 0, NULL, 'h' },
|
||||
{ NULL, 0, NULL, 0 }
|
||||
};
|
||||
bool flag_unique = false;
|
||||
bool flag_verbose = false;
|
||||
unsigned int record_size = 0;
|
||||
unsigned int branch_factor = DEFAULT_BRANCH_FACTOR;
|
||||
unsigned int num_threads = DEFAULT_THREADS;
|
||||
uint64_t memory_size = uint64_t(DEFAULT_MEMORY_SIZE_MBYTE) * 1024 * 1024;
|
||||
std::string tempdir = get_default_tmpdir();
|
||||
int opt;
|
||||
|
||||
SortContext ctx;
|
||||
ctx.record_size = 0;
|
||||
ctx.memory_size = uint64_t(DEFAULT_MEMORY_SIZE_MBYTE) * 1024 * 1024;
|
||||
ctx.branch_factor = DEFAULT_BRANCH_FACTOR;
|
||||
ctx.num_threads = DEFAULT_THREADS;
|
||||
ctx.flag_unique = false;
|
||||
ctx.flag_io_thread = DEFAULT_IO_THREAD;
|
||||
ctx.flag_verbose = false;
|
||||
ctx.temporary_directory = get_default_tmpdir();
|
||||
|
||||
int opt;
|
||||
while ((opt = getopt_long(argc, argv, "s:T:uvh", longopts, NULL)) != -1) {
|
||||
switch (opt) {
|
||||
case 's':
|
||||
if (!parse_uint(optarg, record_size) || record_size < 1) {
|
||||
if (!parse_uint(optarg, ctx.record_size)
|
||||
|| ctx.record_size < 1) {
|
||||
fprintf(stderr,
|
||||
"ERROR: Invalid record size (must be at least 1)\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
break;
|
||||
case 'u':
|
||||
flag_unique = true;
|
||||
ctx.flag_unique = true;
|
||||
break;
|
||||
case 'M':
|
||||
memory_size = parse_memory_size(optarg);
|
||||
if (memory_size == 0) {
|
||||
ctx.memory_size = parse_memory_size(optarg);
|
||||
if (ctx.memory_size == 0) {
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
break;
|
||||
case 'B':
|
||||
if (!parse_uint(optarg, branch_factor) || branch_factor < 2) {
|
||||
if (!parse_uint(optarg, ctx.branch_factor)
|
||||
|| ctx.branch_factor < 2) {
|
||||
fprintf(stderr,
|
||||
"ERROR: Invalid radix value, must be at least 2\n");
|
||||
"ERROR: Invalid branch factor, must be at least 2\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
break;
|
||||
case 'J':
|
||||
if (!parse_uint(optarg, num_threads)
|
||||
|| num_threads < 1
|
||||
|| num_threads > MAX_THREADS) {
|
||||
case 'P':
|
||||
if (!parse_uint(optarg, ctx.num_threads)
|
||||
|| ctx.num_threads < 1
|
||||
|| ctx.num_threads > MAX_THREADS) {
|
||||
fprintf(stderr, "ERROR: Invalid number of threads\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
break;
|
||||
case 'T':
|
||||
tempdir = optarg;
|
||||
ctx.temporary_directory = optarg;
|
||||
break;
|
||||
case 'X':
|
||||
ctx.flag_io_thread = true;
|
||||
break;
|
||||
case 'x':
|
||||
ctx.flag_io_thread = false;
|
||||
break;
|
||||
case 'v':
|
||||
flag_verbose = true;
|
||||
ctx.flag_verbose = true;
|
||||
break;
|
||||
case 'h':
|
||||
usage();
|
||||
|
@ -2298,7 +2337,7 @@ int main(int argc, char **argv)
|
|||
}
|
||||
}
|
||||
|
||||
if (record_size < 1) {
|
||||
if (ctx.record_size < 1) {
|
||||
fprintf(stderr, "ERROR: Missing required parameter --size\n");
|
||||
usage();
|
||||
return EXIT_FAILURE;
|
||||
|
@ -2317,7 +2356,7 @@ int main(int argc, char **argv)
|
|||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (memory_size >= SIZE_MAX) {
|
||||
if (ctx.memory_size >= SIZE_MAX) {
|
||||
fprintf(
|
||||
stderr,
|
||||
"ERROR: This system supports at most %zu MB memory\n",
|
||||
|
@ -2328,15 +2367,6 @@ int main(int argc, char **argv)
|
|||
std::string input_name(argv[optind]);
|
||||
std::string output_name(argv[optind+1]);
|
||||
|
||||
SortContext ctx;
|
||||
ctx.record_size = record_size;
|
||||
ctx.memory_size = memory_size;
|
||||
ctx.branch_factor = branch_factor;
|
||||
ctx.num_threads = num_threads;
|
||||
ctx.flag_unique = flag_unique;
|
||||
ctx.flag_verbose = flag_verbose;
|
||||
ctx.temporary_directory = tempdir;
|
||||
|
||||
try {
|
||||
sortbin(input_name, output_name, ctx);
|
||||
} catch (const std::exception& ex) {
|
||||
|
|
Loading…
Reference in New Issue