/*
 * Sort arrays of binary data records.
 *
 * Input and output files contain flat, raw arrays of fixed-length
 * binary data records.
 *
 * Records are interpreted as fixed-length strings of 8-bit unsigned integers.
 * The program sorts these records in lexicographic order.
 *
 * This program is optimized for short records, e.g. up to 20 bytes.
 *
 * Written by Joris van Rantwijk in 2022.
 */

// (already defined by g++) #define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64

#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <stdarg.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <inttypes.h>
#include <limits.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <algorithm>
#include <condition_variable>
#include <deque>
#include <functional>
#include <future>
#include <iterator>
#include <memory>
#include <mutex>
#include <numeric>
#include <stdexcept>
#include <string>
#include <system_error>
#include <thread>
#include <tuple>
#include <vector>


/* Maximum amount of RAM to use (in MBytes). */
#define DEFAULT_MEMORY_SIZE_MBYTE 1024

/* Default branch factor while merging. */
#define DEFAULT_BRANCH_FACTOR 16

/* Default number of sorting threads. */
#define DEFAULT_THREADS 1
#define MAX_THREADS 128

/* Align buffer sizes and I/O on this number of records.
   For efficiency, I/O should be done in multiples of 4096 bytes. */
#define TRANSFER_ALIGNMENT 4096
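
/* Worked example (illustrative only): with 10-byte records, buffer sizes are
   rounded down to a multiple of TRANSFER_ALIGNMENT * record_size
   = 4096 * 10 = 40960 bytes, so each buffer holds a whole number of records
   as well as a whole number of 4096-byte I/O units. */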

/* Template for temporary file name. Must end in 6 'X' characters. */
#define TEMPFILE_TEMPLATE "sortbin_tmpXXXXXX"


namespace { // anonymous namespace


/** Information about the sort job. */
struct SortContext
{
    /** Record length in bytes. */
    unsigned int record_size;

    /** Maximum memory to use (bytes). */
    uint64_t memory_size;

    /** Maximum number of arrays to merge in one step. */
    unsigned int branch_factor;

    /** Number of threads for parallel sorting. */
    unsigned int num_threads;

    /** True to eliminate duplicate records. */
    bool flag_unique;

    /** True to write progress messages to stderr. */
    bool flag_verbose;

    /** Directory where temporary files are created. */
    std::string temporary_directory;
};


/** Strategy for multi-pass external sorting. */
struct SortStrategy
{
    /** Strategy for a single merge pass. */
    struct MergePass {
        /** Number of records in each input block of this pass. */
        std::vector<uint64_t> records_per_block;
    };

    /** Number of records per block during the initial sort pass. */
    uint64_t records_per_sort_block;

    /** Number of blocks for the initial sort pass. */
    uint64_t num_sort_blocks;

    /** Number of blocks that are processed during the first merge pass. */
    uint64_t num_sort_blocks_first_merge;

    /** List of merge passes. */
    std::vector<MergePass> merge_pass;
};


/** Show a progress message. */
void log(const SortContext& ctx, const char *format, ...)
    __attribute__ ((format (printf, 2, 3)));

void log(const SortContext& ctx, const char *format, ...)
{
    va_list ap;
    va_start(ap, format);
    if (ctx.flag_verbose) {
        vfprintf(stderr, format, ap);
    }
    va_end(ap);
}


/** Time measurements. */
class Timer
{
public:
    Timer()
      : m_value(0.0), m_running(false)
    { }

    double value() const
    {
        double v = m_value;
        if (m_running) {
            struct timespec tnow;
            clock_gettime(CLOCK_REALTIME, &tnow);
            v += tnow.tv_sec - m_tstart.tv_sec;
            v += 1.0e-9 * (tnow.tv_nsec - m_tstart.tv_nsec);
        }
        return v;
    }

    void start()
    {
        m_value = 0.0;
        m_running = true;
        clock_gettime(CLOCK_REALTIME, &m_tstart);
    }

    void resume()
    {
        if (!m_running) {
            m_running = true;
            clock_gettime(CLOCK_REALTIME, &m_tstart);
        }
    }

    void stop()
    {
        if (m_running) {
            struct timespec tnow;
            clock_gettime(CLOCK_REALTIME, &tnow);
            m_value += tnow.tv_sec - m_tstart.tv_sec;
            m_value += 1.0e-9 * (tnow.tv_nsec - m_tstart.tv_nsec);
            m_running = false;
        }
    }

private:
    double m_value;
    bool m_running;
    struct timespec m_tstart;
};
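
/*
 * Usage sketch (illustrative; this pattern is used by the functions below):
 *
 *     Timer timer;
 *     timer.start();
 *     // ... do some work ...
 *     timer.stop();
 *     log(ctx, "  t = %.3f seconds\n", timer.value());
 */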


/** Thread pool for parallel sorting and background I/O. */
class ThreadPool
{
public:
    typedef std::future<void> FutureType;

    /** Initialize thread pool with the specified number of threads. */
    explicit ThreadPool(unsigned int num_threads);

    // Prevent copying and assignment.
    ThreadPool(const ThreadPool&) = delete;
    ThreadPool& operator=(const ThreadPool&) = delete;

    /**
     * Destructor: Wait until pending tasks are finished, then join threads.
     */
    ~ThreadPool();

    /**
     * Submit a function to be executed in the thread pool.
     * @param f Function which takes no arguments and returns void.
     * @return A future that becomes ready when the function completes.
     */
    template <class FunctionType>
    FutureType submit(FunctionType f)
    {
        std::packaged_task<void()> task(f);
        FutureType fut(task.get_future());
        std::lock_guard<std::mutex> lock(m_mutex);
        m_queue.push_back(std::move(task));
        m_cond.notify_all();
        return fut;
    }

private:
    /** Worker function that runs in each thread. */
    void worker_function();

    std::mutex m_mutex;
    std::condition_variable m_cond;
    std::vector<std::thread> m_threads;
    bool m_stop_flag;
    std::deque<std::packaged_task<void()>> m_queue;
};


// Constructor.
ThreadPool::ThreadPool(unsigned int num_threads)
  : m_stop_flag(false)
{
    // Create the worker threads.
    for (unsigned int i = 0; i < num_threads; i++) {
        m_threads.emplace_back(&ThreadPool::worker_function, this);
    }
}


// Destructor.
ThreadPool::~ThreadPool()
{
    // Set stop flag.
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_stop_flag = true;
        m_cond.notify_all();
    }

    // Join the worker threads.
    for (auto& thread : m_threads) {
        thread.join();
    }
}


// Worker function that runs in each thread.
void ThreadPool::worker_function()
{
    // Main loop.
    while (true) {

        std::packaged_task<void()> task;

        // Get a pending task from the queue.
        {
            std::unique_lock<std::mutex> lock(m_mutex);

            // Wait until the queue is non-empty or the stop flag is raised.
            while (m_queue.empty() && !m_stop_flag) {
                m_cond.wait(lock);
            }

            // Exit if the stop flag is raised (thread pool is shutting down).
            if (m_stop_flag) {
                break;
            }

            // Get oldest pending task.
            task = std::move(m_queue.front());
            m_queue.pop_front();
        }

        // Execute the pending task.
        task();
    }
}
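
/*
 * Usage sketch (illustrative): submit() accepts any callable that takes no
 * arguments and returns void, and returns a future which becomes ready when
 * the task has run on one of the worker threads.
 *
 *     ThreadPool pool(4);
 *     ThreadPool::FutureType fut = pool.submit(some_task);  // some_task: any void() callable
 *     fut.wait();  // block until the task has completed
 */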


/** Helper class to wait until multiple "std::future"s are ready. */
class CompletionTracker
{
public:
    typedef std::future<void> FutureType;

    /** Add a future to wait for. */
    void add(FutureType&& fut)
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_futures.emplace_back(std::move(fut));
    }

    /** Wait until all futures have completed. */
    void wait()
    {
        while (true) {
            FutureType fut;

            // Get the next future from the queue.
            {
                std::unique_lock<std::mutex> lock(m_mutex);
                if (m_futures.empty()) {
                    // No more futures. We are done waiting.
                    break;
                }

                fut = std::move(m_futures.front());
                m_futures.pop_front();
            }

            // Wait on this future.
            fut.wait();
        }
    }

private:
    std::mutex m_mutex;
    std::deque<FutureType> m_futures;
};
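
/*
 * Usage sketch (illustrative): collect the futures of tasks submitted to a
 * ThreadPool and block until all of them have finished. Running tasks may
 * themselves add further futures while wait() is in progress.
 *
 *     CompletionTracker tracker;
 *     tracker.add(pool.submit(task_a));   // task_a, task_b: placeholder tasks
 *     tracker.add(pool.submit(task_b));
 *     tracker.wait();
 */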


/**
 * Binary file access.
 *
 * This is a base class which can not be directly constructed.
 * Subclasses implement constructors that open files in various ways.
 */
class BinaryFile
{
public:
    // Prevent copying and assignment.
    BinaryFile(const BinaryFile&) = delete;
    BinaryFile& operator=(const BinaryFile&) = delete;

    /** Return file size in bytes. */
    uint64_t size() const
    {
        return m_file_size;
    }

    /** Truncate file to reduce its size. */
    void truncate_file(uint64_t new_size);

    /** Read raw bytes from file. */
    void read(
        unsigned char * buf,
        uint64_t file_offset,
        size_t length);

    /** Write raw bytes to file. */
    void write(
        const unsigned char * buf,
        uint64_t file_offset,
        size_t length);

protected:
    // Prevent constructing and destructing via this base class.
    BinaryFile()
      : m_fd(-1), m_file_size(0)
    { }

    /** Destructor: close file. */
    ~BinaryFile()
    {
        if (m_fd >= 0) {
            close(m_fd);
        }
    }

    /** File descriptor. */
    int m_fd;

    /** File size (bytes). */
    uint64_t m_file_size;
};


// Truncate file to reduce its size.
void BinaryFile::truncate_file(uint64_t new_size)
{
    int ret = ftruncate(m_fd, new_size);
    if (ret != 0) {
        throw std::system_error(
            errno,
            std::system_category(),
            "Can not truncate file");
    }
    m_file_size = new_size;
}


// Read raw bytes from file.
void BinaryFile::read(
    unsigned char * buf,
    uint64_t file_offset,
    size_t length)
{
    // Note that pread() may read fewer bytes than requested.
    while (length > 0) {
        ssize_t ret = pread(m_fd, buf, length, file_offset);
        if (ret <= 0) {
            throw std::system_error(
                errno,
                std::system_category(),
                "Can not read from file");
        }
        buf += ret;
        file_offset += ret;
        length -= ret;
    }
}


// Write raw bytes to file.
void BinaryFile::write(
    const unsigned char * buf,
    uint64_t file_offset,
    size_t length)
{
    // Note that pwrite() may write fewer bytes than requested.
    while (length > 0) {
        ssize_t ret = pwrite(m_fd, buf, length, file_offset);
        if (ret <= 0) {
            throw std::system_error(
                errno,
                std::system_category(),
                "Can not write to file");
        }
        buf += ret;
        file_offset += ret;
        length -= ret;
    }
}


/** Binary input file. */
class BinaryInputFile : public BinaryFile
{
public:
    /** Open existing file for read-only access. */
    explicit BinaryInputFile(const std::string& filename)
    {
        m_fd = open(filename.c_str(), O_RDONLY);
        if (m_fd < 0) {
            throw std::system_error(
                errno,
                std::system_category(),
                "Can not open input file");
        }

        off_t fsz = lseek(m_fd, 0, SEEK_END);
        if (fsz == (off_t)(-1)) {
            throw std::system_error(
                errno,
                std::system_category(),
                "Can not seek input file");
        }

        m_file_size = fsz;
    }
};


/** Binary output file. */
class BinaryOutputFile : public BinaryFile
{
public:
    /** Create output file and pre-allocate space. */
    BinaryOutputFile(const std::string& filename, uint64_t new_size)
    {
        m_fd = open(filename.c_str(), O_RDWR | O_CREAT | O_EXCL, 0666);
        if (m_fd < 0) {
            throw std::system_error(
                errno,
                std::system_category(),
                "Can not create output file");
        }

        int ret = posix_fallocate(m_fd, 0, new_size);
        if (ret != 0) {
            // posix_fallocate() returns the error number; it does not set errno.
            int errnum = ret;

            // Delete empty output file.
            unlink(filename.c_str());

            throw std::system_error(
                errnum,
                std::system_category(),
                "Can not allocate space in output file");
        }

        m_file_size = new_size;
    }
};


/** Temporary binary file. */
class BinaryTempFile : public BinaryFile
{
public:
    /** Create temporary file and pre-allocate space. */
    BinaryTempFile(const std::string& tempdir, uint64_t new_size)
    {
        // Prepare file name template ending in 6 'X' characters.
        std::string filename_template = tempdir;
        if (!filename_template.empty() && filename_template.back() != '/') {
            filename_template.push_back('/');
        }
        filename_template.append(TEMPFILE_TEMPLATE);

        // Copy template to modifiable buffer.
        std::vector<char> filename_buf(
            filename_template.c_str(),
            filename_template.c_str() + filename_template.size() + 1);

        // Create temporary file with unique name.
        // mkstemp() replaces the 'X' characters in the file name template
        // to create a unique file name.
        m_fd = mkstemp(filename_buf.data());
        if (m_fd < 0) {
            throw std::system_error(
                errno,
                std::system_category(),
                "Can not create temporary file");
        }

        int ret = posix_fallocate(m_fd, 0, new_size);
        if (ret != 0) {
            // posix_fallocate() returns the error number; it does not set errno.
            int errnum = ret;

            // Delete temporary file.
            unlink(filename_buf.data());

            throw std::system_error(
                errnum,
                std::system_category(),
                "Can not allocate space in temporary file");
        }

        m_file_size = new_size;

        // Delete the temporary file.
        // Since the file is still open, it will continue to exist on disk
        // until the file handle is closed.
        if (unlink(filename_buf.data()) != 0) {
            throw std::system_error(
                errno,
                std::system_category(),
                "Can not delete temporary file");
        }
    }
};


/**
 * Read binary records from an input file with buffering.
 *
 * The input stream reads from a sequence of discontinuous blocks in
 * the input file. Each block contains a flat array of binary records.
 * An explicit list of these blocks is passed to the stream constructor.
 *
 * The input stream starts in the "empty" state.
 * The first call to "next_block()" enables reading records from the
 * first input block. When the input stream reaches the end of the
 * current block, it again enters the "empty" state until the following
 * call to "next_block()".
 */
class RecordInputStream
{
    // TODO : double-buffering with delayed I/O via background thread

public:
    typedef std::vector<std::tuple<uint64_t, uint64_t>> BlockList;

    /**
     * Construct a record input stream.
     *
     * The stream is initially in the "empty" state.
     *
     * @param input_file    Input file where records are read from.
     * @param record_size   Record size in bytes.
     * @param blocks        Vector of input blocks specified as tuple
     *                      (file_offset, number_of_records).
     * @param buffer_size   Buffer size in bytes.
     *                      Must be a multiple of "record_size".
     *                      Note: Each RecordInputStream creates two buffers
     *                      of the specified size.
     */
    RecordInputStream(
        BinaryFile&   input_file,
        unsigned int  record_size,
        BlockList&&   blocks,
        size_t        buffer_size)
      : m_input_file(input_file),
        m_record_size(record_size),
        m_next_block(0),
        m_block_remaining(0),
        m_file_offset(0),
        m_bufpos(NULL),
        m_bufend(NULL),
        m_blocks(blocks),
        m_buffer(buffer_size)
    {
        assert(buffer_size % record_size == 0);
        assert(buffer_size > record_size);
    }

    // Prevent copying and assignment.
    RecordInputStream(const RecordInputStream&) = delete;
    RecordInputStream& operator=(const RecordInputStream&) = delete;

    /** Return true if the end of the current input block is reached. */
    inline bool empty() const
    {
        return (m_bufpos == m_bufend);
    }

    /**
     * Return a pointer to the current binary record.
     *
     * This function must only be used if "empty()" returns false.
     * The returned pointer becomes invalid after a call to "next_record()".
     */
    inline const unsigned char * record() const
    {
        return m_bufpos;
    }

    /**
     * Move to the next record of the current block.
     *
     * This function must only be used if "empty()" returns false.
     * Calling this function invalidates all pointers previously returned
     * by "record()".
     */
    inline void next_record()
    {
        assert(m_bufpos != m_bufend);
        m_bufpos += m_record_size;

        if (m_bufpos == m_bufend) {
            refill_buffer();
        }
    }

    /**
     * Start reading from the next input block.
     *
     * This function may only be called if "empty()" returns true.
     */
    void next_block()
    {
        assert(m_bufpos == m_bufend);
        assert(m_next_block < m_blocks.size());

        uint64_t num_records;
        std::tie(m_file_offset, num_records) = m_blocks[m_next_block];
        m_block_remaining = num_records * m_record_size;
        m_next_block++;

        uint64_t file_size = m_input_file.size();
        assert(m_file_offset <= file_size);
        assert(m_block_remaining <= file_size - m_file_offset);

        refill_buffer();
    }

private:
    /** Refill the buffer from the current input block. */
    void refill_buffer()
    {
        if (m_block_remaining > 0) {
            size_t transfer_size =
                (m_buffer.size() < m_block_remaining) ?
                m_buffer.size() : m_block_remaining;
            m_input_file.read(m_buffer.data(), m_file_offset, transfer_size);
            m_file_offset += transfer_size;
            m_block_remaining -= transfer_size;
            m_bufpos = m_buffer.data();
            m_bufend = m_buffer.data() + transfer_size;
        }
    }

    BinaryFile&         m_input_file;
    const unsigned int  m_record_size;
    size_t              m_next_block;
    uint64_t            m_block_remaining;
    uint64_t            m_file_offset;
    unsigned char *     m_bufpos;
    unsigned char *     m_bufend;
    BlockList           m_blocks;
    std::vector<unsigned char> m_buffer;
};
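
/*
 * Usage sketch (illustrative): reading every record from a stream that was
 * constructed with "nblocks" input blocks.
 *
 *     for (size_t b = 0; b < nblocks; b++) {   // nblocks: known to the caller
 *         stream.next_block();
 *         while (!stream.empty()) {
 *             const unsigned char * rec = stream.record();
 *             // ... process rec ...
 *             stream.next_record();
 *         }
 *     }
 */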


/**
 * Write binary records to an output file with buffering.
 */
class RecordOutputStream
{
    // TODO : double-buffering with delayed I/O via background thread

public:
    /**
     * Construct a record output stream.
     *
     * @param output_file   Output file where records are written.
     * @param record_size   Record size in bytes.
     * @param file_offset   Start offset in the output file.
     * @param buffer_size   Buffer size in bytes.
     *                      Note: Each RecordOutputStream creates two buffers
     *                      of the specified size.
     */
    RecordOutputStream(
        BinaryFile&   output_file,
        unsigned int  record_size,
        uint64_t      file_offset,
        size_t        buffer_size)
      : m_output_file(output_file),
        m_record_size(record_size),
        m_file_offset(file_offset),
        m_buffer(buffer_size)
    {
        m_bufpos = m_buffer.data();
        m_bufend = m_buffer.data() + m_buffer.size();
    }

    // Prevent copying and assignment.
    RecordOutputStream(const RecordOutputStream&) = delete;
    RecordOutputStream& operator=(const RecordOutputStream&) = delete;

    /** Append a record to the output stream. */
    inline void put(const unsigned char *record)
    {
        if (m_record_size > m_bufend - m_bufpos) {
            flush();
        }
        memcpy(m_bufpos, record, m_record_size);
        m_bufpos += m_record_size;
    }

    /** Return the current file offset. Flush before calling this function. */
    inline uint64_t file_offset() const
    {
        assert(m_bufpos == m_buffer.data());
        return m_file_offset;
    }

    /** Flush buffered records to the output file. */
    void flush()
    {
        size_t flush_size = m_bufpos - m_buffer.data();
        if (flush_size > 0) {
            m_output_file.write(m_buffer.data(), m_file_offset, flush_size);
            m_file_offset += flush_size;
            m_bufpos = m_buffer.data();
        }
    }

private:
    BinaryFile&         m_output_file;
    const unsigned int  m_record_size;
    uint64_t            m_file_offset;
    unsigned char *     m_bufpos;
    unsigned char *     m_bufend;
    std::vector<unsigned char> m_buffer;
};
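
/*
 * Usage sketch (illustrative):
 *
 *     RecordOutputStream out(output_file, record_size, 0, buffer_size);
 *     out.put(rec);          // append records one at a time
 *     out.flush();           // write any remaining buffered records
 */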


/** Compare two records. */
#define record_compare(_a, _b, _n) (memcmp(_a, _b, _n))

/** Copy a record. */
#define record_copy(_dst, _src, _n) (memcpy(_dst, _src, _n))


/**
 * Swap two records.
 */
inline void record_swap(
    unsigned char *a,
    unsigned char *b,
    size_t record_size)
{
    while (record_size > 0) {
        unsigned char aa = *a;
        unsigned char bb = *b;
        *b = aa;
        *a = bb;
        a++;
        b++;
        record_size--;
    }
}


/** Return the index of the parent of the specified heap node. */
inline size_t heap_parent_index(size_t node_index)
{
    return (node_index - 1) / 2;
}


/** Return the index of the left child of the specified heap node. */
inline size_t heap_left_child_index(size_t node_index)
{
    return node_index * 2 + 1;
}
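
/*
 * Worked example of the index arithmetic (implicit binary heap in an array):
 * the node at index 3 has its parent at heap_parent_index(3) == 1 and its
 * left child at heap_left_child_index(3) == 7; the right child, when present,
 * is at index 8. The root of the heap is at index 0.
 */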


/**
 * Insert a node into an empty spot in the heap, then repair the sub-heap
 * rooted at that position.
 */
void heap_sift_down_records(
    unsigned char * buffer,
    size_t record_size,
    size_t num_records,
    size_t insert_index,
    const unsigned char * insert_value)
{
    // Find the first node index which does not have two child nodes.
    size_t parent_end = heap_parent_index(num_records);

    // Move the empty spot all the way down through the sub-heap.
    size_t cur_idx = insert_index;
    while (cur_idx < parent_end) {

        // Find the left child node of the current node.
        size_t child_idx = heap_left_child_index(cur_idx);
        unsigned char * child_ptr = buffer + child_idx * record_size;

        // Compare the two child nodes.
        if (record_compare(child_ptr, child_ptr + record_size, record_size)
            < 0) {
            // Right child is greater, choose that one.
            child_idx += 1;
            child_ptr += record_size;
        }

        // Move the chosen child to the empty spot.
        unsigned char * cur_ptr = buffer + cur_idx * record_size;
        record_copy(cur_ptr, child_ptr, record_size);

        // Continue the scan from the (now empty) child spot.
        cur_idx = child_idx;
    }

    // If the empty spot has a left child, move that child into the empty spot.
    if (num_records > 1 && cur_idx <= heap_parent_index(num_records - 1)) {
        size_t child_idx = heap_left_child_index(cur_idx);
        unsigned char * cur_ptr = buffer + cur_idx * record_size;
        unsigned char * child_ptr = buffer + child_idx * record_size;
        record_copy(cur_ptr, child_ptr, record_size);
        cur_idx = child_idx;
    }

    // The empty spot is now in a leaf node of the heap.
    // Scan back up to find the right place to insert the new node.
    //
    // Going all the way down and then back up may seem wasteful,
    // but it is faster in practice because the correct insertion spot
    // is likely to be in the bottom of the heap.

    while (cur_idx > insert_index) {

        // Find parent of the empty spot.
        size_t parent_idx = heap_parent_index(cur_idx);
        unsigned char * parent_ptr = buffer + parent_idx * record_size;

        // Compare the new value to the parent value.
        if (record_compare(parent_ptr, insert_value, record_size) >= 0) {
            // We found the right spot to insert the new value.
            break;
        }

        // Move the parent back to the empty spot.
        unsigned char * cur_ptr = buffer + cur_idx * record_size;
        record_copy(cur_ptr, parent_ptr, record_size);

        // Move to the parent node.
        cur_idx = parent_idx;
    }

    // Insert the new node at the empty position.
    unsigned char * cur_ptr = buffer + cur_idx * record_size;
    record_copy(cur_ptr, insert_value, record_size);
}


/**
 * Sort an array of records using in-place heap sort.
 *
 * Run time:    O(N * log(N))
 * Extra space: O(1)
 */
void heap_sort_records(
    unsigned char * buffer,
    size_t record_size,
    size_t num_records)
{
    // Skip trivial cases.
    if (num_records < 2) {
        return;
    }

    // Allocate temporary space for one record.
    std::vector<unsigned char> temp(record_size);

    //
    // Phase 1: Transform the unordered array into a max-heap.
    //

    // Bottom-up loop over all non-trivial sub-heaps.
    // Start with the parent of the last node.
    size_t cur_idx = heap_parent_index(num_records - 1);

    while (true) {

        // Remove the current node from the heap.
        unsigned char * cur_ptr = buffer + cur_idx * record_size;
        record_copy(temp.data(), cur_ptr, record_size);

        // Re-insert the node and repair the sub-heap rooted at this node.
        heap_sift_down_records(
            buffer,
            record_size,
            num_records,
            cur_idx,
            temp.data());

        // Stop after processing the root node.
        if (cur_idx == 0) {
            break;
        }

        // Go do the next sub-heap.
        cur_idx--;
    }

    //
    // Phase 2: Transform the max-heap into a sorted array.
    //

    // Loop over the record array from back to front.
    cur_idx = num_records - 1;
    while (cur_idx > 0) {

        // Remove the root node from the heap.
        // This is the largest remaining element, which belongs at index CUR
        // in the sorted array.
        record_copy(temp.data(), buffer, record_size);

        // Remove the node at index CUR from the heap.
        // This reduces the size of the heap by 1.
        // Re-insert the removed node as the root node and repair the heap.
        unsigned char * cur_ptr = buffer + cur_idx * record_size;
        heap_sift_down_records(buffer, record_size, cur_idx, 0, cur_ptr);

        // Copy the former root node to its position in the sorted array.
        record_copy(cur_ptr, temp.data(), record_size);

        // Go to next element.
        cur_idx--;
    }
}


/**
 * Sort an array of records using in-place insertion sort.
 *
 * This is a helper function for quicksort_records().
 */
void insertion_sort_records(
    unsigned char * buffer,
    size_t record_size,
    size_t num_records)
{
    // Allocate temporary space for one record.
    std::vector<unsigned char> temp(record_size);

    for (size_t cur_idx = 1; cur_idx < num_records; cur_idx++) {
        // The partial array 0 .. (cur_idx - 1) is already sorted.
        // We will now insert cur_idx into the sorted array.

        // Quick check whether the new record is already in the right place.
        unsigned char * cur_ptr = buffer + cur_idx * record_size;
        unsigned char * prev_ptr = cur_ptr - record_size;
        if (record_compare(cur_ptr, prev_ptr, record_size) >= 0) {
            continue;
        }

        // Scan backwards through the sorted array to find the right place
        // to insert the new record.
        unsigned char * insert_ptr = prev_ptr;
        while (insert_ptr > buffer) {

            prev_ptr = insert_ptr - record_size;
            if (record_compare(cur_ptr, prev_ptr, record_size) >= 0) {
                // Found the right place to insert the new record.
                break;
            }

            // Continue backwards scan.
            insert_ptr = prev_ptr;
        }

        // Copy the new record to temporary storage.
        record_copy(temp.data(), cur_ptr, record_size);

        // Move sorted records to make space for the new record.
        memmove(insert_ptr + record_size, insert_ptr, cur_ptr - insert_ptr);

        // Copy the new record to its place.
        record_copy(insert_ptr, temp.data(), record_size);
    }
}


/**
 * Partition the array into two parts.
 *
 * This is a helper function for quicksort_records().
 *
 * After partitioning, all of the "num_left" first records will be
 * less-than-or-equal to all of the "num_right" last records.
 *
 * After partitioning, either
 *   num_left + num_right == num_records
 * or
 *   num_left + num_right == num_records - 1
 *
 * In the second case, the record in the middle is already in its final
 * position in the array.
 *
 * @param range_begin   Pointer to first element of the array.
 * @param record_size   Record size in bytes.
 * @param num_records   Number of records in the array.
 * @param[out] num_left     Number of records in the left half.
 * @param[out] num_right    Number of records in the right half.
 */
inline void quicksort_partition_records(
    unsigned char * range_begin,
    size_t record_size,
    size_t num_records,
    size_t& num_left,
    size_t& num_right)
{
    // Initialize pointers to start, end and middle of range.
    unsigned char * left_ptr = range_begin;
    unsigned char * right_ptr = range_begin + (num_records - 1) * record_size;
    unsigned char * pivot_ptr = range_begin + (num_records / 2) * record_size;

    // Sort the first, middle and last records such that they are
    // in proper order with respect to each other.
    if (record_compare(pivot_ptr, left_ptr, record_size) < 0) {
        record_swap(left_ptr, pivot_ptr, record_size);
    }
    if (record_compare(right_ptr, pivot_ptr, record_size) < 0) {
        record_swap(pivot_ptr, right_ptr, record_size);
        if (record_compare(pivot_ptr, left_ptr, record_size) < 0) {
            record_swap(left_ptr, pivot_ptr, record_size);
        }
    }

    // The median of the three records we examined is now in the
    // middle of the range, pointed to by pivot_ptr.
    // This is not necessarily the final location of that element.

    // The first and last record of the range are now on the proper
    // side of the partition. No need to examine them again.
    left_ptr += record_size;
    right_ptr -= record_size;

    // Partition the rest of the array based on comparing to the pivot.
    while (true) {

        // Skip left-side records that are less than the pivot.
        while (record_compare(left_ptr, pivot_ptr, record_size) < 0) {
            left_ptr += record_size;
        }

        // Skip right-side records that are greater than the pivot.
        while (record_compare(pivot_ptr, right_ptr, record_size) < 0) {
            right_ptr -= record_size;
        }

        // Stop when the pointers meet.
        if (left_ptr >= right_ptr) {
            break;
        }

        // Swap the records that are on the wrong sides.
        record_swap(left_ptr, right_ptr, record_size);

        // If we moved the pivot, update its pointer so it keeps
        // pointing to the pivot value.
        if (pivot_ptr == left_ptr) {
            pivot_ptr = right_ptr;
        } else if (pivot_ptr == right_ptr) {
            pivot_ptr = left_ptr;
        }

        // Do not compare the swapped elements again.
        left_ptr += record_size;
        right_ptr -= record_size;

        // Stop when pointers cross.
        // (Pointers equal is not good enough at this point, because
        // we won't know on which side the pointed record belongs.)
        if (left_ptr > right_ptr) {
            break;
        }
    }

    // If pointers are equal, they must both be pointing to a pivot.
    // Bump both pointers so they correctly delineate the new
    // subranges. The record where the pointers meet is already in
    // its final position.
    if (left_ptr == right_ptr) {
        left_ptr += record_size;
        right_ptr -= record_size;
    }

    // Determine the number of elements in the left and right subranges.
    num_left = (right_ptr + record_size - range_begin) / record_size;
    num_right = num_records - (left_ptr - range_begin) / record_size;
}
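
/*
 * Illustrative example (hand-traced, single-byte records): partitioning the
 * array { 3, 1, 4, 1, 5 } around the median-of-three pivot leaves the array
 * as { 3, 1, 1, 4, 5 } with num_left == 3 and num_right == 2; every record in
 * the first 3 positions is <= every record in the last 2 positions.
 */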


/**
 * Sort an array of records using in-place quicksort.
 *
 * Run time:    O(N * log(N))
 * Extra space: O(log(N))
 *
 * Plain quicksort is known to have quadratic worst-case behaviour.
 * This implementation uses median-of-three partitioning to reduce
 * the probability of worst-case performance. If bad performance does
 * occur, this implementation detects it and switches to heap sort.
 */
void quicksort_records(
    unsigned char * buffer,
    size_t record_size,
    size_t num_records)
{
    // Recursive partitioning is only applied to fragments larger than
    // this threshold. The rest of the work will be done by insertion sort.
    const size_t insertion_sort_threshold = 12;

    // Skip trivial cases.
    if (num_records < 2) {
        return;
    }

    // Determine maximum acceptable recursion depth.
    unsigned int depth_limit = 1;
    for (size_t nremain = num_records; nremain > 1; nremain >>= 1) {
        depth_limit += 2;
    }

    // Allocate stack.
    std::vector<std::tuple<unsigned char *, size_t, unsigned int>> stack;
    stack.reserve(depth_limit);

    // Prepare recursive partitioning of the entire array.
    if (num_records > insertion_sort_threshold) {
        stack.emplace_back(buffer, num_records, depth_limit);
    }

    // Execute recursive partitioning.
    while (!stack.empty()) {

        // Pop a range from the stack.
        unsigned char * range_begin;
        size_t range_num_records;
        std::tie(range_begin, range_num_records, depth_limit) = stack.back();
        stack.pop_back();

        // Check recursion depth. Switch to heap sort if we get too deep.
        if (depth_limit == 0) {
            heap_sort_records(range_begin, record_size, range_num_records);
            continue;
        }

        // Partition the array into two parts.
        size_t num_left, num_right;
        quicksort_partition_records(
            range_begin,
            record_size,
            range_num_records,
            num_left,
            num_right);

        // Push left subrange on the stack, if it meets the size threshold.
        if (num_left > insertion_sort_threshold) {
            stack.emplace_back(range_begin, num_left, depth_limit - 1);
        }

        // Push right subrange on the stack, if it meets the size threshold.
        if (num_right > insertion_sort_threshold) {
            unsigned char * right_half =
                range_begin + (range_num_records - num_right) * record_size;
            stack.emplace_back(right_half, num_right, depth_limit - 1);
        }
    }

    // Recursive partitioning finished.
    // The array is now roughly sorted, except for subranges that were
    // skipped because they are within the threshold.

    // Finish with insertion sort to get a fully sorted array.
    insertion_sort_records(buffer, record_size, num_records);
}


/** Helper function for quicksort_records_parallel(). */
void quicksort_records_parallel_step(
    unsigned char * range_begin,
    size_t record_size,
    size_t num_records,
    size_t parallel_size_threshold,
    unsigned int parallel_depth_limit,
    ThreadPool * thread_pool,
    CompletionTracker * completion_tracker)
{
    while (true) {

        // If the range is below threshold, or recursion is too deep,
        // handle this part of the array entirely in this thread.
        if (num_records <= parallel_size_threshold
            || parallel_depth_limit == 0) {
            quicksort_records(range_begin, record_size, num_records);
            break;
        }

        parallel_depth_limit--;

        // Partition the array into two parts.
        size_t num_left, num_right;
        quicksort_partition_records(
            range_begin,
            record_size,
            num_records,
            num_left,
            num_right);

        // Submit the largest of the two subranges to the thread pool.
        // We will handle the other subrange within this thread.
        unsigned char * right_half =
            range_begin + (num_records - num_right) * record_size;
        if (num_left >= num_right) {

            // Submit left subrange.
            completion_tracker->add(
                thread_pool->submit(
                    std::bind(
                        quicksort_records_parallel_step,
                        range_begin,
                        record_size,
                        num_left,
                        parallel_size_threshold,
                        parallel_depth_limit,
                        thread_pool,
                        completion_tracker)));

            // Continue with right subrange in this thread.
            range_begin = right_half;
            num_records = num_right;

        } else {

            // Submit right subrange.
            completion_tracker->add(
                thread_pool->submit(
                    std::bind(
                        quicksort_records_parallel_step,
                        right_half,
                        record_size,
                        num_right,
                        parallel_size_threshold,
                        parallel_depth_limit,
                        thread_pool,
                        completion_tracker)));

            // Continue with left subrange in this thread.
            num_records = num_left;
        }
    }
}


/**
 * Sort an array of records using in-place quicksort.
 *
 * Use multiple threads to parallelize the sort process.
 */
void quicksort_records_parallel(
    unsigned char * buffer,
    size_t record_size,
    size_t num_records,
    unsigned int num_threads,
    ThreadPool * thread_pool)
{
    // Small fragments should not be further distributed between threads.
    size_t parallel_size_threshold =
        std::max(size_t(1024), num_records / num_threads / 4);

    // Stop parallel processing past a certain nesting depth.
    // This is necessary to avoid quadratic run time.
    unsigned int parallel_depth_limit = 2;
    for (unsigned int nn = num_threads; nn > 1; nn >>= 1) {
        parallel_depth_limit += 2;
    }

    // Tracker to determine when all sort tasks have finished.
    CompletionTracker completion_tracker;

    // Submit the full array to the thread pool.
    completion_tracker.add(
        thread_pool->submit(
            std::bind(
                quicksort_records_parallel_step,
                buffer,
                record_size,
                num_records,
                parallel_size_threshold,
                parallel_depth_limit,
                thread_pool,
                &completion_tracker)));

    // Wait until all sort tasks have finished.
    completion_tracker.wait();
}


/** Sort the specified block of records (in-place). */
void sort_records(
    unsigned char * buffer,
    size_t record_size,
    size_t num_records)
{
    // TODO : multi-threaded quicksort

    // heap_sort_records(buffer, record_size, num_records);
    quicksort_records(buffer, record_size, num_records);
}


/**
 * Remove duplicate records from an already sorted array.
 *
 * @return the number of unique records
 */
size_t filter_duplicate_records(
    unsigned char * buffer,
    unsigned int record_size,
    size_t num_records)
{
    // Special case for 0 or 1 records.
    if (num_records < 2) {
        return num_records;
    }

    // Find the first duplicate record.
    unsigned char * last_unique = buffer;
    unsigned char * next_record = buffer + record_size;
    size_t next_pos = 1;

    while (next_pos < num_records) {
        if (memcmp(last_unique, next_record, record_size) == 0) {
            break;
        }
        last_unique = next_record;
        next_record += record_size;
        next_pos++;
    }

    // Scan the rest of the records and copy unique records.
    size_t num_unique = next_pos;
    while (next_pos < num_records) {
        if (memcmp(last_unique, next_record, record_size) != 0) {
            num_unique++;
            last_unique += record_size;
            memcpy(last_unique, next_record, record_size);
        }
        next_record += record_size;
        next_pos++;
    }

    return num_unique;
}
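
/*
 * Illustrative example (single-byte records): for the sorted input
 * { 'a', 'a', 'b', 'c', 'c' }, filter_duplicate_records() returns 3 and the
 * first 3 records of the buffer are { 'a', 'b', 'c' }; the buffer contents
 * beyond the returned count are left unspecified.
 */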


/**
 * Sort the whole file in a single pass.
 *
 * This function is used only when the input file fits in memory.
 */
void single_pass(
    BinaryFile& input_file,
    BinaryFile& output_file,
    const SortContext& ctx)
{
    assert(input_file.size() < SIZE_MAX);

    size_t input_size = input_file.size();
    size_t num_records = input_size / ctx.record_size;

    log(ctx, "sorting %zu records in a single pass\n", num_records);

    // Allocate memory.
    log(ctx, "allocating memory\n");
    std::vector<unsigned char> buffer(input_size);

    Timer timer;

    // Read input file into memory.
    log(ctx, "reading input file\n");
    timer.start();
    input_file.read(buffer.data(), 0, input_size);
    timer.stop();
    log(ctx, "  t = %.3f seconds\n", timer.value());

    // Set up thread pool.
    std::unique_ptr<ThreadPool> thread_pool;
    if (ctx.num_threads > 1) {
        log(ctx, "creating thread pool with %u threads\n", ctx.num_threads);
        thread_pool.reset(new ThreadPool(ctx.num_threads));
    }

    // Sort records in memory buffer.
    log(ctx, "sorting records using %u threads\n", ctx.num_threads);
    timer.start();
    if (ctx.num_threads > 1) {
        quicksort_records_parallel(
            buffer.data(),
            ctx.record_size,
            num_records,
            ctx.num_threads,
            thread_pool.get());
    } else {
        quicksort_records(buffer.data(), ctx.record_size, num_records);
    }
    timer.stop();
    log(ctx, "  t = %.3f seconds\n", timer.value());

    if (ctx.flag_unique) {
        // Eliminate duplicate records.
        log(ctx, "filtering duplicate records\n");
        timer.start();
        num_records = filter_duplicate_records(
            buffer.data(), ctx.record_size, num_records);
        timer.stop();
        log(ctx, "  t = %.3f seconds\n", timer.value());
        log(ctx, "found %zu unique records\n", num_records);
    }

    log(ctx, "writing output file\n");
    timer.start();

    // Shrink output file if duplicate records were removed.
    uint64_t output_size = num_records * ctx.record_size;
    if (output_size < input_size) {
        output_file.truncate_file(output_size);
    }

    // Write memory buffer to output file.
    output_file.write(buffer.data(), 0, output_size);
    timer.stop();
    log(ctx, "  t = %.3f seconds\n", timer.value());
}


/**
 * Perform the initial sort pass of multi-pass external sorting.
 *
 * All blocks will have the specified number of records, except for
 * the last block in the file which may be smaller.
 *
 * @param input_file         Input file.
 * @param output_file1       Output file for this pass.
 * @param output_file2       Second output file for this pass.
 * @param records_per_block  Number of records per sort block.
 * @param num_blocks         Number of sort blocks.
 * @param num_blocks_file1   Number of blocks for the first output file.
 * @param ctx                Reference to context structure.
 */
void sort_pass(
    BinaryFile& input_file,
    BinaryFile& output_file1,
    BinaryFile& output_file2,
    uint64_t records_per_block,
    uint64_t num_blocks,
    uint64_t num_blocks_file1,
    const SortContext& ctx)
{
    unsigned int record_size = ctx.record_size;
    uint64_t file_size = input_file.size();
    uint64_t num_records = file_size / record_size;

    log(ctx, "running initial sort pass\n");

    Timer timer;
    timer.start();

    // TODO : double-buffer with I/O in separate thread
    // Allocate sort buffer.
    assert(records_per_block < SIZE_MAX / record_size);
    std::vector<unsigned char> buffer(records_per_block * record_size);

    // TODO : multi-threaded sorting with thread pool

    // Loop over blocks to be sorted.
    for (uint64_t block_index = 0; block_index < num_blocks; block_index++) {

        uint64_t first_record_idx = block_index * records_per_block;
        size_t block_num_records =
            std::min(records_per_block, num_records - first_record_idx);

        log(ctx,
            "  sorting block %" PRIu64 " / %" PRIu64 ": %" PRIu64 " records\n",
            block_index,
            num_blocks,
            block_num_records);

        // Read block.
        input_file.read(
            buffer.data(),
            first_record_idx * record_size,
            block_num_records * record_size);

        // Sort records in this block.
        sort_records(
            buffer.data(),
            record_size,
            block_num_records);

        // Write block.
        BinaryFile& output_file =
            (block_index < num_blocks_file1) ? output_file1 : output_file2;
        output_file.write(
            buffer.data(),
            first_record_idx * record_size,
            block_num_records * record_size);
    }

    timer.stop();
    log(ctx, "end initial sort pass\n");
    log(ctx, "  t = %.3f seconds\n", timer.value());
}


/**
 * Merge sorted blocks of records into a single sorted block.
 *
 * @param input_streams   One input stream for each input block.
 * @param output_stream   Output stream for the merged block.
 * @param record_size     Record size in bytes.
 * @param branch_factor   Number of input blocks.
 *                        May be less than the length of input_streams.
 * @param filter_dupl     True to eliminate duplicate records.
 */
void merge_blocks(
    std::vector<std::unique_ptr<RecordInputStream>>& input_streams,
    RecordOutputStream& output_stream,
    size_t record_size,
    unsigned int branch_factor,
    bool filter_dupl)
{
    assert(branch_factor > 1);
    assert(branch_factor <= input_streams.size());

    // Put the head element of each block into a heap.
    // The heap will determine which block contains the element that
    // should go first in the merged block.
    typedef std::tuple<const unsigned char*, RecordInputStream*> HeapElement;

    // Function which compares records and returns true if
    // record A comes after record B in sort order.
    // If this function is used as the compare operator of a max-heap,
    // the record that comes first in sort order will be at the top
    // of the heap.
    auto cmp_heap_elem =
        [record_size](const HeapElement& a, const HeapElement& b) {
            const unsigned char *reca = std::get<0>(a);
            const unsigned char *recb = std::get<0>(b);
            return record_compare(reca, recb, record_size) > 0;
        };

    // Initialize empty heap.
    std::vector<HeapElement> heap;

    // Get the first element of each block.
    for (unsigned int i = 0; i < branch_factor; i++) {
        // Input blocks should not be empty.
        assert(!input_streams[i]->empty());
        heap.emplace_back(input_streams[i]->record(), input_streams[i].get());
    }

    // Make a heap of these first records.
    std::make_heap(heap.begin(), heap.end(), cmp_heap_elem);

    // Allocate a temporary record for duplicate filtering.
    std::vector<unsigned char> temp_record(record_size);

    // The very first record can not be filtered out.
    bool filter_first_pass = true;

    // Keep merging until the heap runs empty.
    while (!heap.empty()) {

        // Extract the first element from the heap.
        const unsigned char * rec;
        RecordInputStream * instream;
        std::tie(rec, instream) = heap[0];
        std::pop_heap(heap.begin(), heap.end(), cmp_heap_elem);

        if (filter_dupl) {

            // Compare against previous record, only output if different.
            if (filter_first_pass
                || record_compare(temp_record.data(), rec, record_size) != 0)
            {
                output_stream.put(rec);
                record_copy(temp_record.data(), rec, record_size);
            }
            filter_first_pass = false;

        } else {

            // No filtering, just push record to the output block.
            output_stream.put(rec);

        }

        // Try to pull the next record from this input stream.
        instream->next_record();
        if (instream->empty()) {
            // Stream is empty. This stream is now out of the game.
            // The heap shrinks by 1 element.
            heap.pop_back();
        } else {
            // Push next record from the stream into the heap.
            heap.back() = std::make_tuple(instream->record(), instream);
            std::push_heap(heap.begin(), heap.end(), cmp_heap_elem);
        }
    }
}
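
/*
 * Illustrative example (single-byte records): merging the sorted blocks
 * { 'a', 'd' } and { 'b', 'c' } produces the output 'a', 'b', 'c', 'd'.
 * With filter_dupl enabled, merging { 'a', 'b' } and { 'b', 'c' } produces
 * 'a', 'b', 'c'.
 */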


/**
 * Perform a merge pass of multi-pass external sorting.
 *
 * The input blocks for this pass are described by "merge_pass".
 * These blocks are in general not all the same size.
 *
 * Groups of "branch_factor" consecutive input blocks are merged into
 * one output block per group. The last group may contain fewer than
 * "branch_factor" input blocks.
 *
 * If "filter_dupl" is specified, this must be the final merge pass,
 * producing a single output block. In that case duplicate records
 * will be removed from the output.
 *
 * @param input_file   Input file for this pass.
 * @param output_file  Output file for this pass.
 * @param merge_pass   Describes the input blocks of this merge pass.
 * @param filter_dupl  True to eliminate duplicate records.
 * @param ctx          Reference to context structure.
 */
void merge_pass(
    BinaryFile& input_file,
    BinaryFile& output_file,
    const SortStrategy::MergePass& merge_pass,
    bool filter_dupl,
    const SortContext& ctx)
{
    size_t num_blocks = merge_pass.records_per_block.size();

    // Only filter duplicates when the output is a single block.
    assert((!filter_dupl) || num_blocks <= ctx.branch_factor);

    Timer timer;
    timer.start();

    // Calculate number of buffers:
    // 2 buffers per input stream + more buffers for output stream.
    size_t num_output_buffers = 2 + (ctx.branch_factor - 1) / 2;
    size_t num_buffers = 2 * ctx.branch_factor + num_output_buffers;

    // Calculate buffer size.
    // Must be a multiple of the record size and the transfer alignment.
    size_t buffer_size = ctx.memory_size / num_buffers;
    buffer_size -= buffer_size % (TRANSFER_ALIGNMENT * ctx.record_size);
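
    // Illustration with the default settings and 16-byte records
    // (memory_size = 1024 MiB, branch_factor = 16, record_size = 16):
    //   num_output_buffers = 2 + 15 / 2       =  9
    //   num_buffers        = 2 * 16 + 9       = 41
    //   buffer_size        = 1073741824 / 41  = 26188824
    //     rounded down to a multiple of 4096 * 16 = 26148864 bytes (~25 MiB)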

    // TODO : double-buffering with I/O in separate thread

    // Prepare a list of blocks for each input stream.
    std::vector<RecordInputStream::BlockList> stream_blocks(ctx.branch_factor);
    uint64_t file_offset = 0;
    for (size_t p = 0; p < num_blocks; p++) {
        uint64_t num_records = merge_pass.records_per_block[p];
        unsigned int streamidx = p % ctx.branch_factor;
        stream_blocks[streamidx].emplace_back(file_offset, num_records);
        file_offset += num_records * ctx.record_size;
    }
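
    // Blocks are dealt out round-robin; for example, with branch_factor = 3
    // and 5 input blocks, stream 0 gets blocks {0, 3}, stream 1 gets {1, 4}
    // and stream 2 gets {2}. Group g of the merge loop below then merges the
    // g-th block of each stream, i.e. consecutive blocks of the input file.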

    // Initialize input streams.
    std::vector<std::unique_ptr<RecordInputStream>> input_streams;
    for (unsigned int i = 0; i < ctx.branch_factor; i++) {
        input_streams.emplace_back(new RecordInputStream(
            input_file,
            ctx.record_size,
            std::move(stream_blocks[i]),
            buffer_size));
    }

    // Initialize output stream.
    RecordOutputStream output_stream(
        output_file,
        ctx.record_size,
        0,
        buffer_size);

    // Loop over groups of blocks to be sorted.
    // Every group consists of "branch_factor" blocks, except the last
    // group which may contain fewer blocks.
    // Each group produces one output block.
    size_t block_index = 0;
    while (block_index < num_blocks) {

        // If this is the first merge pass, the last group may have
        // fewer than "branch_factor" blocks.
        unsigned int blocks_this_group = ctx.branch_factor;
        if (blocks_this_group > num_blocks - block_index) {
            blocks_this_group = num_blocks - block_index;
        }

        // Merging a single block with itself would be dumb.
        // And our strategy planner is not that dumb.
        assert(blocks_this_group > 1);

        // Skip to the next section of each active input stream.
        for (unsigned int i = 0; i < blocks_this_group; i++) {
            input_streams[i]->next_block();
        }

        // Merge the blocks.
        merge_blocks(
            input_streams,
            output_stream,
            ctx.record_size,
            blocks_this_group,
            filter_dupl);

        // Skip to the start of the next block group.
        block_index += blocks_this_group;
    }

    // Flush output stream buffers.
    output_stream.flush();

    // Shrink output file if duplicate records were removed.
    if (filter_dupl) {
        uint64_t output_size = output_stream.file_offset();
        uint64_t num_output_records = output_size / ctx.record_size;
log(ctx, "found %zu unique records\n", num_output_records);
|
|
|
|
output_file.truncate_file(output_size);
|
|
|
|
}
|
|
|
|
|
2022-06-21 07:50:19 +02:00
|
|
|
timer.stop();
|
|
|
|
log(ctx, " t = %.3f seconds\n", timer.value());
|
|
|
|
}


/**
 * Prepare a strategy for multi-pass external sorting.
 */
SortStrategy plan_multi_pass_strategy(
    uint64_t file_size,
    const SortContext& ctx)
{
    // Plan the initial sort pass.
    // Use blocks that are at most half of available memory,
    // so we can use two buffers to overlap I/O and sorting.
    uint64_t max_sort_block_size = ctx.memory_size / 2;

    // Calculate number of records per block.
    // Make sure this is a multiple of the transfer alignment size.
    uint64_t records_per_sort_block = max_sort_block_size / ctx.record_size;
    records_per_sort_block -= records_per_sort_block % TRANSFER_ALIGNMENT;

    // Calculate number of blocks during the initial sort pass.
    uint64_t num_records = file_size / ctx.record_size;
    uint64_t num_sort_blocks = 1 + (num_records - 1) / records_per_sort_block;
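
    // Illustration (assuming an 8 GiB input of 16-byte records and a
    // 1 GiB memory limit): max_sort_block_size = 512 MiB, so
    // records_per_sort_block = 33554432 (already a multiple of 4096) and
    // num_sort_blocks = 1 + (536870911 / 33554432) = 16.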

    // A list of blocks is constructed in memory for each merge pass.
    // There are several of these lists. Together they will consume
    // about 40 bytes per block. If we get an insanely large input file
    // with a small memory limit, this metadata could by itself consume
    // too much memory.
    // Let's do a sanity check to ensure that the metadata uses less than
    // 25% of the memory limit.
    if (num_sort_blocks >= ctx.memory_size / 4 / 40) {
        throw std::logic_error(
            "Not enough memory to manage the list of blocks");
    }

    // Plan the merge passes.
    //
    // In principle, every pass merges groups of "branch_factor" input blocks
    // into one output block per group, thus reducing the number of remaining
    // blocks by a factor "branch_factor".
    //
    // However, this gets more complicated if the merge tree is not perfectly
    // balanced, which happens if the number of sort blocks is not a power
    // of "branch_factor". In that case, the first merge pass will have
    // to make things right by handling only a subset of the data.
    //
    // The first merge pass processes a subset of the sort blocks.
    // It merges groups of "branch_factor" sort blocks into one output block
    // per group. The last group in this pass may contain fewer than
    // "branch_factor" sort blocks. After the first merge pass, the number
    // of remaining blocks is an exact power of "branch_factor".
    // The remaining blocks are in general not all the same size.
    //
    // After the first merge pass, each subsequent pass (if any) merges
    // groups of exactly "branch_factor" blocks into one output block per
    // group. These blocks are in general not all the same size.
    //
    // Example:
    //
    //   branch_factor = 3
    //   num_sort_blocks = 12
    //
    //   Sorted blocks:
    //   [S00] [S01] [S02] [S03] [S04] [S05] [S06] [S07] [S08] [S09] [S10] [S11]
    //
    //   There are 3 merge passes.
    //   The first merge pass handles only sort blocks S00 - S04.
    //   The next two merge passes handle groups of exactly 3 blocks.
    //
    //   [S00] [S01] [S02] [S03] [S04] [S05] [S06] [S07] [S08] [S09] [S10] [S11]
    //     |     |     |     |     |
    //     +-----+-----+     +--+--+
    //           |              |
    //       [S00-S02]      [S03-S04]  [S05] [S06] [S07] [S08] [S09] [S10] [S11]
    //           |              |        |     |     |     |     |     |     |
    //           +--------------+--------+     +-----+-----+     +-----+-----+
    //                          |                    |                 |
    //                      [S00-S05]            [S06-S08]         [S09-S11]
    //                          |                    |                 |
    //                          +--------------------+-----------------+
    //                                               |
    //                                           [S00-S11]
    //
    // Note that a subset of the sort blocks enters the first merge pass
    // while the remaining sort blocks go directly into the second merge pass.
    // (It is also possible that all sort blocks go into the first pass,
    // if the merge tree is perfectly balanced.)
    //

    // Determine the number of full merge passes (2nd pass and later).
    unsigned int num_merge_pass = 0;
    uint64_t num_merge_blocks = 1;
    while (num_merge_blocks * ctx.branch_factor < num_sort_blocks) {
        num_merge_blocks *= ctx.branch_factor;
        num_merge_pass++;
    }

    // Add a first merge pass.
    num_merge_pass++;

    // Determine the number of merge groups in the first merge pass.
    // The last group may have fewer than "branch_factor" input blocks.
    uint64_t num_merge_ops_first_pass =
        (num_sort_blocks - num_merge_blocks + (ctx.branch_factor - 1) - 1)
        / (ctx.branch_factor - 1);

    // Determine the number of sort blocks to process in the first merge pass.
    uint64_t num_sort_blocks_first_merge =
        num_sort_blocks - num_merge_blocks + num_merge_ops_first_pass;

    assert(num_sort_blocks_first_merge <= num_sort_blocks);
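
    // Worked example, matching the diagram above (branch_factor = 3,
    // num_sort_blocks = 12):
    //   num_merge_blocks            = 9  (the loop stops once 9 * 3 >= 12)
    //   num_merge_pass              = 2 + 1 = 3
    //   num_merge_ops_first_pass    = (12 - 9 + 2 - 1) / 2 = 2
    //   num_sort_blocks_first_merge = 12 - 9 + 2 = 5  (blocks S00 - S04)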

    SortStrategy strategy;
    strategy.records_per_sort_block = records_per_sort_block;
    strategy.num_sort_blocks = num_sort_blocks;
    strategy.num_sort_blocks_first_merge = num_sort_blocks_first_merge;

    // Plan the details of each merge pass.
    //
    // The first merge pass handles a subset of the sort blocks.
    // All of these sort blocks are the same size, except possibly the
    // last block if it runs to the end of the file.
    {
        strategy.merge_pass.emplace_back();
        SortStrategy::MergePass& merge_pass = strategy.merge_pass.back();
        for (size_t i = 0; i < num_sort_blocks_first_merge; i++) {
            uint64_t records_this_block =
                std::min(records_per_sort_block,
                         num_records - i * records_per_sort_block);
            merge_pass.records_per_block.push_back(records_this_block);
        }
    }

    // Plan the rest of the passes.
    for (unsigned int mp = 1; mp < num_merge_pass; mp++) {

        strategy.merge_pass.emplace_back();
        SortStrategy::MergePass& merge_pass = strategy.merge_pass.back();
        SortStrategy::MergePass& prev_pass = *(strategy.merge_pass.end() - 2);

        uint64_t records_this_pass = 0;

        // Output from the previous pass is input into this pass.
        uint64_t records_this_block = 0;
        for (size_t i = 0; i < prev_pass.records_per_block.size(); i++) {
            records_this_block += prev_pass.records_per_block[i];
            if ((i + 1) % ctx.branch_factor == 0) {
                merge_pass.records_per_block.push_back(records_this_block);
                records_this_pass += records_this_block;
                records_this_block = 0;
            }
        }

        // The last group of the first pass may merge fewer than
        // "branch_factor" blocks.
        if (records_this_block > 0) {
            merge_pass.records_per_block.push_back(records_this_block);
            records_this_pass += records_this_block;
        }

        if (mp == 1) {
            // The second pass handles sort blocks that were skipped
            // during the first pass.
            for (size_t i = num_sort_blocks_first_merge;
                 i < num_sort_blocks;
                 i++) {
                records_this_block = std::min(
                    records_per_sort_block,
                    num_records - i * records_per_sort_block);
                merge_pass.records_per_block.push_back(records_this_block);
                records_this_pass += records_this_block;
            }
        }

        // Check that number of input blocks is divisible by branch_factor.
        assert(merge_pass.records_per_block.size() % ctx.branch_factor == 0);

        // Check that all records are accounted for.
        assert(records_this_pass == num_records);
    }

    // Double-check that the last pass produces a single output block.
    SortStrategy::MergePass& last_pass = strategy.merge_pass.back();
    assert(last_pass.records_per_block.size() <= ctx.branch_factor);

    return strategy;
}


/**
 * Sort a binary data file.
 *
 * @param input_name   Path name of binary input file.
 * @param output_name  Path name of binary output file.
 * @param ctx          Reference to context structure.
 */
void sortbin(
    const std::string& input_name,
    const std::string& output_name,
    const SortContext& ctx)
{
    log(ctx, "using memory_size = %" PRIu64 " bytes\n", ctx.memory_size);

    if (ctx.branch_factor < 2) {
        throw std::logic_error("Invalid branch factor (must be >= 2)");
    }

    // We want file I/O to occur on 4096-byte boundaries.
    // To ensure this, we want to do I/O on multiples of 4096 records.
    // To ensure this is possible, we need room for ~ 32k records per branch.
    if (ctx.memory_size / ctx.record_size / ctx.branch_factor <
            8 * TRANSFER_ALIGNMENT) {
        throw std::logic_error(
            "Not enough memory for this combination of record size"
            " and branch factor");
    }
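
    // Illustration: with record_size = 16 and branch_factor = 16, this check
    // requires memory_size >= 16 * 16 * 8 * 4096 = 8 MiB.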

    // Open input file.
    log(ctx, "opening input file\n");
    BinaryInputFile input_file(input_name);
    uint64_t file_size = input_file.size();

    // Check that input file contains an integer number of records.
    if (file_size % ctx.record_size != 0) {
        throw std::logic_error(
            "Input file does not contain an integer number of records");
    }

    // Create output file.
    log(ctx, "creating output file\n");
    BinaryOutputFile output_file(output_name, file_size);

    if (file_size <= ctx.memory_size) {
        // Data fits in memory.

        // Sort in a single pass.
        single_pass(input_file, output_file, ctx);

    } else {
        // Data does not fit in memory.

        // Plan a multi-pass strategy.
        SortStrategy strategy = plan_multi_pass_strategy(file_size, ctx);
        unsigned int num_merge_pass = strategy.merge_pass.size();

        log(ctx,
            "sorting %" PRIu64 " records in %" PRIu64 " blocks"
            " followed by %u merge passes\n",
            file_size / ctx.record_size,
            strategy.num_sort_blocks,
            num_merge_pass);

        // Create a temporary file.
        log(ctx, "creating temporary file\n");
        BinaryTempFile temp_file(ctx.temporary_directory, file_size);

        // The merge passes alternate between tempfile-to-outputfile and
        // outputfile-to-tempfile.
        BinaryFile * output_or_temp_file[2] = { &output_file, &temp_file };

        // The final merge pass will be tempfile-to-outputfile.
        // Depending on the number of merge passes, the first merge pass
        // reads either from the output file or from the tempfile.
        //
        // The sort pass feeds blocks into the first merge pass,
        // but may also feed blocks into the second merge pass if the
        // merge tree is unbalanced.
        {
            BinaryFile * sort_output_first_merge_pass =
                output_or_temp_file[num_merge_pass % 2];
            BinaryFile * sort_output_second_merge_pass =
                output_or_temp_file[(num_merge_pass - 1) % 2];
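
            // For example, with num_merge_pass == 3 the sort pass writes the
            // first-merge-pass blocks to the temporary file (index 3 % 2 == 1)
            // and the skipped blocks to the output file (index 2 % 2 == 0);
            // the merge passes then run temp->output, output->temp,
            // temp->output, so the final pass always lands in the output file.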

            // Execute the initial sort pass.
            sort_pass(
                input_file,
                *sort_output_first_merge_pass,
                *sort_output_second_merge_pass,
                strategy.records_per_sort_block,
                strategy.num_sort_blocks,
                strategy.num_sort_blocks_first_merge,
                ctx);
        }

        // Execute the merge passes.
        for (unsigned int mp = 0; mp < num_merge_pass; mp++) {
            const SortStrategy::MergePass& mpass = strategy.merge_pass[mp];
            size_t num_blocks = mpass.records_per_block.size();
            uint64_t num_records = std::accumulate(
                mpass.records_per_block.begin(),
                mpass.records_per_block.end(),
                uint64_t(0));

            log(ctx,
                "running merge pass %u / %u:"
                " %zu blocks, %" PRIu64 " records\n",
                mp,
                num_merge_pass,
                num_blocks,
                num_records);

            // Filter duplicate records only on the last pass.
            bool filter_dupl = ctx.flag_unique && (mp + 1 == num_merge_pass);

            // Alternate between temp_file and output_file.
            BinaryFile * pass_input_file =
                output_or_temp_file[(num_merge_pass - mp) % 2];
            BinaryFile * pass_output_file =
                output_or_temp_file[(num_merge_pass - mp - 1) % 2];

            // Execute the merge pass.
            merge_pass(
                *pass_input_file,
                *pass_output_file,
                strategy.merge_pass[mp],
                filter_dupl,
                ctx);
        }
    }

    log(ctx, "finished\n");
}


std::string get_default_tmpdir(void)
{
    const char *tmpdir = getenv("TMPDIR");
    if (tmpdir == NULL) {
        tmpdir = "/tmp";
    }
    return std::string(tmpdir);
}


/** Parse an unsigned integer. */
bool parse_uint(const char *argstr, unsigned int& value)
{
    char *endptr;

    errno = 0;
    long t = strtol(argstr, &endptr, 10);

    if (endptr == argstr || endptr[0] != '\0') {
        return false;
    }

    if (errno != 0) {
        return false;
    }

    if (t < 0 || (unsigned long)t > UINT_MAX) {
        return false;
    }

    value = t;
    return true;
}
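
// For example, parse_uint("16", v) yields true and v == 16, while
// parse_uint("16k", v) and parse_uint("-1", v) both return false and
// leave v unchanged.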


/**
 * Parse a memory size specification.
 *
 * Memory size must be specified as an integer with suffix "M" or "G".
 *
 * @return specified memory size in bytes, or 0 for an invalid size
 */
uint64_t parse_memory_size(const char *argstr)
{
    char *endptr;
    errno = 0;

    long long value = strtoll(argstr, &endptr, 10);
    if (endptr == argstr
            || (endptr[0] != 'G' && endptr[0] != 'M') || endptr[1] != '\0') {
        fprintf(stderr,
            "ERROR: Invalid memory size."
            " Specify e.g. '--memory=800M' or '--memory=4G'.\n");
        return 0;
    }

    if (value <= 0 || errno != 0) {
        fprintf(stderr, "ERROR: Invalid memory size\n");
        return 0;
    }

    uint64_t factor;
    if (endptr[0] == 'G') {
        factor = 1024 * 1024 * 1024;
    } else {
        factor = 1024 * 1024;
    }

    if ((unsigned long long)value > UINT64_MAX / factor) {
        fprintf(stderr, "ERROR: Invalid memory size\n");
        return 0;
    }

    return value * factor;
}
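
// For example, "800M" parses to 838860800 bytes and "4G" to 4294967296 bytes;
// a missing or unrecognized suffix (e.g. "800") is rejected and 0 is returned.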


void usage()
{
    fprintf(stderr,
        "\n"
        "Sort fixed-length binary records.\n"
        "\n"
        "Usage: sortbin [options] inputfile outputfile\n"
        "\n"
        "Options:\n"
        "\n"
        "  -s, --size=N    specify record size of N bytes (required)\n"
        "  -u, --unique    eliminate duplicates after sorting\n"
        "  --memory=<n>M   use at most <n> MiByte RAM (default: %d)\n"
        "  --memory=<n>G   use at most <n> GiByte RAM\n"
        "  --branch=N      merge N subarrays in one step (default: %d)\n"
        "  --threads=N     use N threads for parallel sorting (default: %d)\n"
        "  --temporary-directory=DIR   write temporary file to the specified\n"
        "                              directory (default: $TMPDIR)\n"
        "\n"
        "The output file must not yet exist.\n"
        "If the data does not fit in memory, a temporary file will be\n"
        "created with the same size as the input/output files.\n"
        "\n",
        DEFAULT_MEMORY_SIZE_MBYTE,
        DEFAULT_BRANCH_FACTOR,
        DEFAULT_THREADS);
}
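
// Example invocation (illustrative):
//
//   sortbin --size=16 --memory=2G --branch=16 --unique input.dat output.dat
//
// sorts a file of 16-byte records using at most 2 GiB of RAM, merging 16
// blocks per step and dropping duplicate records from the output.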


} // anonymous namespace


int main(int argc, char **argv)
{
    const struct option longopts[] = {
        { "size", 1, NULL, 's' },
        { "unique", 0, NULL, 'u' },
        { "memory", 1, NULL, 'M' },
        { "branch", 1, NULL, 'B' },
        { "threads", 1, NULL, 'J' },
        { "temporary-directory", 1, NULL, 'T' },
        { "verbose", 0, NULL, 'v' },
        { "help", 0, NULL, 'h' },
        { NULL, 0, NULL, 0 }
    };
    bool flag_unique = false;
    bool flag_verbose = false;
    unsigned int record_size = 0;
    unsigned int branch_factor = DEFAULT_BRANCH_FACTOR;
    unsigned int num_threads = DEFAULT_THREADS;
    uint64_t memory_size = uint64_t(DEFAULT_MEMORY_SIZE_MBYTE) * 1024 * 1024;
    std::string tempdir = get_default_tmpdir();
    int opt;

    while ((opt = getopt_long(argc, argv, "s:T:uvh", longopts, NULL)) != -1) {
        switch (opt) {
            case 's':
                if (!parse_uint(optarg, record_size) || record_size < 1) {
                    fprintf(stderr,
                        "ERROR: Invalid record size (must be at least 1)\n");
                    return EXIT_FAILURE;
                }
                break;
            case 'u':
                flag_unique = true;
                break;
            case 'M':
                memory_size = parse_memory_size(optarg);
                if (memory_size == 0) {
                    return EXIT_FAILURE;
                }
                break;
            case 'B':
                if (!parse_uint(optarg, branch_factor) || branch_factor < 2) {
                    fprintf(stderr,
                        "ERROR: Invalid radix value, must be at least 2\n");
                    return EXIT_FAILURE;
                }
                break;
            case 'J':
                if (!parse_uint(optarg, num_threads)
                        || num_threads < 1
                        || num_threads > MAX_THREADS) {
                    fprintf(stderr, "ERROR: Invalid number of threads\n");
                    return EXIT_FAILURE;
                }
                break;
            case 'T':
                tempdir = optarg;
                break;
            case 'v':
                flag_verbose = true;
                break;
            case 'h':
                usage();
                return EXIT_SUCCESS;
            default:
                usage();
                return EXIT_FAILURE;
        }
    }

    if (record_size < 1) {
        fprintf(stderr, "ERROR: Missing required parameter --size\n");
        usage();
        return EXIT_FAILURE;
    }

    if (argc < optind + 2) {
        fprintf(stderr,
            "ERROR: Input and output file names must be specified\n");
        usage();
        return EXIT_FAILURE;
    }

    if (argc > optind + 2) {
        fprintf(stderr, "ERROR: Unexpected command-line parameters\n");
        usage();
        return EXIT_FAILURE;
    }

    if (memory_size >= SIZE_MAX) {
        fprintf(
            stderr,
            "ERROR: This system supports at most %zu MB memory\n",
            SIZE_MAX / 1024 / 1024 - 1);
        return EXIT_FAILURE;
    }

    std::string input_name(argv[optind]);
    std::string output_name(argv[optind+1]);

    SortContext ctx;
    ctx.record_size = record_size;
    ctx.memory_size = memory_size;
    ctx.branch_factor = branch_factor;
    ctx.num_threads = num_threads;
    ctx.flag_unique = flag_unique;
    ctx.flag_verbose = flag_verbose;
    ctx.temporary_directory = tempdir;

    try {
        sortbin(input_name, output_name, ctx);
    } catch (const std::exception& ex) {
        fprintf(stderr, "ERROR: %s\n", ex.what());
        return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}