// This example shows how to sort structs with complex multi-part keys using
// string_sort.
//
|
|
// Copyright Steven Ross 2009-2014.
|
|
//
|
|
// Distributed under the Boost Software License, Version 1.0.
|
|
// (See accompanying file LICENSE_1_0.txt or copy at
|
|
// http://www.boost.org/LICENSE_1_0.txt)
|
|
|
|
// See http://www.boost.org/libs/sort for library home page.
|
|
|
|
#include <boost/sort/spreadsort/string_sort.hpp>
|
|
#include <boost/sort/spreadsort/float_sort.hpp>
|
|
#include <time.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <algorithm>
|
|
#include <vector>
|
|
#include <iostream>
|
|
#include <fstream>
|
|
#include <string>
|
|
using std::string;
|
|
using namespace boost::sort::spreadsort;
|
|
|
|
//[generalized_functors
|
|
struct DATA_TYPE {
|
|
time_t birth;
|
|
float net_worth;
|
|
string first_name;
|
|
string last_name;
|
|
};
|
|
|
|
static const int birth_size = sizeof(time_t);
|
|
static const int first_name_offset = birth_size + sizeof(float);
|
|
static const boost::uint64_t base_mask = 0xff;
|
|
|
|
struct lessthan {
|
|
inline bool operator()(const DATA_TYPE &x, const DATA_TYPE &y) const {
|
|
if (x.birth != y.birth) {
|
|
return x.birth < y.birth;
|
|
}
|
|
if (x.net_worth != y.net_worth) {
|
|
return x.net_worth < y.net_worth;
|
|
}
|
|
if (x.first_name != y.first_name) {
|
|
return x.first_name < y.first_name;
|
|
}
|
|
return x.last_name < y.last_name;
|
|
}
|
|
};
|
|
|
|
struct bracket {
|
|
inline unsigned char operator()(const DATA_TYPE &x, size_t offset) const {
|
|
// Sort date as a signed int, returning the appropriate byte.
|
|
if (offset < birth_size) {
|
|
const int bit_shift = 8 * (birth_size - offset - 1);
|
|
unsigned char result = (x.birth & (base_mask << bit_shift)) >> bit_shift;
|
|
// Handling the sign bit. Unnecessary if the data is always positive.
|
|
if (offset == 0) {
|
|
return result ^ 128;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
// Sort a signed float. This requires reversing the order of negatives
|
|
// because of the way floats are represented in bits.
|
|
if (offset < first_name_offset) {
|
|
const int bit_shift = 8 * (first_name_offset - offset - 1);
|
|
unsigned key = float_mem_cast<float, unsigned>(x.net_worth);
|
|
unsigned char result = (key & (base_mask << bit_shift)) >> bit_shift;
|
|
// Handling the sign.
|
|
if (x.net_worth < 0) {
|
|
return 255 - result;
|
|
}
|
|
// Increasing positives so they are higher than negatives.
|
|
if (offset == birth_size) {
|
|
return 128 + result;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
// Sort a string that is before the end. This approach supports embedded
|
|
// nulls. If embedded nulls are not required, then just delete the "* 2"
|
|
// and the inside of the following if just becomes:
|
|
// return x.first_name[offset - first_name_offset];
|
|
const unsigned first_name_end_offset =
|
|
first_name_offset + x.first_name.size() * 2;
|
|
if (offset < first_name_end_offset) {
|
|
int char_offset = offset - first_name_offset;
|
|
// This signals that the string continues.
|
|
if (!(char_offset & 1)) {
|
|
return 1;
|
|
}
|
|
return x.first_name[char_offset >> 1];
|
|
}
|
|
|
|
// This signals that the string has ended, so that shorter strings come
|
|
// before longer ones.
|
|
if (offset == first_name_end_offset) {
|
|
return 0;
|
|
}
|
|
|
|
// The final string needs no special consideration.
|
|
return x.last_name[offset - first_name_end_offset - 1];
|
|
}
|
|
};
|
|
|
|
struct getsize {
|
|
inline size_t operator()(const DATA_TYPE &x) const {
|
|
return first_name_offset + x.first_name.size() * 2 + 1 +
|
|
x.last_name.size();
|
|
}
|
|
};
|
|
//] [/generalized_functors]
|
|
|
|
//Pass in an argument to test std::sort
|
|
int main(int argc, const char ** argv) {
|
|
std::ifstream indata;
|
|
std::ofstream outfile;
|
|
bool stdSort = false;
|
|
unsigned loopCount = 1;
|
|
for (int u = 1; u < argc; ++u) {
|
|
if (std::string(argv[u]) == "-std")
|
|
stdSort = true;
|
|
else
|
|
loopCount = atoi(argv[u]);
|
|
}
|
|
double total = 0.0;
|
|
//Run multiple loops, if requested
|
|
std::vector<DATA_TYPE> array;
|
|
for (unsigned u = 0; u < loopCount; ++u) {
|
|
indata.open("input.txt", std::ios_base::in | std::ios_base::binary);
|
|
if (indata.bad()) {
|
|
printf("input.txt could not be opened\n");
|
|
return 1;
|
|
}
|
|
|
|
// Read in the data.
|
|
DATA_TYPE inval;
|
|
while (!indata.eof() ) {
|
|
indata >> inval.first_name;
|
|
indata >> inval.last_name;
|
|
indata.read(reinterpret_cast<char *>(&(inval.birth)), birth_size);
|
|
indata.read(reinterpret_cast<char *>(&(inval.net_worth)), sizeof(float));
|
|
// Handling nan.
|
|
if (inval.net_worth != inval.net_worth) {
|
|
inval.net_worth = 0;
|
|
}
|
|
if (indata.eof())
|
|
break;
|
|
array.push_back(inval);
|
|
}
|
|
indata.close();
|
|
|
|
// Sort the data.
|
|
clock_t start, end;
|
|
double elapsed;
|
|
start = clock();
|
|
if (stdSort) {
|
|
std::sort(array.begin(), array.end(), lessthan());
|
|
} else {
|
|
//[generalized_functors_call
|
|
string_sort(array.begin(), array.end(), bracket(), getsize(), lessthan());
|
|
//] [/generalized_functors_call]
|
|
}
|
|
end = clock();
|
|
elapsed = static_cast<double>(end - start);
|
|
if (stdSort) {
|
|
outfile.open("standard_sort_out.txt", std::ios_base::out |
|
|
std::ios_base::binary | std::ios_base::trunc);
|
|
} else {
|
|
outfile.open("boost_sort_out.txt", std::ios_base::out |
|
|
std::ios_base::binary | std::ios_base::trunc);
|
|
}
|
|
if (outfile.good()) {
|
|
for (unsigned u = 0; u < array.size(); ++u)
|
|
outfile << array[u].birth << " " << array[u].net_worth << " "
|
|
<< array[u].first_name << " " << array[u].last_name << "\n";
|
|
outfile.close();
|
|
}
|
|
total += elapsed;
|
|
array.clear();
|
|
}
|
|
if (stdSort) {
|
|
printf("std::sort elapsed time %f\n", total / CLOCKS_PER_SEC);
|
|
} else {
|
|
printf("spreadsort elapsed time %f\n", total / CLOCKS_PER_SEC);
|
|
}
|
|
return 0;
|
|
}
|