diff --git a/include/boost/sort/spreadsort/detail/float_sort.hpp b/include/boost/sort/spreadsort/detail/float_sort.hpp index 8af8a17..03dcbaf 100644 --- a/include/boost/sort/spreadsort/detail/float_sort.hpp +++ b/include/boost/sort/spreadsort/detail/float_sort.hpp @@ -56,12 +56,36 @@ namespace spreadsort { Div_type & max, Div_type & min, Right_shift rshift) { min = max = rshift(*current, 0); - Div_type prev = min; + RandomAccessIter prev = current; bool sorted = true; while (++current < last) { Div_type value = rshift(*current, 0); - sorted &= value >= prev; - prev = value; + sorted &= *current >= *prev; + prev = current; + if (max < value) + max = value; + else if (value < min) + min = value; + } + return sorted; + } + + // Return true if the list is sorted. Otherwise, find the minimum and + // maximum. Uses comp to check if the data is already sorted. + template + inline bool + is_sorted_or_find_extremes(RandomAccessIter current, RandomAccessIter last, + Div_type & max, Div_type & min, + Right_shift rshift, Compare comp) + { + min = max = rshift(*current, 0); + RandomAccessIter prev = current; + bool sorted = true; + while (++current < last) { + Div_type value = rshift(*current, 0); + sorted &= !comp(*current, *prev); + prev = current; if (max < value) max = value; else if (value < min) @@ -123,12 +147,12 @@ namespace spreadsort { Cast_type & max, Cast_type & min) { min = max = cast_float_iter(current); - Cast_type prev = min; + RandomAccessIter prev = current; bool sorted = true; while (++current < last) { Cast_type value = cast_float_iter(current); - sorted &= value >= prev; - prev = value; + sorted &= *current >= *prev; + prev = current; if (max < value) max = value; else if (value < min) @@ -205,8 +229,9 @@ namespace spreadsort { { Div_type max, min; if (is_sorted_or_find_extremes(first, last, - max, min)) + max, min)) return; + unsigned log_divisor = get_log_divisor( last - first, rough_log_2_size(Size_type(max - min))); Div_type div_min = min >> 
log_divisor; @@ -323,7 +348,7 @@ namespace spreadsort { size_t *bin_sizes, Right_shift rshift, Compare comp) { Div_type max, min; - if (is_sorted_or_find_extremes(first, last, max, min, rshift)) + if (is_sorted_or_find_extremes(first, last, max, min, rshift, comp)) return; unsigned log_divisor = get_log_divisor( last - first, rough_log_2_size(Size_type(max - min))); @@ -578,7 +603,7 @@ namespace spreadsort { size_t *bin_sizes, Right_shift rshift, Compare comp) { Div_type max, min; - if (is_sorted_or_find_extremes(first, last, max, min, rshift)) + if (is_sorted_or_find_extremes(first, last, max, min, rshift, comp)) return; unsigned log_divisor = get_log_divisor( last - first, rough_log_2_size(Size_type(max - min))); @@ -679,7 +704,7 @@ namespace spreadsort { void >::type float_sort(RandomAccessIter first, RandomAccessIter last) { - size_t bin_sizes[1 << max_splits]; + size_t bin_sizes[1 << max_finishing_splits]; std::vector bin_cache; float_sort_rec (first, last, bin_cache, 0, bin_sizes); @@ -694,7 +719,7 @@ namespace spreadsort { void >::type float_sort(RandomAccessIter first, RandomAccessIter last) { - size_t bin_sizes[1 << max_splits]; + size_t bin_sizes[1 << max_finishing_splits]; std::vector bin_cache; float_sort_rec (first, last, bin_cache, 0, bin_sizes); @@ -727,7 +752,7 @@ namespace spreadsort { float_sort(RandomAccessIter first, RandomAccessIter last, Div_type, Right_shift rshift) { - size_t bin_sizes[1 << max_splits]; + size_t bin_sizes[1 << max_finishing_splits]; std::vector bin_cache; float_sort_rec (first, last, bin_cache, 0, bin_sizes, rshift); @@ -740,7 +765,7 @@ namespace spreadsort { float_sort(RandomAccessIter first, RandomAccessIter last, Div_type, Right_shift rshift) { - size_t bin_sizes[1 << max_splits]; + size_t bin_sizes[1 << max_finishing_splits]; std::vector bin_cache; float_sort_rec (first, last, bin_cache, 0, bin_sizes, rshift); @@ -765,7 +790,7 @@ namespace spreadsort { float_sort(RandomAccessIter first, RandomAccessIter last, Div_type, 
Right_shift rshift, Compare comp) { - size_t bin_sizes[1 << max_splits]; + size_t bin_sizes[1 << max_finishing_splits]; std::vector bin_cache; float_sort_rec @@ -780,7 +805,7 @@ namespace spreadsort { float_sort(RandomAccessIter first, RandomAccessIter last, Div_type, Right_shift rshift, Compare comp) { - size_t bin_sizes[1 << max_splits]; + size_t bin_sizes[1 << max_finishing_splits]; std::vector bin_cache; float_sort_rec diff --git a/include/boost/sort/spreadsort/detail/string_sort.hpp b/include/boost/sort/spreadsort/detail/string_sort.hpp index a5a40f0..ef943b8 100644 --- a/include/boost/sort/spreadsort/detail/string_sort.hpp +++ b/include/boost/sort/spreadsort/detail/string_sort.hpp @@ -42,7 +42,7 @@ namespace spreadsort { { const int char_size = sizeof(Unsigned_char_type); size_t nextOffset = char_offset; - int step_size = max_step_size; + int step_size = max_step_size / char_size; while (true) { RandomAccessIter curr = first; do { diff --git a/include/boost/sort/spreadsort/spreadsort.hpp b/include/boost/sort/spreadsort/spreadsort.hpp index 678fba4..97e92b4 100644 --- a/include/boost/sort/spreadsort/spreadsort.hpp +++ b/include/boost/sort/spreadsort/spreadsort.hpp @@ -88,8 +88,8 @@ namespace spreadsort { } /*! - \brief Generic @c spreadsort variant detecting string element type so call to @c string_sort for @c std::strings and @c std::wstrings. - \details If the data type provided is a string or wstring, @c string_sort is used. + \brief Generic @c spreadsort variant detecting string element type so call to @c string_sort for @c std::strings. + \details If the data type provided is a string, @c string_sort is used. \note Sorting other data types requires picking between @c integer_sort, @c float_sort and @c string_sort directly, as @c spreadsort won't accept types that don't have the appropriate @c type_traits. 
@@ -107,12 +107,36 @@ namespace spreadsort { template inline typename boost::enable_if_c< is_same::value_type, - typename std::string>::value || + typename std::string>::value, void >::type + spreadsort(RandomAccessIter first, RandomAccessIter last) + { + string_sort(first, last); + } + + /*! + \brief Generic @c spreadsort variant detecting string element type so call to @c string_sort for @c std::wstrings. + \details If the data type provided is a wstring, @c string_sort is used. + \note Sorting other data types requires picking between @c integer_sort, @c float_sort and @c string_sort directly, + as @c spreadsort won't accept types that don't have the appropriate @c type_traits. + + \param[in] first Iterator pointing to the first element. + \param[in] last Iterator pointing to one beyond the end of data. + + \pre [@c first, @c last) is a valid range. + \pre @c RandomAccessIter @c value_type is mutable. + \pre @c RandomAccessIter @c value_type is LessThanComparable + \pre @c RandomAccessIter @c value_type supports the @c operator>>, + which returns an integer-type right-shifted a specified number of bits. + \post The elements in the range [@c first, @c last) are sorted in ascending order. + */ + template + inline typename boost::enable_if_c< is_same::value_type, typename std::wstring>::value, void >::type spreadsort(RandomAccessIter first, RandomAccessIter last) { - string_sort(first, last); + unsigned wchar_t unused = '\0'; + string_sort(first, last, unused); } } // namespace spreadsort } // namespace sort diff --git a/test/float_sort_test.cpp b/test/float_sort_test.cpp index 2e0f92a..e479aab 100644 --- a/test/float_sort_test.cpp +++ b/test/float_sort_test.cpp @@ -42,7 +42,27 @@ rand_32(bool sign = true) { return result; } -static const unsigned input_count = 100000; +static const unsigned input_count = 1000000; + +// Helper function to run tests across all float_sort interface variants. 
+template +void test_vector(vector base_vec, RightShift shifter) { + vector sorted_vec = base_vec; + vector test_vec = base_vec; + std::sort(sorted_vec.begin(), sorted_vec.end()); + //Testing boost::sort::spreadsort version + test_vec = base_vec; + boost::sort::spreadsort::spreadsort(test_vec.begin(), test_vec.end()); + BOOST_CHECK(test_vec == sorted_vec); + //One functor + test_vec = base_vec; + float_sort(test_vec.begin(), test_vec.end(), shifter); + BOOST_CHECK(test_vec == sorted_vec); + //Both functors + test_vec = base_vec; + float_sort(test_vec.begin(), test_vec.end(), shifter, less()); + BOOST_CHECK(test_vec == sorted_vec); +} void float_test() { @@ -59,49 +79,58 @@ void float_test() else base_vec.push_back(val); } - vector sorted_vec = base_vec; - vector test_vec = base_vec; - std::sort(sorted_vec.begin(), sorted_vec.end()); - //Testing boost::sort::spreadsort version - test_vec = base_vec; - boost::sort::spreadsort::spreadsort(test_vec.begin(), test_vec.end()); - BOOST_CHECK(test_vec == sorted_vec); - //One functor - test_vec = base_vec; - float_sort(test_vec.begin(), test_vec.end(), rightshift()); - BOOST_CHECK(test_vec == sorted_vec); - //Both functors - test_vec = base_vec; - float_sort(test_vec.begin(), test_vec.end(), rightshift(), less()); - BOOST_CHECK(test_vec == sorted_vec); + test_vector(base_vec, rightshift()); + + // Trying both positive and negative sorted and reverse sorted data. 
+ base_vec.clear(); + for (size_t i = 0; i < input_count; ++i) base_vec.push_back(-static_cast<int>(i)); + test_vector(base_vec, rightshift()); + base_vec.clear(); + for (size_t i = 0; i < input_count; ++i) base_vec.push_back(static_cast<int>(i) - static_cast<int>(input_count)); + test_vector(base_vec, rightshift()); + base_vec.clear(); + for (size_t i = 0; i < input_count; ++i) base_vec.push_back(input_count - i); + test_vector(base_vec, rightshift()); + base_vec.clear(); + for (size_t i = 0; i < input_count; ++i) base_vec.push_back(i); + test_vector(base_vec, rightshift()); + base_vec.clear(); + for (size_t i = 0; i < input_count; ++i) base_vec.push_back(i); + for (size_t i = 0; i < input_count; i += 2) base_vec[i] *= -1; + test_vector(base_vec, rightshift()); } void double_test() { - vector long_base_vec; + vector base_vec; for (unsigned u = 0; u < input_count; ++u) { double val = double ((((boost::int64_t)rand_32()) << ((8 * sizeof(int)) -1)) + rand_32(false)); //As std::sort gives arbitrary results for NaNs and 0.0 vs. -0.0, //treat all those as just 0.0 for testing if (!(val < 0.0) && !(0.0 < val)) - long_base_vec.push_back(0.0); + base_vec.push_back(0.0); else - long_base_vec.push_back(val); + base_vec.push_back(val); } - vector long_sorted_vec = long_base_vec; - vector long_test_vec = long_base_vec; - float_sort(long_test_vec.begin(), long_test_vec.end()); - std::sort(long_sorted_vec.begin(), long_sorted_vec.end()); - BOOST_CHECK(long_test_vec == long_sorted_vec); - //One functor - long_test_vec = long_base_vec; - float_sort(long_test_vec.begin(), long_test_vec.end(), rightshift_64()); - BOOST_CHECK(long_test_vec == long_sorted_vec); - //Both functors - long_test_vec = long_base_vec; - float_sort(long_test_vec.begin(), long_test_vec.end(), rightshift_64(), - less()); - BOOST_CHECK(long_test_vec == long_sorted_vec); + test_vector(base_vec, rightshift_64()); + + // Trying both positive and negative sorted and reverse sorted data. 
+ base_vec.clear(); + for (size_t i = 0; i < input_count; ++i) base_vec.push_back(-static_cast<int>(i)); + test_vector(base_vec, rightshift_64()); + base_vec.clear(); + for (size_t i = 0; i < input_count; ++i) base_vec.push_back(static_cast<int>(i) - static_cast<int>(input_count)); + test_vector(base_vec, rightshift_64()); + base_vec.clear(); + for (size_t i = 0; i < input_count; ++i) base_vec.push_back(input_count - i); + test_vector(base_vec, rightshift_64()); + base_vec.clear(); + for (size_t i = 0; i < input_count; ++i) base_vec.push_back(i); + test_vector(base_vec, rightshift_64()); + base_vec.clear(); + for (size_t i = 0; i < input_count; ++i) base_vec.push_back(i); + for (size_t i = 0; i < input_count; i += 2) base_vec[i] *= -1; + test_vector(base_vec, rightshift_64()); } // Verify that 0 and 1 elements work correctly. @@ -121,6 +150,6 @@ int test_main( int, char*[] ) srand(1); float_test(); double_test(); - corner_test(); + corner_test(); return 0; } diff --git a/test/string_sort_test.cpp b/test/string_sort_test.cpp index b4dcaa8..aa2d999 100644 --- a/test/string_sort_test.cpp +++ b/test/string_sort_test.cpp @@ -29,10 +29,20 @@ struct bracket { } }; +struct wbracket { + wchar_t operator()(const wstring &x, size_t offset) const { + return x[offset]; + } +}; + struct get_size { size_t operator()(const string &x) const{ return x.size(); } }; +struct wget_size { + size_t operator()(const wstring &x) const{ return x.size(); } +}; + static const unsigned input_count = 100000; // Test that update_offset finds the first character with a difference. 
@@ -124,6 +134,55 @@ void string_test() BOOST_CHECK(test_vec == sorted_vec); } +void wstring_test() +{ + // Prepare inputs + vector base_vec; + const unsigned max_length = 32; + srand(1); + //Generating semirandom numbers + for (unsigned u = 0; u < input_count; ++u) { + unsigned length = rand() % max_length; + wstring result; + for (unsigned u = 0; u < length; ++u) { + wchar_t val = ((rand() % 256) << 8) + rand() % 256; + result.push_back(val); + } + base_vec.push_back(result); + } + vector sorted_vec = base_vec; + vector test_vec = base_vec; + std::sort(sorted_vec.begin(), sorted_vec.end()); + //Testing basic call + unsigned wchar_t unused = '\0'; + string_sort(test_vec.begin(), test_vec.end(), unused); + BOOST_CHECK(test_vec == sorted_vec); + //Testing boost::sort::spreadsort wrapper + test_vec = base_vec; + boost::sort::spreadsort::spreadsort(test_vec.begin(), test_vec.end()); + BOOST_CHECK(test_vec == sorted_vec); + //Character functors + test_vec = base_vec; + string_sort(test_vec.begin(), test_vec.end(), wbracket(), wget_size()); + BOOST_CHECK(test_vec == sorted_vec); + //All functors + test_vec = base_vec; + string_sort(test_vec.begin(), test_vec.end(), wbracket(), wget_size(), + less()); + BOOST_CHECK(test_vec == sorted_vec); + //reverse order + test_vec = base_vec; + std::sort(sorted_vec.begin(), sorted_vec.end(), greater()); + reverse_string_sort(test_vec.begin(), test_vec.end(), greater(), + unused); + BOOST_CHECK(test_vec == sorted_vec); + //reverse order with functors + test_vec = base_vec; + reverse_string_sort(test_vec.begin(), test_vec.end(), wbracket(), wget_size(), + greater()); + BOOST_CHECK(test_vec == sorted_vec); +} + // Verify that 0, 1, and input_count empty strings all sort correctly. void corner_test() { vector test_vec; @@ -145,6 +204,7 @@ int test_main( int, char*[] ) update_offset_test(); offset_comparison_test(); string_test(); + wstring_test(); corner_test(); return 0; }