diff options
author | upstream source tree <ports@midipix.org> | 2015-03-15 20:14:05 -0400 |
---|---|---|
committer | upstream source tree <ports@midipix.org> | 2015-03-15 20:14:05 -0400 |
commit | 554fd8c5195424bdbcabf5de30fdc183aba391bd (patch) | |
tree | 976dc5ab7fddf506dadce60ae936f43f58787092 /libstdc++-v3/testsuite/20_util/hash | |
download | cbb-gcc-4.6.4-554fd8c5195424bdbcabf5de30fdc183aba391bd.tar.bz2 cbb-gcc-4.6.4-554fd8c5195424bdbcabf5de30fdc183aba391bd.tar.xz |
obtained gcc-4.6.4.tar.bz2 from upstream website; (tag: upstream)
verified gcc-4.6.4.tar.bz2.sig;
imported gcc-4.6.4 source tree from verified upstream tarball.
downloading a git-generated archive based on the 'upstream' tag
should provide you with a source tree that is binary identical
to the one extracted from the above tarball.
if you have obtained the source via the command 'git clone',
however, do note that line-endings of files in your working
directory might differ from line-endings of the respective
files in the upstream repository.
Diffstat (limited to 'libstdc++-v3/testsuite/20_util/hash')
4 files changed, 493 insertions, 0 deletions
diff --git a/libstdc++-v3/testsuite/20_util/hash/chi2_quality.cc b/libstdc++-v3/testsuite/20_util/hash/chi2_quality.cc new file mode 100644 index 000000000..8a388349b --- /dev/null +++ b/libstdc++-v3/testsuite/20_util/hash/chi2_quality.cc @@ -0,0 +1,218 @@ +// { dg-options "-std=gnu++0x" } + +// Use smaller statistics when running on simulators, so it takes less time. +// { dg-options "-std=gnu++0x -DSAMPLES=10000" { target simulator } } + +// Copyright (C) 2010, 2011 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. + +// This file uses the chi^2 test to measure the quality of a hash +// function, by computing the uniformity with which it distributes a set +// of N strings into k buckets (where k is significantly smaller than N). +// +// Each bucket has B[i] strings in it. The expected value of each bucket +// for a uniform distribution is z = N/k, so +// chi^2 = Sum_i (B[i] - z)^2 / z. +// +// We check whether chi^2 is small enough to be consistent with the +// hypothesis of a uniform distribution. If F(chi^2, k-1) is close to +// 0 (where F is the upper-tail, i.e. complementary cumulative, +// probability distribution), we can +// reject that hypothesis. So we don't want F to be too small, which +// for large k, means we want chi^2 to be not too much larger than k. 
+// +// We use the chi^2 test for several sets of strings. Any non-horrible +// hash function should do well with purely random strings. A really +// good hash function will also do well with more structured sets, +// including ones where the strings differ by only a few bits. + +#include <algorithm> +#include <cstdlib> +#include <cstdio> +#include <fstream> +#include <functional> +#include <iostream> +#include <iterator> +#include <string> +#include <unordered_set> +#include <vector> +#include <testsuite_hooks.h> + +#ifndef SAMPLES +#define SAMPLES 300000 +#endif + +template <typename Container> + double + chi2_hash(const Container& c, long buckets) + { + std::vector<int> counts(buckets); + std::hash<std::string> hasher; + double elements = 0; + for (auto i = c.begin(); i != c.end(); ++i) + { + ++counts[hasher(*i) % buckets]; + ++elements; + } + + const double z = elements / buckets; + double sum = 0; + for (long i = 0; i < buckets; ++i) + { + double delta = counts[i] - z; + sum += delta*delta; + } + return sum/z; + } + +// Tests chi^2 for a distribution of uniformly generated random strings. +void +test_uniform_random() +{ + bool test __attribute__((unused)) = true; + std::srand(137); + std::unordered_set<std::string> set; + std::string s; + const unsigned long N = SAMPLES; + const unsigned long k = N/100; + const unsigned int len = 25; + while (set.size() < N) + { + s.clear(); + for (unsigned int i = 0; i < len; ++i) + s.push_back(rand() % 128); + set.insert(s); + } + + double chi2 = chi2_hash(set, k); + VERIFY( chi2 < k*1.1 ); +} + +// Tests chi^2 for a distribution of strings that differ from each +// other by only a few bits. We start with an arbitrary base string, and +// flip three random bits for each member of the set. 
+void +test_bit_flip_set() +{ + bool test __attribute__((unused)) = true; + const unsigned long N = SAMPLES; + const unsigned long k = N/100; + const unsigned int len = 67; + const unsigned int bitlen = len * 8; + const unsigned int bits_to_flip = 3; + const char base[len+1] = "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "0123456789!@#$%"; + + std::unordered_set<std::string> set; + while (set.size() < N) + { + std::string s(base, base+len); + for (unsigned int i = 0; i < bits_to_flip; ++i) + { + int bit = rand() % bitlen; + s[bit/8] ^= (1 << (bit%8)); + } + set.insert(s); + } + + double chi2 = chi2_hash(set, k); + VERIFY( chi2 < k*1.1 ); +} + +// Tests chi^2 of a set of strings that all have a similar pattern, +// intended to mimic some sort of ID string. +void +test_numeric_pattern_set() +{ + bool test __attribute__((unused)) = true; + const unsigned long N = SAMPLES; + const unsigned long k = N/100; + std::vector<std::string> set; + for (unsigned long i = 0; i < N; ++i) + { + long i1 = i % 100000; + long i2 = i / 100000; + char buf[16]; + std::sprintf(buf, "XX-%05lu-%05lu", i1, i2); + set.push_back(buf); + } + + double chi2 = chi2_hash(set, k); + VERIFY( chi2 < k*1.1 ); +} + +// Tests chi^2 for a set of strings that all consist of '1' and '0'. +void +test_bit_string_set() +{ + bool test __attribute__((unused)) = true; + const unsigned long N = SAMPLES; + const unsigned long k = N/100; + std::vector<std::string> set; + std::string s; + for (unsigned long i = 0; i < N; ++i) + { + s.clear(); + for (unsigned int j = 0; j < sizeof(unsigned long) * 8; ++j) + { + const bool bit = (1UL << j) & i; + s.push_back(bit ? '1' : '0'); + } + set.push_back(s); + } + + double chi2 = chi2_hash(set, k); + VERIFY( chi2 < k*1.1 ); +} + +// Tests chi^2 for a set of words taken from a document written in English. +void +test_document_words() +{ + // That file is 187587 single-word lines. 
To avoid a timeout, just skip + // this part, which would take up to 95% of the program runtime (with + // SAMPLES == 10000), if we're not supposed to run anywhere that long. +#if SAMPLES >= 100000 + bool test __attribute__((unused)) = true; + const std::string f_name = "thirty_years_among_the_dead_preproc.txt"; + std::ifstream in(f_name); + VERIFY( in.is_open() ); + std::vector<std::string> words; + words.assign(std::istream_iterator<std::string>(in), + std::istream_iterator<std::string>()); + VERIFY( words.size() > 100000 ); + std::sort(words.begin(), words.end()); + auto it = std::unique(words.begin(), words.end()); + words.erase(it, words.end()); + VERIFY( words.size() > 5000 ); + + const unsigned long k = words.size() / 20; + double chi2 = chi2_hash(words, k); + VERIFY( chi2 < k*1.1 ); +#endif +} + +int +main() +{ + test_uniform_random(); + test_bit_flip_set(); + test_numeric_pattern_set(); + test_bit_string_set(); + test_document_words(); + return 0; +} diff --git a/libstdc++-v3/testsuite/20_util/hash/operators/size_t.cc b/libstdc++-v3/testsuite/20_util/hash/operators/size_t.cc new file mode 100644 index 000000000..af0c54ec4 --- /dev/null +++ b/libstdc++-v3/testsuite/20_util/hash/operators/size_t.cc @@ -0,0 +1,54 @@ +// { dg-options "-std=gnu++0x" } +// 2007-08-20 <benjamin@redhat.com> +// +// Copyright (C) 2007, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. + +#include <functional> +#include <system_error> +#include <testsuite_hooks.h> + +template<typename T> + void + do_test() + { + bool test __attribute__((unused)) = true; + + typedef T value_type; + typedef std::hash<value_type> hash_type; + using std::size_t; + + value_type v; // default initialized is fine, same value all that matters. + hash_type h1; + size_t r1 = size_t(h1(v)); + + hash_type h2; + size_t r2 = size_t(h2(v)); + + VERIFY( r1 == r2 ); + } + +void test01() +{ + do_test<std::error_code>(); +} + +int main() +{ + test01(); + return 0; +} diff --git a/libstdc++-v3/testsuite/20_util/hash/quality.cc b/libstdc++-v3/testsuite/20_util/hash/quality.cc new file mode 100644 index 000000000..0bc263724 --- /dev/null +++ b/libstdc++-v3/testsuite/20_util/hash/quality.cc @@ -0,0 +1,172 @@ +// { dg-options "-std=gnu++0x" } +// { dg-options "-DNTESTS=1 -DNSTRINGS=100 -DSTRSIZE=21 -std=gnu++0x" { target simulator } } + +// Copyright (C) 2010, 2011 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. 
+ +#include <cstdlib> +#include <unordered_set> +#include <string> +#include <functional> +#include <vector> +#include <testsuite_hooks.h> + +using namespace std; + +#ifndef NTESTS +#define NTESTS 5 +#endif +#ifndef NSTRINGS +#define NSTRINGS 200 +#endif +#ifndef STRSIZE +#define STRSIZE 42 +#endif + +const unsigned int num_quality_tests = NTESTS; +const unsigned int num_strings_for_quality_tests = NSTRINGS; +const unsigned int string_size = STRSIZE; + +vector<string> +random_strings(unsigned int n, unsigned int len) +{ + string s(len, '\0'); + unordered_set<string> result_set; + while (result_set.size() < n) + { + result_set.insert(s); + unsigned int tmp = rand(); + tmp %= len * 256; + s[tmp / 256] = tmp % 256; + } + return vector<string>(result_set.begin(), result_set.end()); +} + +double +score_from_varying_position(string s, unsigned int index) +{ + bool test __attribute__((unused)) = true; + unsigned int bits_in_hash_code = sizeof(size_t) * 8; + + // We'll iterate through all 256 vals for s[index], leaving the rest + // of s fixed. Then, for example, out of the 128 times that + // s[index] has its 3rd bit equal to 0 we would like roughly half 1s + // and half 0s in bit 9 of the hash codes. + // + // Bookkeeping: Conceptually we want a 3D array of ints. We want to + // count the number of times each output position (of which there are + // bits_in_hash_code) is 1 for each bit position within s[index] (of + // which there are 8) and value of that bit (of which there are 2). 
+ const unsigned int jj = 2; + const unsigned int kk = jj * bits_in_hash_code; + const unsigned int array_size = 8 * kk; + vector<int> ones(array_size, 0); + + for (int i = 0; i < 256; i++) + { + s[index] = i; + size_t h = hash<string>()(s); + for (int j = 0; h != 0; j++, h >>= 1) + { + if (h & 1) + { + for (int k = 0; k < 8; k++) + ++ones[k * kk + j * jj + ((i >> k) & 1)]; + } + } + } + + // At most, the innermost statement in the above loop nest can + // execute 256 * bits_in_hash_code * 8 times. If the hash is good, + // it'll execute about half that many times, with a pretty even + // spread across the elements of ones[]. + VERIFY( 256 * bits_in_hash_code * 8 / array_size == 128 ); + int max_ones_possible = 128; + int good = 0, bad = 0; + for (int bit = 0; bit <= 1; bit++) + { + for (unsigned int j = 0; j < bits_in_hash_code; j++) + { + for (int bitpos = 0; bitpos < 8; bitpos++) + { + int z = ones[bitpos * kk + j * jj + bit]; + if (z <= max_ones_possible / 6 + || z >= max_ones_possible * 5 / 6) + { + // The hash function screwed up, or was just unlucky, + // as 128 flips of a perfect coin occasionally yield + // far from 64 heads. + bad++; + } + else + good++; + } + } + } + return good / (double)(good + bad); +} + +double +score_from_varying_position(const vector<string>& v, unsigned int index) +{ + double score = 0; + for (unsigned int i = 0; i < v.size(); i++) + score += score_from_varying_position(v[i], index); + return score / v.size(); +} + +double +quality_test(unsigned int num_strings, unsigned int string_size) +{ + // Construct random strings. + vector<string> v = random_strings(num_strings, string_size); + double sum_of_scores = 0; + for (unsigned int i = 0; i < string_size; i++) + sum_of_scores += score_from_varying_position(v, i); + + // A good hash function should have a score very close to 1, and a bad + // hash function will have a score close to 0. 
+ return sum_of_scores / string_size; +} + +void +quality_test() +{ + bool test __attribute__((unused)) = true; + srand(137); + double sum_of_scores = 0; + for (unsigned int i = 0; i < num_quality_tests; i++) + { + double score = quality_test(num_strings_for_quality_tests, + string_size); + sum_of_scores += score; + VERIFY( score > 0.99 ); + } + + if (num_quality_tests > 1) + { + double mean_quality = sum_of_scores / num_quality_tests; + VERIFY( mean_quality > 0.9999 ); + } +} + +int +main() +{ + quality_test(); + return 0; +} diff --git a/libstdc++-v3/testsuite/20_util/hash/requirements/explicit_instantiation.cc b/libstdc++-v3/testsuite/20_util/hash/requirements/explicit_instantiation.cc new file mode 100644 index 000000000..9c71a5fc9 --- /dev/null +++ b/libstdc++-v3/testsuite/20_util/hash/requirements/explicit_instantiation.cc @@ -0,0 +1,49 @@ +// { dg-options "-std=gnu++0x" } +// { dg-do compile } + +// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. + +#include <functional> +#include <string> +#include <system_error> + +// Verify that we can instantiate hash for every required type. 
+template class std::hash<bool>; +template class std::hash<char>; +template class std::hash<signed char>; +template class std::hash<unsigned char>; +template class std::hash<char16_t>; +template class std::hash<char32_t>; +template class std::hash<short>; +template class std::hash<int>; +template class std::hash<long>; +template class std::hash<unsigned short>; +template class std::hash<unsigned int>; +template class std::hash<unsigned long>; +template class std::hash<float>; +template class std::hash<double>; +template class std::hash<long double>; +template class std::hash<void*>; +template class std::hash<std::string>; +template class std::hash<std::error_code>; + +#ifdef _GLIBCXX_USE_WCHAR_T +template class std::hash<wchar_t>; +template class std::hash<std::wstring>; +#endif + |