test_hash.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#include "crucible/core.hpp"
#include "crucible/test.hpp"

#include <cstdint>

namespace test_hash {

// Smoke test for crucible::hash on short strings.
//
// Hashes a fixed corpus of dictionary words, drops each hash into one of N buckets (N == number of
// words) and asserts that fewer than half of the non-empty buckets hold more than one word.
CRUCIBLE_TEST_SCENARIO(hash_short_strings) {
  // Input corpus: 256 dictionary words picked at random.
  auto const words = crucible::make_heap_buffer<crucible::string_view>({
    "physoclist", "Gordyaean", "Panionian", "infirm", "gigerium", "gymnosporous", "discredit",
    "angulated", "onus", "pharmacopsychology", "brutalize", "variolization", "unmindfulness",
    "woodwall", "cryingly", "phytolithological", "convenience", "tregadyne", "arecoline",
    "reunite", "horsefoot", "spatted", "nannoplankton", "Noam", "Tabellariaceae", "spaework",
    "align", "tell", "preworthy", "photopography", "scleranth", "ungainly", "cumene",
    "nosotrophy", "barb", "semiduplex", "Lapageria", "hyperimmunity", "tritely",
    "oversusceptible", "trilabe", "sufficientness", "valethood", "Malebranchism", "Paumari",
    "preliterally", "slobbery", "refringence", "lam", "preindulge", "agitative", "cheirography",
    "Mephistopheleanly", "ladkin", "turnhall", "sunburntness", "metaplasm", "Madonna",
    "pollable", "mootman", "cheddaring", "pontic", "hypsoisotherm", "libretto", "granulation",
    "sarsenet", "megaerg", "adrenocorticotropic", "Soohong", "gris", "Eucommia", "wrathfulness",
    "woodruff", "corrigibleness", "enthraldom", "logeum", "unreeving", "archdetective",
    "Maskins", "pancreatalgia", "miseffect", "Norseland", "ureteropyosis", "Coniothyrium",
    "pteropaedes", "Ctenophora", "enterocele", "was", "superambitious", "bridgebuilding",
    "triglochin", "glider", "exaltative", "drivescrew", "hypophloeous", "deboistness",
    "declensionally", "constringe", "uncontrovertibly", "testaceology", "Kate", "phrenosinic",
    "Venezuelan", "syllogistics", "megacerine", "speech", "unordinariness", "fooling",
    "reannexation", "blackfire", "sloosh", "tracheolar", "callidness", "zokor",
    "telemeteorographic", "basial", "pteridophilism", "wristfall", "unproportionedness",
    "amidoketone", "thermantic", "Kshatriyahood", "Sertularia", "neurataxia", "Calite",
    "thaumoscopic", "pruning", "unalleviably", "kingless", "irreconcilability", "Claude",
    "Sphaerocarpus", "parasitophobia", "nebbuck", "kinnikinnick", "dockization", "Abrahamite",
    "repurchase", "Homerian", "charlatanically", "polderboy", "saurischian", "cardiorrhaphy",
    "apoplectoid", "mankin", "allotee", "aminate", "notal", "Hypnaceae", "identism",
    "understring", "Anaxagorean", "titty", "salvability", "hydrophthalmus", "esponton",
    "brassily", "Cocanucos", "slugging", "palet", "spontaneous", "cation", "superagency",
    "challie", "Mirach", "cinematographic", "alexipyretic", "corticosterone",
    "semihyperbolical", "mewer", "chivalresque", "nights", "subequally", "knotty",
    "nonvariable", "ambrosine", "Sphenopteris", "Adai", "meteorite", "tomjohn", "Yokuts",
    "Franklin", "unjudgelike", "violoncellist", "abolitionist", "festerment", "Novemberish",
    "unicostate", "noncirculation", "claught", "scattered", "curatial", "sweetberry",
    "unbragged", "Llanberisslate", "birational", "thurible", "thetically", "cipolin",
    "prestress", "tailpipe", "thiuram", "participable", "redolence", "lobstering", "feveret",
    "microsublimation", "choreutic", "tagrag", "platitudinization", "solipedous", "Alopiidae",
    "Hildebrandist", "silaginoid", "bore", "festucine", "thymocyte", "blacker", "gib",
    "hydrargyrism", "unapplicableness", "unintelligibly", "Mister", "rentaller", "undermine",
    "Emesidae", "coxcombhood", "squidge", "postponement", "magnify", "stapelia",
    "diathermotherapy", "chutney", "stupefactive", "holler", "vociferous", "visie", "scrounger",
    "homoerotic", "synchronized", "puddinghouse", "acuclosure", "zoophytical", "unprovide",
    "noncustomary", "twitch", "nonauthentication", "frow", "klop", "unfeigningly", "encampment",
    "preregistration", "mimeographic", "delphacid", "kilter", "remarry"
  });

  // One 64-bit hash per word, stored in the same order as the corpus.
  auto const word_hashes = crucible::iterate(words)
    .map([](crucible::string_view const word) {
      return crucible::hash(word);
    })
    .materialize<crucible::heap_buffer<std::uint64_t>>();

  // Per-bucket occupancy counters. There are exactly as many buckets as words, which deliberately
  // models a hash table running at a very poor load factor.
  // NOTE(review): the counting below relies on make_heap_buffer(n) yielding zeroed elements --
  // confirm against the crucible documentation.
  auto bucket_loads = crucible::make_heap_buffer<std::size_t>(words.size());

  auto const hash_count = word_hashes.size();
  auto const bucket_count = bucket_loads.size();

  // Stand-in for a real hash table: a word lands in bucket hash(word) modulo (number of buckets),
  // and all we record is how many words each bucket received.
  for (std::size_t word_index = 0; word_index < hash_count; ++word_index) {
#if 0 // Debugging information
    CRUCIBLE_PRINT("hash({}) = {} ({} mod {})",
      words[word_index],
      word_hashes[word_index],
      word_hashes[word_index] % bucket_count,
      bucket_count);
#endif

    auto const bucket_index = word_hashes[word_index] % bucket_count;
    bucket_loads[bucket_index] += 1;

#if 0 // Debugging information
    if (bucket_loads[bucket_index] > 1) {
      CRUCIBLE_PRINT("collision at {}", words[word_index]);
    }
#endif
  }

  // Deliberately lenient acceptance criterion: with one bucket per key, fewer than 50% of the
  // populated buckets (item count > 0) may contain a collision (item count > 1).
  //
  // Stricter statistical checks exist (the chi-squared test is one worth looking into), but the
  // purpose here is only to confirm that the hashing API works and spreads keys more or less
  // evenly -- i.e. that hash tables built on it will not fall over -- not to prove any particular
  // property of the hash function.

  // Number of buckets that received at least one word.
  double const populated_bucket_count = crucible::iterate(bucket_loads)
    .fold([](double const running_total, std::size_t const occupancy) {
      if (occupancy > 0) {
        return running_total + 1.0;
      }
      return running_total;
    }, 0.0);

  // Number of buckets that received two or more words.
  double const collision_bucket_count = crucible::iterate(bucket_loads)
    .fold([](double const running_total, std::size_t const occupancy) {
      if (occupancy > 1) {
        return running_total + 1.0;
      }
      return running_total;
    }, 0.0);

#if 0 // Debugging information
  double const load = hash_count / static_cast<double>(bucket_count);
  double const ratio = collision_bucket_count / populated_bucket_count;
  CRUCIBLE_PRINT("buckets: {} collisions: {} populated: {} ratio: {} load: {}",
    bucket_count,
    collision_bucket_count,
    populated_bucket_count,
    ratio,
    load);
#endif

  CRUCIBLE_ASSERT_LT(collision_bucket_count / populated_bucket_count, 0.5);
}

}

CRUCIBLE_TEST_MAIN