00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #ifndef MORE_MATH_HASH_H
00031
00032 #include <more/lang/stdint.h>
00033 #include <more/io/syncstream.h>
00034 #include <algorithm>
00035 #include <stdexcept>
00036 #include <iostream>
00037 #include <cstdlib>
00038 #include <assert.h>
00039
00040
00041 namespace more {
00042 namespace math {
00043
00044
00045 namespace bits {
00046 using lang::uint_least32_t;
00047 using lang::uint_least64_t;
00048
00049 template<unsigned int WordWidth>
00050 struct hash_base {};
00051
00052 template<>
00053 struct hash_base<192>
00054 {
00055 typedef uint_least64_t word_type;
00056 static std::size_t const word_width = 64;
00057 static std::size_t const word_count = 3;
00058 typedef word_type* word_iterator;
00059 typedef word_type const* word_const_iterator;
00060
00061 hash_base() {}
00062 hash_base(word_type init)
00063 : m_n_free(16)
00064 {
00065 m_st[0] = init;
00066 m_st[1] = m_st[2] = 0x9e3779b97f4a7c13LL;
00067 }
00068
00069 void insert_impl(unsigned char const* it,
00070 unsigned char const* it_end);
00071
00072 word_iterator word_begin() { return m_st; }
00073 word_iterator word_end() { return m_st + 3; }
00074 word_const_iterator word_begin() const { return m_st; }
00075 word_const_iterator word_end() const { return m_st + 3; }
00076
00077 bool operator==(hash_base const& rhs) const
00078 {
00079 return m_st[0] == rhs.m_st[0]
00080 && m_st[1] == rhs.m_st[1]
00081 && m_st[2] == rhs.m_st[2];
00082 }
00083
00084 bool operator<(hash_base const& rhs) const
00085 {
00086 if (m_st[0] < rhs.m_st[0]) return true;
00087 if (m_st[0] > rhs.m_st[0]) return false;
00088 if (m_st[1] < rhs.m_st[1]) return true;
00089 if (m_st[1] > rhs.m_st[1]) return false;
00090 return m_st[2] < rhs.m_st[2];
00091 }
00092
00093 protected:
00094 uint_least64_t m_st[3];
00095 std::size_t m_n_free;
00096 };
00097
00098 template<>
00099 struct hash_base<160> : hash_base<192>
00100 {
00101 hash_base() {}
00102 hash_base(word_type init)
00103 : hash_base<192>(init) {}
00104 };
00105
00106 template<>
00107 struct hash_base<128> : hash_base<192>
00108 {
00109 static std::size_t const word_count = 2;
00110
00111 hash_base() {}
00112 hash_base(word_type init)
00113 : hash_base<192>(init) {}
00114
00115 word_iterator word_end() { return m_st + 2; }
00116 word_const_iterator word_end() const { return m_st + 2; }
00117
00118 bool operator==(hash_base const& rhs) const
00119 {
00120 return m_st[0] == rhs.m_st[0]
00121 && m_st[1] == rhs.m_st[1];
00122 }
00123
00124 bool operator<(hash_base const& rhs) const
00125 {
00126 if (m_st[0] < rhs.m_st[0]) return true;
00127 if (m_st[0] > rhs.m_st[0]) return false;
00128 return m_st[1] < rhs.m_st[1];
00129 }
00130 };
00131
00132 template<>
00133 struct hash_base<96>
00134 {
00135 typedef uint_least32_t word_type;
00136 static std::size_t const word_width = 32;
00137 static std::size_t const word_count = 3;
00138 typedef word_type* word_iterator;
00139 typedef word_type const* word_const_iterator;
00140
00141 hash_base() {}
00142 hash_base(word_type init)
00143 : m_n_free(8)
00144 {
00145 m_st[0] = init;
00146 m_st[1] = m_st[2] = 0x9e3779b9;
00147 }
00148
00149 void insert_impl(unsigned char const* it,
00150 unsigned char const* it_end);
00151
00152
00153 word_iterator word_begin() { return m_st; }
00154 word_iterator word_end() { return m_st + 3; }
00155 word_const_iterator word_begin() const { return m_st; }
00156 word_const_iterator word_end() const { return m_st + 3; }
00157
00158 bool operator==(hash_base const& rhs) const
00159 {
00160 return m_st[0] == rhs.m_st[0]
00161 && m_st[1] == rhs.m_st[1]
00162 && m_st[2] == rhs.m_st[2];
00163 }
00164
00165 bool operator<(hash_base const& rhs) const
00166 {
00167 if (m_st[0] < rhs.m_st[0]) return true;
00168 if (m_st[0] > rhs.m_st[0]) return false;
00169 if (m_st[1] < rhs.m_st[1]) return true;
00170 if (m_st[1] > rhs.m_st[1]) return false;
00171 return m_st[2] < rhs.m_st[2];
00172 }
00173
00174 protected:
00175 uint_least32_t m_st[3];
00176 unsigned short m_n_free;
00177 };
00178
00179 template<>
00180 struct hash_base<64> : hash_base<96>
00181 {
00182 static std::size_t const word_count = 2;
00183
00184 hash_base() {}
00185 hash_base(word_type init)
00186 : hash_base<96>(init) {}
00187
00188 word_iterator word_end() { return m_st + 2; }
00189 word_const_iterator word_end() const { return m_st + 2; }
00190
00191 bool operator==(hash_base const& rhs) const
00192 {
00193 return m_st[0] == rhs.m_st[0]
00194 && m_st[1] == rhs.m_st[1];
00195 }
00196
00197 bool operator<(hash_base const& rhs) const
00198 {
00199 if (m_st[0] < rhs.m_st[0]) return true;
00200 if (m_st[0] > rhs.m_st[0]) return false;
00201 return m_st[1] < rhs.m_st[1];
00202 }
00203 };
00204
00205 template<>
00206 struct hash_base<32> : hash_base<96>
00207 {
00208 static std::size_t const word_count = 1;
00209
00210 hash_base() {}
00211 hash_base(word_type init)
00212 : hash_base<96>(init) {}
00213
00214 word_iterator word_end() { return m_st + 1; }
00215 word_const_iterator word_end() const { return m_st + 1; }
00216
00217 bool operator==(hash_base const& rhs) const
00218 {
00219 return m_st[0] == rhs.m_st[0];
00220 }
00221
00222 bool operator<(hash_base const& rhs) const
00223 {
00224 return m_st[0] < rhs.m_st[0];
00225 }
00226 };
00227 }
00228
00229
00230
00231
00232 inline char
00233 encode_alpha(unsigned int k)
00234 {
00235 if (k < 26)
00236 return 'A' + k;
00237 else if (k < 52)
00238 return 'a' - 26 + k;
00239 else
00240 throw std::out_of_range("more::math::encode_alnum: "
00241 "Number is too lange to encode.");
00242 }
00243
00244
00245 inline unsigned char
00246 decode_alpha(char ch)
00247 {
00248 if ('A' <= ch && ch <= 'Z')
00249 return ch - 'A';
00250 else if ('a' <= ch && ch <= 'z')
00251 return ch - ('a' - 26);
00252 else
00253 throw std::logic_error("more::math::decode_alpha: "
00254 "Invalid character in encoding.");
00255 }
00256
00257
00258
00259 inline char
00260 encode_alnum(unsigned int k)
00261 {
00262 if (k < 26)
00263 return 'A' + k;
00264 else if (k < 52)
00265 return 'a' - 26 + k;
00266 else if (k < 62)
00267 return '0' - 52 + k;
00268 else
00269 throw std::out_of_range("more::math::encode_alnum: "
00270 "Number is too lange to encode.");
00271 }
00272
00273
00274 inline unsigned char
00275 decode_alnum(char ch)
00276 {
00277 if ('A' <= ch && ch <= 'Z')
00278 return ch - 'A';
00279 else if ('a' <= ch && ch <= 'z')
00280 return ch - ('a' - 26);
00281 else if ('0' <= ch && ch <= '9')
00282 return ch - ('0' - 52);
00283 else
00284 throw std::logic_error("more::math::decode_alnum: "
00285 "Invalid character in encoding.");
00286 }
00287
00288
00289
00290
00291
00292
00293
00294
00295 inline lang::uint_least32_t
00296 hash_id32(char ch0 = 0, char ch1 = 0, char ch2 = 0,
00297 char ch3 = 0, char ch4 = 0, char ch5 = 0)
00298 {
00299
00300
00301
00302 return 31719
00303 + ch0 + 32*(ch1 + 32*(ch2 + 32*(ch3 + 32*(ch4 + 32*ch5))));
00304 }
00305
00306
00307
00308
00309 void encode64_alnum(lang::uint_least64_t, char* s);
00310
00311
00312
00313 lang::uint_least64_t decode64_alnum(char const* s);
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332 template<unsigned int Width>
00333 struct hash
00334 : bits::hash_base<(Width + 31)/32*32>
00335 {
00336 private:
00337 typedef bits::hash_base<(Width + 31)/32*32> base;
00338 public:
00339 typedef typename base::word_type word_type;
00340 typedef typename base::word_iterator word_iterator;
00341 typedef typename base::word_const_iterator word_const_iterator;
00342 static std::size_t const word_count = base::word_count;
00343 static std::size_t const word_width = base::word_width;
00344 static std::size_t const word_size = word_width/8;
00345 static std::size_t const width = word_count*word_width;
00346 static std::size_t const c_name_strlen
00347 = (word_width == 64? word_count*11 : word_count*word_size/2*3);
00348
00349
00350 hash() {}
00351
00352
00353 hash(word_type x)
00354 : base(x) {}
00355
00356
00357
00358
00359 template <typename T>
00360 void
00361 insert(T const* it, T const* it_end)
00362 {
00363
00364
00365
00366 insert_impl((unsigned char*)it, (unsigned char*)it_end);
00367 }
00368
00369
00370 template <typename T>
00371 void
00372 insert(T const& x)
00373 {
00374 insert(&x, &x + 1);
00375 }
00376
00377
00378
00379
00380 void
00381 strncpy_to_c_name(char* s) const
00382 {
00383 for (word_const_iterator it = word_begin();
00384 it != word_end(); ++it)
00385 encode_word(*it, s);
00386 }
00387
00388
00389
00390
00391
00392
00393 void
00394 assign_from_c_name(char const* s)
00395 {
00396 s += c_name_strlen;
00397 word_iterator it = word_end();
00398 while (it != word_begin()) {
00399 --it;
00400 *it = decode_word(s);
00401 }
00402 }
00403
00404 #if 0
00405 std::string
00406 to_c_name() const
00407 {
00408 char buf[c_name_strlen];
00409 to_c_name(c_name_strlen);
00410 return std::string(buf, c_name_strlen);
00411 }
00412
00413 void
00414 from_c_name(std::string const& str)
00415 {
00416 from_c_name(str.c_str());
00417 }
00418 #endif
00419
00420 void
00421 sync(io::syncstream& sio)
00422 {
00423 more::io::sync_range(sio, begin(), end());
00424 }
00425
00426 private:
00427 static void
00428 encode_word(word_type k, char*& s)
00429 {
00430 if (word_width == 64) {
00431 encode64_alnum(k, s);
00432 s += 11;
00433 }
00434 else
00435 for (std::size_t j = 0; j < word_size/2; ++j) {
00436 *s++ = encode_alpha(k & 0x1f);
00437 k >>= 5;
00438 *s++ = encode_alpha((k & 0x7ff) % 52);
00439 *s++ = encode_alpha((k & 0x7ff) / 52);
00440 k >>= 11;
00441 }
00442 }
00443
00444 static word_type
00445 decode_word(char const*& s)
00446 {
00447 if (word_width == 64) {
00448 s -= 11;
00449 return decode64_alnum(s);
00450 }
00451 else {
00452 word_type k = 0;
00453 for (std::size_t j = 0; j < word_size/2; ++j) {
00454 k <<= 11;
00455 k += decode_alpha(*--s)*52;
00456 k += decode_alpha(*--s);
00457 k <<= 5;
00458 k |= decode_alpha(*--s);
00459 }
00460 return k;
00461 }
00462 }
00463 };
00464
00465 template <unsigned int Width>
00466 inline std::ostream&
00467 operator<<(std::ostream& os, hash<Width> const& h)
00468 {
00469 typename hash<Width>::word_const_iterator it = h.word_begin();
00470 os << std::setfill('0') << std::setbase(16) << std::setw(4) << *it;
00471 while (++it != h.word_end())
00472 os << ':' << std::setbase(16) << std::setw(4) << *it;
00473 return os;
00474 }
00475
00476
00477 }}
00478
00479 #endif