Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

more/math/hash.h

Go to the documentation of this file.
00001 //  Copyright (C) 2001--2002  Petter Urkedal
00002 //
00003 //  This file is free software; you can redistribute it and/or modify
00004 //  it under the terms of the GNU General Public License as published by
00005 //  the Free Software Foundation; either version 2 of the License, or
00006 //  (at your option) any later version.
00007 //
00008 //  This file is distributed in the hope that it will be useful,
00009 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00010 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00011 //  GNU General Public License for more details.
00012 //
00013 //  You should have received a copy of the GNU General Public License
00014 //  along with this program; if not, write to the Free Software
00015 //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00016 //
00017 //  As a special exception, you may use this file as part of a free
00018 //  software library without restriction.  Specifically, if other files
00019 //  instantiate templates or use macros or inline functions from this
00020 //  file, or you compile this file and link it with other files to
00021 //  produce an executable, this file does not by itself cause the
00022 //  resulting executable to be covered by the GNU General Public
00023 //  License.  This exception does not however invalidate any other
00024 //  reasons why the executable file might be covered by the GNU General
00025 //  Public License.
00026 //
00027 //  $Id: hash.h,v 1.2 2002/08/24 13:52:14 petter_urkedal Exp $
00028 
00029 
00030 #ifndef MORE_MATH_HASH_H
00031 
00032 #include <more/lang/stdint.h>
00033 #include <more/io/syncstream.h>
00034 #include <algorithm>
00035 #include <stdexcept>
00036 #include <iostream>
00037 #include <cstdlib>
00038 #include <assert.h>
00039 
00040 
00041 namespace more {
00042 namespace math {
00043 
00044   ///\if bits
00045   namespace bits {
00046     using lang::uint_least32_t;
00047     using lang::uint_least64_t;
00048 
00049     template<unsigned int WordWidth>
00050       struct hash_base {};
00051 
00052     template<>
00053       struct hash_base<192>
00054       {
00055           typedef uint_least64_t word_type;
00056           static std::size_t const word_width = 64;
00057           static std::size_t const word_count = 3;
00058           typedef word_type* word_iterator;
00059           typedef word_type const* word_const_iterator;
00060 
00061           hash_base() {}
00062           hash_base(word_type init)
00063               : m_n_free(16)
00064           {
00065               m_st[0] = init;
00066               m_st[1] = m_st[2] = 0x9e3779b97f4a7c13LL;
00067           }
00068 
00069           void insert_impl(unsigned char const* it,
00070                            unsigned char const* it_end);
00071 
00072           word_iterator word_begin() { return m_st; }
00073           word_iterator word_end() { return m_st + 3; }
00074           word_const_iterator word_begin() const { return m_st; }
00075           word_const_iterator word_end() const { return m_st + 3; }
00076 
00077           bool operator==(hash_base const& rhs) const
00078           {
00079               return m_st[0] == rhs.m_st[0]
00080                   && m_st[1] == rhs.m_st[1]
00081                   && m_st[2] == rhs.m_st[2];
00082           }
00083 
00084           bool operator<(hash_base const& rhs) const
00085           {
00086               if (m_st[0] < rhs.m_st[0]) return true;
00087               if (m_st[0] > rhs.m_st[0]) return false;
00088               if (m_st[1] < rhs.m_st[1]) return true;
00089               if (m_st[1] > rhs.m_st[1]) return false;
00090               return m_st[2] < rhs.m_st[2];
00091           }
00092 
00093         protected:
00094           uint_least64_t m_st[3];
00095           std::size_t m_n_free;
00096       };
00097 
00098     template<>
00099       struct hash_base<160> : hash_base<192>
00100       {
00101           hash_base() {}
00102           hash_base(word_type init)
00103               : hash_base<192>(init) {}
00104       };
00105 
00106     template<>
00107       struct hash_base<128> : hash_base<192>
00108       {
00109           static std::size_t const word_count = 2;
00110 
00111           hash_base() {}
00112           hash_base(word_type init)
00113               : hash_base<192>(init) {}
00114 
00115           word_iterator word_end() { return m_st + 2; }
00116           word_const_iterator word_end() const { return m_st + 2; }
00117 
00118           bool operator==(hash_base const& rhs) const
00119           {
00120               return m_st[0] == rhs.m_st[0]
00121                   && m_st[1] == rhs.m_st[1];
00122           }
00123 
00124           bool operator<(hash_base const& rhs) const
00125           {
00126               if (m_st[0] < rhs.m_st[0]) return true;
00127               if (m_st[0] > rhs.m_st[0]) return false;
00128               return m_st[1] < rhs.m_st[1];
00129           }
00130       };
00131 
00132     template<>
00133       struct hash_base<96>
00134       {
00135           typedef uint_least32_t word_type;
00136           static std::size_t const word_width = 32;
00137           static std::size_t const word_count = 3;
00138           typedef word_type* word_iterator;
00139           typedef word_type const* word_const_iterator;
00140 
00141           hash_base() {}
00142           hash_base(word_type init)
00143               : m_n_free(8)
00144           {
00145               m_st[0] = init;
00146               m_st[1] = m_st[2] = 0x9e3779b9;
00147           }
00148 
00149           void insert_impl(unsigned char const* it,
00150                            unsigned char const* it_end);
00151 //XXX     void insert_impl(word_type const* it, word_type const* it_end)
00152 
00153           word_iterator word_begin() { return m_st; }
00154           word_iterator word_end() { return m_st + 3; }
00155           word_const_iterator word_begin() const { return m_st; }
00156           word_const_iterator word_end() const { return m_st + 3; }
00157 
00158           bool operator==(hash_base const& rhs) const
00159           {
00160               return m_st[0] == rhs.m_st[0]
00161                   && m_st[1] == rhs.m_st[1]
00162                   && m_st[2] == rhs.m_st[2];
00163           }
00164 
00165           bool operator<(hash_base const& rhs) const
00166           {
00167               if (m_st[0] < rhs.m_st[0]) return true;
00168               if (m_st[0] > rhs.m_st[0]) return false;
00169               if (m_st[1] < rhs.m_st[1]) return true;
00170               if (m_st[1] > rhs.m_st[1]) return false;
00171               return m_st[2] < rhs.m_st[2];
00172           }
00173 
00174         protected:
00175           uint_least32_t m_st[3];
00176           unsigned short m_n_free;
00177       };
00178 
00179     template<>
00180       struct hash_base<64> : hash_base<96>
00181       {
00182           static std::size_t const word_count = 2;
00183 
00184           hash_base() {}
00185           hash_base(word_type init)
00186               : hash_base<96>(init) {}
00187 
00188           word_iterator word_end() { return m_st + 2; }
00189           word_const_iterator word_end() const { return m_st + 2; }
00190 
00191           bool operator==(hash_base const& rhs) const
00192           {
00193               return m_st[0] == rhs.m_st[0]
00194                   && m_st[1] == rhs.m_st[1];
00195           }
00196 
00197           bool operator<(hash_base const& rhs) const
00198           {
00199               if (m_st[0] < rhs.m_st[0]) return true;
00200               if (m_st[0] > rhs.m_st[0]) return false;
00201               return m_st[1] < rhs.m_st[1];
00202           }
00203       };
00204 
00205     template<>
00206       struct hash_base<32> : hash_base<96>
00207       {
00208           static std::size_t const word_count = 1;
00209 
00210           hash_base() {}
00211           hash_base(word_type init)
00212               : hash_base<96>(init) {}
00213 
00214           word_iterator word_end() { return m_st + 1; }
00215           word_const_iterator word_end() const { return m_st + 1; }
00216 
00217           bool operator==(hash_base const& rhs) const
00218           {
00219               return m_st[0] == rhs.m_st[0];
00220           }
00221 
00222           bool operator<(hash_base const& rhs) const
00223           {
00224               return m_st[0] < rhs.m_st[0];
00225           }
00226       };
00227   }
00228   ///\endif
00229 
00230   /** Encode an integer in the range [0.. 52) as an alphabetic ASCII
00231       character. */
00232   inline char
00233   encode_alpha(unsigned int k)
00234   {
00235       if (k < 26)
00236           return 'A' + k;
00237       else if (k < 52)
00238           return 'a' - 26 + k;
00239       else
00240           throw std::out_of_range("more::math::encode_alnum: "
00241                                   "Number is too lange to encode.");
00242   }
00243 
00244   /** Decode an integer encoded with encode_alpha. */
00245   inline unsigned char
00246   decode_alpha(char ch)
00247   {
00248       if ('A' <= ch && ch <= 'Z')
00249           return ch - 'A';
00250       else if ('a' <= ch && ch <= 'z')
00251           return ch - ('a' - 26);
00252       else
00253           throw std::logic_error("more::math::decode_alpha: "
00254                                  "Invalid character in encoding.");
00255   }
00256 
00257   /** Encode an integer in the range [0..62) as an alphanumeric ASCII
00258       character. */
00259   inline char
00260   encode_alnum(unsigned int k)
00261   {
00262       if (k < 26)
00263           return 'A' + k;
00264       else if (k < 52)
00265           return 'a' - 26 + k;
00266       else if (k < 62)
00267           return '0' - 52 + k;
00268       else
00269           throw std::out_of_range("more::math::encode_alnum: "
00270                                   "Number is too lange to encode.");
00271   }
00272 
00273   /** Decode an integer encoded with encode_alnum. */
00274   inline unsigned char
00275   decode_alnum(char ch)
00276   {
00277       if ('A' <= ch && ch <= 'Z')
00278           return ch - 'A';
00279       else if ('a' <= ch && ch <= 'z')
00280           return ch - ('a' - 26);
00281       else if ('0' <= ch && ch <= '9')
00282           return ch - ('0' - 52);
00283       else
00284           throw std::logic_error("more::math::decode_alnum: "
00285                                  "Invalid character in encoding.");
00286   }
00287 
00288   /** Returns a 32 bits encoding of 6 lower case characters.  This may
00289       be useful for putting tags into hashes.  Picking a random number
00290       does the same effect, though macro can increases readability,
00291       and together with a consistent scheme help avoid collisions
00292       without resorting to big random numbers.  All sequences of
00293       letters of the same case will give differt values.  Mixing lower
00294       case letters with uppercase or digits may lead to collitions. */
00295   inline lang::uint_least32_t
00296   hash_id32(char ch0 = 0, char ch1 = 0, char ch2 = 0,
00297             char ch3 = 0, char ch4 = 0, char ch5 = 0)
00298   {
00299       // The first term is a random number small enough not to cause
00300       // overflow and thus system dependecy.  The last char is shifted
00301       // 25 bits, thus leaving 7 bits to avoid overflow.
00302       return 31719
00303           + ch0 + 32*(ch1 + 32*(ch2 + 32*(ch3 + 32*(ch4 + 32*ch5))));
00304   }
00305 
00306   /** Encodes a 64 bits numbers into 1 ASCII alphabetic followed by 10
00307       ASCII alphanumeric characters.  This function may me moved to
00308       another header and namespace. */
00309   void encode64_alnum(lang::uint_least64_t, char* s);
00310 
00311   /** Decodes a 64 bits number encoded with encode64_alnum. This
00312       function may me moved to another header and namespace.*/
00313   lang::uint_least64_t decode64_alnum(char const* s);
00314 
00315   /*x* Encodes a 32 bits number into 6 ASCII alphabetic
00316       characters. This function may me moved to another header and
00317       namespace.*/
00318 //  void encode32_alpha(uint_least32_t, char*& s);
00319 
00320   /*x* Decodes a number encoded with encode32_alnum. This function may
00321       me moved to another header and namespace.*/
00322 //  uint_least32_t decode32_alpha(char const*& s);
00323 
00324   /** \class hash hash.h more/math/hash.h
00325       A hash of minimum Width number of bits.  The implemented values
00326       for Width at the moment are 96 and 192, other values will be
00327       eigther truncated or non-truncated versions of an implementation
00328       with more bits.  Width > 192 gives compile-time error.  I
00329       believe this hash function is rather good, but DON'T USE IT FOR
00330       SECURITY without checking it, I take no responsibility!  It is
00331       used by the lang module to create unique file names.  */
00332   template<unsigned int Width>
00333     struct hash
00334         : bits::hash_base<(Width + 31)/32*32>
00335     {
00336       private:
00337         typedef bits::hash_base<(Width + 31)/32*32> base;
00338       public:
00339         typedef typename base::word_type word_type;
00340         typedef typename base::word_iterator word_iterator;
00341         typedef typename base::word_const_iterator word_const_iterator;
00342         static std::size_t const word_count = base::word_count;
00343         static std::size_t const word_width = base::word_width;
00344         static std::size_t const word_size = word_width/8;
00345         static std::size_t const width = word_count*word_width;
00346         static std::size_t const c_name_strlen
00347             = (word_width == 64? word_count*11 : word_count*word_size/2*3);
00348 
00349         /** Trivial constructor. */
00350         hash() {}
00351 
00352         /** Construct a hash with the given start value (seed). */
00353         hash(word_type x)
00354             : base(x) {}
00355 
00356         /** Chop a sequence of arithmetic values into the hash.  This
00357             is equivalent to inserting each value separately in the
00358             same order. */
00359         template <typename T>
00360           void
00361           insert(T const* it, T const* it_end)
00362           {
00363 //XXX         if (sizeof(T) == sizeof(word_type))
00364 //XXX             base::insert_impl((word_type*)it, (word_type*)it_end);
00365 //XXX         else
00366                   insert_impl((unsigned char*)it, (unsigned char*)it_end);
00367           }
00368 
00369         /** Chop an arithmetic value into the hash. */
00370         template <typename T>
00371           void
00372           insert(T const& x)
00373           {
00374               insert(&x, &x + 1);
00375           }
00376 
00377         /** Writes c_name_strlen alphanumeric ascii characters to s,
00378             starting with an alphabetic.  The result is not
00379             0-terminated. */
00380         void
00381         strncpy_to_c_name(char* s) const
00382         {
00383             for (word_const_iterator it = word_begin();
00384                  it != word_end(); ++it)
00385                 encode_word(*it, s);
00386         }
00387 
00388         /** Initializes the hash from a name written by
00389             strncpy_c_name.  The hash will compare equal to the
00390             original, but if this is a truncated hash type (cf the
00391             Width template argument), the hashes will develop
00392             differtly on further insertions. */
00393         void
00394         assign_from_c_name(char const* s)
00395         {
00396             s += c_name_strlen;
00397             word_iterator it = word_end();
00398             while (it != word_begin()) {
00399                 --it;
00400                 *it = decode_word(s);
00401             }
00402         }
00403 
00404 #if 0
00405         std::string
00406         to_c_name() const
00407         {
00408             char buf[c_name_strlen];
00409             to_c_name(c_name_strlen);
00410             return std::string(buf, c_name_strlen);
00411         }
00412 
00413         void
00414         from_c_name(std::string const& str)
00415         {
00416             from_c_name(str.c_str());
00417         }
00418 #endif
00419 
00420         void
00421         sync(io::syncstream& sio)
00422         {
00423             more::io::sync_range(sio, begin(), end());
00424         }
00425 
00426       private:
00427         static void
00428         encode_word(word_type k, char*& s)
00429         {
00430             if (word_width == 64) {
00431                 encode64_alnum(k, s);
00432                 s += 11;
00433             }
00434             else
00435                 for (std::size_t j = 0; j < word_size/2; ++j) {
00436                     *s++ = encode_alpha(k & 0x1f);
00437                     k >>= 5;
00438                     *s++ = encode_alpha((k & 0x7ff) % 52);
00439                     *s++ = encode_alpha((k & 0x7ff) / 52);
00440                     k >>= 11;
00441                 }
00442         }
00443 
00444         static word_type
00445         decode_word(char const*& s)
00446         {
00447             if (word_width == 64) {
00448                 s -= 11;
00449                 return decode64_alnum(s);
00450             }
00451             else {
00452                 word_type k = 0;
00453                 for (std::size_t j = 0; j < word_size/2; ++j) {
00454                     k <<= 11;
00455                     k += decode_alpha(*--s)*52;
00456                     k += decode_alpha(*--s);
00457                     k <<= 5;
00458                     k |= decode_alpha(*--s);
00459                 }
00460                 return k;
00461             }
00462         }
00463     };
00464 
00465   template <unsigned int Width>
00466     inline std::ostream&
00467     operator<<(std::ostream& os, hash<Width> const& h)
00468     {
00469         typename hash<Width>::word_const_iterator it = h.word_begin();
00470         os << std::setfill('0') << std::setbase(16) << std::setw(4) << *it;
00471         while (++it != h.word_end())
00472             os << ':' << std::setbase(16) << std::setw(4) << *it;
00473         return os;
00474     }
00475 
00476 
00477 }}
00478 
00479 #endif

Generated on Sat Sep 7 19:11:18 2002 for more with Doxygen 1.2.13.1. Doxygen 1.2.13.1 is written and copyright 1997-2002 by Dimitri van Heesch.