// Copyright (c) 2011 Google, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// CityHash, by Geoff Pike and Jyrki Alakuijala
//
// http://code.google.com/p/cityhash/
//
// This file provides a few functions for hashing strings. All of them are
// high-quality functions in the sense that they pass standard tests such
// as Austin Appleby's SMHasher. They are also fast.
//
// For 64-bit x86 code, on short strings, we don't know of anything faster than
// CityHash64 that is of comparable quality. We believe our nearest competitor
// is Murmur3. For 64-bit x86 code, CityHash64 is an excellent choice for hash
// tables and most other hashing (excluding cryptography).
//
// For 64-bit x86 code, on long strings, the picture is more complicated.
// On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc.,
// CityHashCrc128 appears to be faster than all competitors of comparable
// quality. CityHash128 is also good but not quite as fast. We believe our
// nearest competitor is Bob Jenkins' Spooky. We don't have great data for
// other 64-bit CPUs, but for long strings we know that Spooky is slightly
// faster than CityHash on some relatively recent AMD x86-64 CPUs, for example.
// Note that CityHashCrc128 is declared in citycrc.h.
//
// For 32-bit x86 code, we don't know of anything faster than CityHash32 that
// is of comparable quality. We believe our nearest competitor is Murmur3A.
// (On 64-bit CPUs, it is typically faster to use the other CityHash variants.)
//
// Functions in the CityHash family are not suitable for cryptography.
//
// Please see CityHash's README file for more details on our performance
// measurements and so on.
//
// WARNING: This code has been only lightly tested on big-endian platforms!
// It is known to work well on little-endian platforms that have a small penalty
// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
// It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
// bug reports are welcome.
//
// By the way, for some hash functions, given strings a and b, the hash
// of a+b is easily derived from the hashes of a and b. This property
// doesn't hold for any hash functions in this file.

#ifndef CITY_HASH_HPP
#define CITY_HASH_HPP

#include <stdlib.h>  // for size_t.
#include <stdint.h>
#include <utility>

// Short aliases for the fixed-width integer types used by the CityHash API.
typedef uint8_t uint8;
typedef uint32_t uint32;
typedef uint64_t uint64;
// A 128-bit value represented as a pair of 64-bit halves: `first` is the low
// half and `second` is the high half, as exposed by the accessors below.
typedef std::pair<uint64, uint64> uint128;

// Returns the low 64 bits of x (the pair's first element).
inline uint64 Uint128Low64(const uint128& x) { return x.first; }
// Returns the high 64 bits of x (the pair's second element).
inline uint64 Uint128High64(const uint128& x) { return x.second; }

77// Hash function for a byte array.
78uint64 CityHash64(const char *buf, size_t len);
79
80// Hash function for a byte array. For convenience, a 64-bit seed is also
81// hashed into the result.
82uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
83
84// Hash function for a byte array. For convenience, two seeds are also
85// hashed into the result.
86uint64 CityHash64WithSeeds(const char *buf, size_t len,
87 uint64 seed0, uint64 seed1);
88
89// Hash function for a byte array.
90uint128 CityHash128(const char *s, size_t len);
91
92// Hash function for a byte array. For convenience, a 128-bit seed is also
93// hashed into the result.
94uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
95
96// Hash function for a byte array. Most useful in 32-bit binaries.
97uint32 CityHash32(const char *buf, size_t len);
98
99// Hash 128 input bits down to 64 bits of output.
100// This is intended to be a reasonably good hash function.
101inline uint64 Hash128to64(const uint128& x) {
102 // Murmur-inspired hashing.
103 const uint64 kMul = 0x9ddfea08eb382d69ULL;
104 uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
105 a ^= (a >> 47);
106 uint64 b = (Uint128High64(x) ^ a) * kMul;
107 b ^= (b >> 47);
108 b *= kMul;
109 return b;
110}
111
#endif  // CITY_HASH_HPP