diff --git a/pandas/hashtable.pyx b/pandas/hashtable.pyx index cf9428d5862ec..37abcf6d6ec73 100644 --- a/pandas/hashtable.pyx +++ b/pandas/hashtable.pyx @@ -1,4 +1,5 @@ -from cpython cimport PyObject, Py_INCREF, PyList_Check, PyTuple_Check +from cpython cimport (PyObject, Py_INCREF, PyList_Check, PyTuple_Check, + PyString_AsStringAndSize) from khash cimport * from numpy cimport * @@ -843,6 +844,127 @@ cdef class PyObjectHashTable(HashTable): return labels +cdef inline cbuf_t to_cbuf(object s): + cdef cbuf_t output + PyString_AsStringAndSize(s, &output.buf, &output.len) + return output + + +cdef class CBufHashTable(HashTable): + cdef kh_cbuf_map_t *table + + def __cinit__(self, int size_hint=1): + self.table = kh_init_cbuf_map() + if size_hint is not None: + kh_resize_cbuf_map(self.table, size_hint) + + def __dealloc__(self): + kh_destroy_cbuf_map(self.table) + + cdef inline int check_type(self, object val): + return util.is_string_object(val) + + cpdef get_item(self, object val): + cdef khiter_t it + it = kh_get_cbuf_map(self.table, to_cbuf(val)) + if it != self.table.n_buckets: + return self.table.vals[it] + else: + raise KeyError(val) + + def get_iter_test(self, object key, Py_ssize_t iterations): + cdef khiter_t it + cdef Py_ssize_t i, val + for i in range(iterations): + it = kh_get_cbuf_map(self.table, to_cbuf(key)) + if it != self.table.n_buckets: + val = self.table.vals[it] + + cpdef set_item(self, object key, Py_ssize_t val): + cdef: + khiter_t it + int ret = 0 + cbuf_t buf + + buf = to_cbuf(key) + + it = kh_put_cbuf_map(self.table, buf, &ret) + self.table.keys[it] = buf + if kh_exist_cbuf_map(self.table, it): + self.table.vals[it] = val + else: + raise KeyError(key) + + def get_indexer(self, ndarray[object] values): + cdef: + Py_ssize_t i, n = len(values) + ndarray[int64_t] labels = np.empty(n, dtype=np.int64) + cbuf_t buf + int64_t[::1] out = labels + khiter_t it + kh_cbuf_map_t *table = self.table + + for i in range(n): + buf = to_cbuf(values[i]) + it = kh_get_cbuf_map(table, buf) + if it != table.n_buckets: + out[i] = table.vals[it] + else: + out[i] = -1 + return labels + + def unique(self, ndarray[object] values): + cdef: + Py_ssize_t i, n = len(values) + Py_ssize_t idx, count = 0 + int ret = 0 + object val + cbuf_t buf + khiter_t it + ObjectVector uniques = ObjectVector() + + for i in range(n): + val = values[i] + buf = to_cbuf(val) + it = kh_get_cbuf_map(self.table, buf) + if it == self.table.n_buckets: + it = kh_put_cbuf_map(self.table, buf, &ret) + count += 1 + uniques.append(val) + + return uniques.to_array() + + def factorize(self, ndarray[object] values): + cdef: + Py_ssize_t i, n = len(values) + ndarray[int64_t] labels = np.empty(n, dtype=np.int64) + list reverse = [] + Py_ssize_t idx, count = 0 + int ret = 0 + object val + cbuf_t buf + khiter_t it + + for i in range(n): + val = values[i] + buf = to_cbuf(val) + it = kh_get_cbuf_map(self.table, buf) + if it != self.table.n_buckets: + idx = self.table.vals[it] + labels[i] = idx + else: + it = kh_put_cbuf_map(self.table, buf, &ret) + + self.table.vals[it] = count + reverse.append(val) + labels[i] = count + count += 1 + + return dict(enumerate(reverse)), labels + + + + cdef class Factorizer: cdef public PyObjectHashTable table cdef public ObjectVector uniques diff --git a/pandas/src/khash.pxd b/pandas/src/khash.pxd index a8fd51a62cfbe..622c0d70daaf0 100644 --- a/pandas/src/khash.pxd +++ b/pandas/src/khash.pxd @@ -17,7 +17,6 @@ cdef extern from "khash_python.h": inline khint_t kh_get_pymap(kh_pymap_t*,
PyObject*) inline void kh_resize_pymap(kh_pymap_t*, khint_t) inline khint_t kh_put_pymap(kh_pymap_t*, PyObject*, int*) - inline void kh_del_pymap(kh_pymap_t*, khint_t) bint kh_exist_pymap(kh_pymap_t*, khiter_t) @@ -33,7 +32,6 @@ cdef extern from "khash_python.h": inline khint_t kh_get_pyset(kh_pyset_t*, PyObject*) inline void kh_resize_pyset(kh_pyset_t*, khint_t) inline khint_t kh_put_pyset(kh_pyset_t*, PyObject*, int*) - inline void kh_del_pyset(kh_pyset_t*, khint_t) bint kh_exist_pyset(kh_pyset_t*, khiter_t) @@ -51,7 +49,6 @@ cdef extern from "khash_python.h": inline khint_t kh_get_str(kh_str_t*, kh_cstr_t) inline void kh_resize_str(kh_str_t*, khint_t) inline khint_t kh_put_str(kh_str_t*, kh_cstr_t, int*) - inline void kh_del_str(kh_str_t*, khint_t) bint kh_exist_str(kh_str_t*, khiter_t) @@ -68,7 +65,6 @@ cdef extern from "khash_python.h": inline khint_t kh_get_int64(kh_int64_t*, int64_t) inline void kh_resize_int64(kh_int64_t*, khint_t) inline khint_t kh_put_int64(kh_int64_t*, int64_t, int*) - inline void kh_del_int64(kh_int64_t*, khint_t) bint kh_exist_int64(kh_int64_t*, khiter_t) @@ -84,7 +80,6 @@ cdef extern from "khash_python.h": inline khint_t kh_get_float64(kh_float64_t*, float64_t) inline void kh_resize_float64(kh_float64_t*, khint_t) inline khint_t kh_put_float64(kh_float64_t*, float64_t, int*) - inline void kh_del_float64(kh_float64_t*, khint_t) bint kh_exist_float64(kh_float64_t*, khiter_t) @@ -100,7 +95,6 @@ cdef extern from "khash_python.h": inline khint_t kh_get_int32(kh_int32_t*, int32_t) inline void kh_resize_int32(kh_int32_t*, khint_t) inline khint_t kh_put_int32(kh_int32_t*, int32_t, int*) - inline void kh_del_int32(kh_int32_t*, khint_t) bint kh_exist_int32(kh_int32_t*, khiter_t) @@ -118,7 +112,24 @@ cdef extern from "khash_python.h": inline khint_t kh_get_strbox(kh_strbox_t*, kh_cstr_t) inline void kh_resize_strbox(kh_strbox_t*, khint_t) inline khint_t kh_put_strbox(kh_strbox_t*, kh_cstr_t, int*) - inline void kh_del_strbox(kh_strbox_t*, khint_t) bint kh_exist_strbox(kh_strbox_t*, khiter_t) + ctypedef struct cbuf_t: + kh_cstr_t buf + Py_ssize_t len + + ctypedef struct kh_cbuf_map_t: + khint_t n_buckets, size, n_occupied, upper_bound + uint32_t *flags + cbuf_t *keys + size_t *vals + + inline kh_cbuf_map_t* kh_init_cbuf_map() + inline void kh_destroy_cbuf_map(kh_cbuf_map_t*) + inline void kh_clear_cbuf_map(kh_cbuf_map_t*) + inline khint_t kh_get_cbuf_map(kh_cbuf_map_t*, cbuf_t) + inline void kh_resize_cbuf_map(kh_cbuf_map_t*, khint_t) + inline khint_t kh_put_cbuf_map(kh_cbuf_map_t*, cbuf_t, int*) + + bint kh_exist_cbuf_map(kh_cbuf_map_t*, khiter_t) diff --git a/pandas/src/klib/khash.h b/pandas/src/klib/khash.h index 4350ff06f37f0..22eb46063cf60 100644 --- a/pandas/src/klib/khash.h +++ b/pandas/src/klib/khash.h @@ -47,6 +47,23 @@ int main() { */ /* + 2013-05-02 (0.2.8): + + * Use quadratic probing. When the capacity is power of 2, stepping function + i*(i+1)/2 guarantees to traverse each bucket. It is better than double + hashing on cache performance and is more robust than linear probing. + + In theory, double hashing should be more robust than quadratic probing. + However, my implementation is probably not for large hash tables, because + the second hash function is closely tied to the first hash function, + which reduce the effectiveness of double hashing. + + Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php + + 2011-12-29 (0.2.7): + + * Minor code clean up; no actual effect. + 2011-09-16 (0.2.6): * The capacity is a power of 2. 
This seems to dramatically improve the @@ -107,12 +124,13 @@ int main() { Generic hash table library. */ -#define AC_VERSION_KHASH_H "0.2.6" +#define AC_VERSION_KHASH_H "0.2.8" #include <stdlib.h> #include <string.h> #include <limits.h> +/* compiler specific configuration */ #if UINT_MAX == 0xffffffffu typedef unsigned int khint32_t; @@ -121,26 +139,20 @@ typedef unsigned long khint32_t; #endif #if ULONG_MAX == ULLONG_MAX -typedef unsigned long khuint64_t; -typedef signed long khint64_t; +typedef unsigned long khint64_t; #else -typedef unsigned long long khuint64_t; -typedef signed long long khint64_t; +typedef unsigned long long khint64_t; #endif typedef double khfloat64_t; -#ifndef PANDAS_INLINE - #if defined(__GNUC__) - #define PANDAS_INLINE __inline__ - #elif defined(_MSC_VER) - #define PANDAS_INLINE __inline - #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - #define PANDAS_INLINE inline - #else - #define PANDAS_INLINE - #endif +#ifndef kh_inline +#ifdef _MSC_VER +#define kh_inline __inline +#else +#define kh_inline inline #endif +#endif /* kh_inline */ typedef khint32_t khint_t; typedef khint_t khiter_t; @@ -154,11 +166,6 @@ typedef khint_t khiter_t; #define __ac_set_isboth_false(flag, i) __ac_set_isempty_false(flag, i) #define __ac_set_isdel_true(flag, i) (0) -#ifdef KHASH_LINEAR -#define __ac_inc(k, m) 1 -#else -#define __ac_inc(k, m) (((k)>>3 ^ (k)<<3) | 1) & (m) -#endif #define __ac_fsize(m) ((m) < 32? 1 : (m)>>5) @@ -166,39 +173,47 @@ typedef khint_t khiter_t; #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) #endif +#ifndef kcalloc +#define kcalloc(N,Z) calloc(N,Z) +#endif +#ifndef kmalloc +#define kmalloc(Z) malloc(Z) +#endif +#ifndef krealloc +#define krealloc(P,Z) realloc(P,Z) +#endif +#ifndef kfree +#define kfree(P) free(P) +#endif + static const double __ac_HASH_UPPER = 0.77; -#define KHASH_DECLARE(name, khkey_t, khval_t) \ - typedef struct { \ - khint_t n_buckets, size, n_occupied, upper_bound; \ - khint32_t *flags; \ - khkey_t *keys; \ - khval_t *vals; \ - } kh_##name##_t; \ - extern kh_##name##_t *kh_init_##name(); \ +#define __KHASH_TYPE(name, khkey_t, khval_t) \ + typedef struct { \ + khint_t n_buckets, size, n_occupied, upper_bound; \ + khint32_t *flags; \ + khkey_t *keys; \ + khval_t *vals; \ + } kh_##name##_t; + +#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \ + extern kh_##name##_t *kh_init_##name(void); \ extern void kh_destroy_##name(kh_##name##_t *h); \ extern void kh_clear_##name(kh_##name##_t *h); \ extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ - extern void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ - extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ - extern void kh_del_##name(kh_##name##_t *h, khint_t x); + extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ + extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); -#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - typedef struct { \ - khint_t n_buckets, size, n_occupied, upper_bound; \ - khint32_t *flags; \ - khkey_t *keys; \ - khval_t *vals; \ - } kh_##name##_t; \ +#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ SCOPE kh_##name##_t *kh_init_##name(void) { \ - return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \ + return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \ } \ SCOPE void kh_destroy_##name(kh_##name##_t *h) \ { \ if (h) { \ - free(h->keys); free(h->flags); \ -
free(h->vals); \ - free(h); \ + kfree((void *)h->keys); kfree(h->flags); \ + kfree((void *)h->vals); \ + kfree(h); \ } \ } \ SCOPE void kh_clear_##name(kh_##name##_t *h) \ @@ -211,19 +226,19 @@ static const double __ac_HASH_UPPER = 0.77; SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ { \ if (h->n_buckets) { \ - khint_t inc, k, i, last, mask; \ + khint_t k, i, last, mask, step = 0; \ mask = h->n_buckets - 1; \ k = __hash_func(key); i = k & mask; \ - inc = __ac_inc(k, mask); last = i; /* inc==1 for linear probing */ \ + last = i; \ while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ - i = (i + inc) & mask; \ + i = (i + (++step)) & mask; \ if (i == last) return h->n_buckets; \ } \ return __ac_iseither(h->flags, i)? h->n_buckets : i; \ } else return 0; \ } \ - SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ - { /* This function uses 0.25*n_bucktes bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \ + SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ + { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \ khint32_t *new_flags = 0; \ khint_t j = 1; \ { \ @@ -231,11 +246,18 @@ static const double __ac_HASH_UPPER = 0.77; if (new_n_buckets < 4) new_n_buckets = 4; \ if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ else { /* hash table size to be changed (shrink or expand); rehash */ \ - new_flags = (khint32_t*)malloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (!new_flags) return -1; \ memset(new_flags, 0xff, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ if (h->n_buckets < new_n_buckets) { /* expand */ \ - h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ - if (kh_is_map) h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \ + khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (!new_keys) { kfree(new_flags); return -1; } \ + h->keys = new_keys; \ + if (kh_is_map) { \ + khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ + if (!new_vals) { kfree(new_flags); return -1; } \ + h->vals = new_vals; \ + } \ } /* otherwise shrink */ \ } \ } \ @@ -249,11 +271,10 @@ static const double __ac_HASH_UPPER = 0.77; if (kh_is_map) val = h->vals[j]; \ __ac_set_isempty_true(h->flags, j); \ while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ - khint_t inc, k, i; \ + khint_t k, i, step = 0; \ k = __hash_func(key); \ i = k & new_mask; \ - inc = __ac_inc(k, new_mask); \ - while (!__ac_isempty(new_flags, i)) i = (i + inc) & new_mask; \ + while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \ __ac_set_isempty_false(new_flags, i); \ if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \ { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ @@ -268,32 +289,38 @@ static const double __ac_HASH_UPPER = 0.77; } \ } \ if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ - h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ - if (kh_is_map) h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \ + h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (kh_is_map) h->vals = (khval_t*)krealloc((void 
*)h->vals, new_n_buckets * sizeof(khval_t)); \ } \ - free(h->flags); /* free the working space */ \ + kfree(h->flags); /* free the working space */ \ h->flags = new_flags; \ h->n_buckets = new_n_buckets; \ h->n_occupied = h->size; \ h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ } \ + return 0; \ } \ SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ { \ khint_t x; \ if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ - if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); /* clear "deleted" elements */ \ - else kh_resize_##name(h, h->n_buckets + 1); /* expand the hash table */ \ + if (h->n_buckets > (h->size<<1)) { \ + if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \ + *ret = -1; return h->n_buckets; \ + } \ + } else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \ + *ret = -1; return h->n_buckets; \ + } \ } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ { \ - khint_t inc, k, i, site, last, mask = h->n_buckets - 1; \ + khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \ x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ else { \ - inc = __ac_inc(k, mask); last = i; \ + last = i; \ while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ if (__ac_isdel(h->flags, i)) site = i; \ - i = (i + inc) & mask; \ + i = (i + (++step)) & mask; \ if (i == last) { x = site; break; } \ } \ if (x == h->n_buckets) { \ @@ -314,17 +341,18 @@ static const double __ac_HASH_UPPER = 0.77; *ret = 2; \ } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ return x; \ - } \ - SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \ - { \ - if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ - __ac_set_isdel_true(h->flags, x); \ - --h->size; \ - } \ } +#define KHASH_DECLARE(name, khkey_t, khval_t) \ + __KHASH_TYPE(name, khkey_t, khval_t) \ + __KHASH_PROTOTYPES(name, khkey_t, khval_t) + +#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + __KHASH_TYPE(name, khkey_t, khval_t) \ + __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) + #define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ - KHASH_INIT2(name, static PANDAS_INLINE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) + KHASH_INIT2(name, static kh_inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) /* --- BEGIN OF HASH FUNCTIONS --- */ @@ -354,10 +382,10 @@ static const double __ac_HASH_UPPER = 0.77; @param s Pointer to a null terminated string @return The hash value */ -static PANDAS_INLINE khint_t __ac_X31_hash_string(const char *s) +static kh_inline khint_t __ac_X31_hash_string(const char *s) { - khint_t h = *s; - if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s; + khint_t h = (khint_t)*s; + if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s; return h; } /*! 
@function @@ -371,7 +399,7 @@ static PANDAS_INLINE khint_t __ac_X31_hash_string(const char *s) */ #define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) -static PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) +static kh_inline khint_t __ac_Wang_hash(khint_t key) { key += ~(key << 15); key ^= (key >> 10); @@ -427,7 +455,8 @@ static PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) @param name Name of the hash table [symbol] @param h Pointer to the hash table [khash_t(name)*] @param k Key [type of keys] - @param r Extra return code: 0 if the key is present in the hash table; + @param r Extra return code: -1 if the operation failed; + 0 if the key is present in the hash table; 1 if the bucket is empty (never used); 2 if the element in the bucket has been deleted [int*] @return Iterator to the inserted element [khint_t] @@ -439,18 +468,10 @@ static PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) @param name Name of the hash table [symbol] @param h Pointer to the hash table [khash_t(name)*] @param k Key [type of keys] - @return Iterator to the found element, or kh_end(h) is the element is absent [khint_t] + @return Iterator to the found element, or kh_end(h) if the element is absent [khint_t] */ #define kh_get(name, h, k) kh_get_##name(h, k) -/*! @function - @abstract Remove a key from the hash table. - @param name Name of the hash table [symbol] - @param h Pointer to the hash table [khash_t(name)*] - @param k Iterator to the element to be deleted [khint_t] - */ -#define kh_del(name, h, k) kh_del_##name(h, k) - /*! @function @abstract Test whether a bucket contains data. @param h Pointer to the hash table [khash_t(name)*] @@ -509,6 +530,34 @@ static PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) */ #define kh_n_buckets(h) ((h)->n_buckets) +/*! @function + @abstract Iterate over the entries in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @param kvar Variable to which key will be assigned + @param vvar Variable to which value will be assigned + @param code Block of code to execute + */ +#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \ + for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ + if (!kh_exist(h,__i)) continue; \ + (kvar) = kh_key(h,__i); \ + (vvar) = kh_val(h,__i); \ + code; \ + } } + +/*! @function + @abstract Iterate over the values in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @param vvar Variable to which value will be assigned + @param code Block of code to execute + */ +#define kh_foreach_value(h, vvar, code) { khint_t __i; \ + for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ + if (!kh_exist(h,__i)) continue; \ + (vvar) = kh_val(h,__i); \ + code; \ + } } + /* More conenient interfaces */ /*! 
@function @abstract Instantiate a hash map containing 64-bit integer keys @param name Name of the hash table [symbol] */ -#define KHASH_SET_INIT_UINT64(name) \ - KHASH_INIT(name, khuint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) - #define KHASH_SET_INIT_INT64(name) \ KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) @@ -541,13 +587,9 @@ static PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) @param name Name of the hash table [symbol] @param khval_t Type of values [type] */ -#define KHASH_MAP_INIT_UINT64(name, khval_t) \ - KHASH_INIT(name, khuint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) - #define KHASH_MAP_INIT_INT64(name, khval_t) \ KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) - typedef const char *kh_cstr_t; /*! @function @abstract Instantiate a hash map containing const char* keys @@ -565,14 +607,4 @@ typedef const char *kh_cstr_t; KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal) -#define kh_exist_str(h, k) (kh_exist(h, k)) -#define kh_exist_float64(h, k) (kh_exist(h, k)) -#define kh_exist_int64(h, k) (kh_exist(h, k)) -#define kh_exist_int32(h, k) (kh_exist(h, k)) - -KHASH_MAP_INIT_STR(str, size_t) -KHASH_MAP_INIT_INT(int32, size_t) -KHASH_MAP_INIT_INT64(int64, size_t) - - #endif /* __AC_KHASH_H */ diff --git a/pandas/src/klib/khash_python.h b/pandas/src/klib/khash_python.h index d3ef48de0f831..e196e9b23434f 100644 --- a/pandas/src/klib/khash_python.h +++ b/pandas/src/klib/khash_python.h @@ -1,7 +1,126 @@ +#ifndef _KLIB_KHASH_PYTHON_H_ +#define _KLIB_KHASH_PYTHON_H_ + #include <Python.h> +#ifndef PANDAS_INLINE + #if defined(__GNUC__) + #define PANDAS_INLINE __inline__ + #elif defined(_MSC_VER) + #define PANDAS_INLINE __inline + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define PANDAS_INLINE inline + #else + #define PANDAS_INLINE + #endif +#endif + +#define kh_inline PANDAS_INLINE #include "khash.h" +#define kh_exist_str(h, k) (kh_exist(h, k)) +#define kh_exist_float64(h, k) (kh_exist(h, k)) +#define kh_exist_int64(h, k) (kh_exist(h, k)) +#define kh_exist_int32(h, k) (kh_exist(h, k)) + +#include "xxhash/xxhash.h" + +/* + * By default khash uses crappy x31 hash function which puts strings that + * differ only in the last character into neighbouring buckets which is not + * good given that quadratic probing tries small steps first. + * + * xxhash gives better bucket distribution and performance-wise is great for + * long-ish strings, but it is a bit slower than x31 on the shortest ones + * (turns out at length == 2 the difference is already negligible). + * + * Inlining will hinder merging in upstream releases, but 1-character strings + * are a valid use case for pandas, so let's pre-calculate a vector of 256 + * values to avoid calling two functions (strlen and XXH32) if there's only one + * character to hash. + * + * This table was generated with the following code.
Feel free to re-run it if + * an update comes in: +#include <stdio.h> +#include "xxhash.h" + +int main(int argc, char *argv[]) +{ + printf("static khint_t XXH32_EMPTY_HASH = 0x%08x;\n", + XXH32("", 0, 0xdeadbeef)); + printf("static khint_t XXH32_ONECHAR_HASH[256] = {"); + unsigned char s[2] = {0}; + for (int i = 0; i < 256; ++i) { + if (i % 8 == 0) { + printf("\n "); + } + s[0] = i; + printf("0x%08x", XXH32(s, 1, 0xdeadbeef)); + if (i < 255) { + printf(", "); + } + } + printf("\n};\n"); + return 0; +} +*/ + +static khint_t XXH32_EMPTY_HASH = 0xc372c6cb; +static khint_t XXH32_ONECHAR_HASH[256] = { + 0x39110451, 0xd3efa134, 0xea8d6dc4, 0xe59a066b, 0x89f3a4f5, 0xdcce5bc9, 0x44be0c3e, 0x96469248, + 0x7885ddeb, 0x24417b24, 0xb77b30b2, 0xa83d21eb, 0x6f6ba52b, 0x7315bbe5, 0xce858701, 0x52299f26, + 0x440ec810, 0xd02a934f, 0xf873d394, 0xd168a8e1, 0x31c30198, 0x37c3967b, 0xc1bdbdf8, 0x3ddaf3cc, + 0xb7222f4a, 0x96625cdf, 0xabf92a2f, 0x69e97975, 0x55f24523, 0x6b1abaa0, 0xe5b033ab, 0x9e21842c, + 0x3ac2a339, 0x827b0af2, 0xd7ea0f97, 0x72317ee6, 0xe6bd4439, 0xb0b183f1, 0xca90e5e0, 0x57960753, + 0x6eefe374, 0xb9c9c5b5, 0x57396d1f, 0x6db79351, 0xab55c12d, 0x32229df4, 0xbfa3a164, 0x58f9f4ba, + 0x5987c643, 0xffbfa961, 0x1080d4eb, 0xc5c3d846, 0x16a7fd8e, 0xed29fd3a, 0x8d78613d, 0xd088b720, + 0x8d597f4c, 0x2df1ce8f, 0x79bc5215, 0x749d67c1, 0xa9ad300c, 0x60c6237d, 0xeeb080e7, 0xb74eef62, + 0x6ddba2f2, 0x3d9f18cf, 0x0b6ad1bd, 0xc7a33d19, 0x3cb6352f, 0x872839f9, 0x259ced1e, 0x0f9d713b, + 0x6816620f, 0x8d2c96a7, 0x377fb2f9, 0x2616b5b5, 0x9bae3a05, 0x8368a004, 0x3a67fd94, 0x312529c4, + 0xc9238f87, 0x3e85e142, 0x973dedc6, 0xcbc3d4ba, 0xd2629b58, 0x2aae9a6d, 0x82ffc598, 0x4a8512b3, + 0x51146ceb, 0x85ddc3f4, 0xa83b942f, 0x55769a32, 0xf7fa3fdf, 0xfbe35842, 0x342ff574, 0x848400a6, + 0x92707153, 0x48cd58fd, 0xbdae4a11, 0x701bbadb, 0x4a5b37c4, 0x98770eeb, 0xfc1b98fc, 0x05dd6894, + 0xd3ba005c, 0x453bc774, 0xfe186d14, 0xa25acde2, 0xcc738313, 0x1dbdefa7, 0x83ed6f1e, 0xf9d8e195, + 0x5f10c546, 0xf22c5a0f, 0x31da5f5e, 0x5341c163, 0xabd3f750, 0x882e33d8, 0x4d8105cd, 0xc1f6f3d9, + 0x347e1d5c, 0xdb06193c, 0x64841a53, 0x3991a6e6, 0x0abdd625, 0xedcf00f7, 0xa8e64229, 0x2fc9029b, + 0x4fc5ca41, 0x1f5aaae5, 0x29bdda91, 0x55446dae, 0x1566ec40, 0x9ac8391e, 0xcd4d6ab1, 0x0f3807f6, + 0xf3be6887, 0x9f4b88bd, 0x33c401df, 0xaa9df64f, 0xce5c70ac, 0x9ee55a87, 0x4cb91c84, 0x8c322b3d, + 0x8e40fb24, 0x3af430fb, 0xeea567c2, 0xe80c7dc2, 0x6f619449, 0xe0ca8048, 0x984c626e, 0x50bf1281, + 0x4895cbee, 0x5d016a96, 0xe58b8980, 0x3457ef7c, 0x2a24f819, 0x0641cc30, 0xbddc5f84, 0x03ce4656, + 0xbcb73c9c, 0xcd29be82, 0x0930d945, 0xf3fc8e3c, 0xbed775cd, 0xd6668fae, 0x6876f949, 0xcf34fbd7, + 0x0537d916, 0x7efd5f26, 0xb2d32520, 0x10d58995, 0x19d64e1c, 0xacae767c, 0xf23a4e7d, 0xdcb654fe, + 0xe1ec9a9f, 0x3061302b, 0x453a0b7c, 0xe845436e, 0xb2b690df, 0x245c17b5, 0x756a9374, 0x470998f5, + 0xe31a5f5b, 0x60dbad02, 0xf738299d, 0x0db8b11a, 0xd34cb801, 0xb2f3597d, 0xa627e466, 0xda4f9935, + 0x5c58e1df, 0x4b5319d6, 0x48acc08f, 0xce18d68e, 0xeb995e7f, 0x11a07cba, 0x025127b2, 0xd1325331, + 0x55d76240, 0x281bba14, 0xb9ac069d, 0x25e60bcc, 0xf077fbd3, 0xe460ece9, 0x725a9971, 0xa6b5c6b4, + 0xe5f216a3, 0xbee80d71, 0x1a049114, 0x851012d4, 0xa6e175cc, 0x6ec98c95, 0x56a77202, 0x7e2ab05f, + 0x4850279c, 0x1b009afe, 0xf71e36b6, 0x9cadc37a, 0x43a167da, 0x5d75b5f3, 0xc432215c, 0x93ff1905, + 0x8764d057, 0xf44cd35d, 0x03d3a324, 0xd65a5047, 0xe872b4d8, 0x8dcb9a23, 0xfebf9113, 0x59701be9, + 0xdf9f6090, 0xce9b2907, 0x664c6a5a, 0x81bfefc4, 0x13829979, 0xda98b6ab, 0x7b7e9ff0, 0x13c24005, + 0xcee61b6b,
0x15737a85, 0xe2f95e48, 0xf2136570, 0xd1ccfdab, 0xa9adfb16, 0x1f7339a9, 0x83247f43, + 0x68c6c8bf, 0x5046f6fc, 0x2d3dea84, 0x79a0be74, 0x39dd7eb3, 0x4d5cc636, 0xe4e1352d, 0xd1317a99 +}; + +/* Seed value is chosen arbitrarily. */ +static khint_t XXH32_SEED = 0xdeadbeef; + +static khint_t PANDAS_INLINE str_xxhash_hash_func(kh_cstr_t key) { + if (!key[0]) { + return XXH32_EMPTY_HASH; + } + if (!key[1]) { + return XXH32_ONECHAR_HASH[(uint8_t)key[0]]; + } + return XXH32(key, strlen(key), XXH32_SEED); +} + +KHASH_INIT(str, kh_cstr_t, size_t, 1, + str_xxhash_hash_func, kh_str_hash_equal) + +KHASH_MAP_INIT_INT(int32, size_t) +KHASH_MAP_INIT_INT64(int64, size_t) + // kludge #define kh_float64_hash_func _Py_HashDouble @@ -13,7 +132,7 @@ KHASH_MAP_INIT_FLOAT64(float64, size_t) -int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b) { +static int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b) { int result = PyObject_RichCompareBool(a, b, Py_EQ); if (result < 0) { PyErr_Clear(); @@ -46,4 +165,44 @@ KHASH_SET_INIT_PYOBJECT(pyset) #define kh_exist_pymap(h, k) (kh_exist(h, k)) #define kh_exist_pyset(h, k) (kh_exist(h, k)) -KHASH_MAP_INIT_STR(strbox, kh_pyobject_t) +KHASH_INIT(strbox, kh_cstr_t, kh_pyobject_t, 1, + str_xxhash_hash_func, kh_str_hash_equal) + +/* Plain old C buffer structure */ +typedef struct { + kh_cstr_t buf; + Py_ssize_t len; +} cbuf_t; + +static khint_t PANDAS_INLINE cbuf_xxhash(cbuf_t val) { + switch (val.len) { + case 0: + return XXH32_EMPTY_HASH; + case 1: + return XXH32_ONECHAR_HASH[(uint8_t)val.buf[0]]; + default: + return XXH32(val.buf, val.len, XXH32_SEED); + } +} + +static int PANDAS_INLINE cbuf_equal(cbuf_t a, cbuf_t b) { + int i; + if (a.len != b.len) { + return 0; + } + if (a.buf == b.buf) { + return 1; + } + for (i = 0; i < a.len; ++i) { + if (a.buf[i] != b.buf[i]) { + return 0; + } + } + return 1; +} + +/* [cbuf_t -> size_t] hash map */ +KHASH_INIT(cbuf_map, cbuf_t, size_t, 1, cbuf_xxhash, cbuf_equal) +#define kh_exist_cbuf_map(h, k) (kh_exist(h, k)) + +#endif /* _KLIB_KHASH_PYTHON_H_ */ diff --git a/pandas/src/parser/tokenizer.h b/pandas/src/parser/tokenizer.h index 0947315fbe6b7..4020dd24c87e8 100644 --- a/pandas/src/parser/tokenizer.h +++ b/pandas/src/parser/tokenizer.h @@ -33,7 +33,7 @@ See LICENSE for the license #include <stdint.h> #endif -#include "khash.h" +#include "khash_python.h" #define CHUNKSIZE 1024*256 #define KB 1024 diff --git a/pandas/src/xxhash/LICENSE b/pandas/src/xxhash/LICENSE new file mode 100644 index 0000000000000..7de801ed1bc78 --- /dev/null +++ b/pandas/src/xxhash/LICENSE @@ -0,0 +1,24 @@ +xxHash Library +Copyright (c) 2012-2014, Yann Collet +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/pandas/src/xxhash/xxhash.c b/pandas/src/xxhash/xxhash.c new file mode 100644 index 0000000000000..529d69ccc4caa --- /dev/null +++ b/pandas/src/xxhash/xxhash.c @@ -0,0 +1,934 @@ +/* +xxHash - Fast Hash algorithm +Copyright (C) 2012-2014, Yann Collet. +BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +You can contact the author at : +- xxHash source repository : http://code.google.com/p/xxhash/ +- public discussion board : https://groups.google.com/forum/#!forum/lz4c +*/ + + +//************************************** +// Tuning parameters +//************************************** +// Unaligned memory access is automatically enabled for "common" CPU, such as x86. +// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected. +// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance. +// You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32). +#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_USE_UNALIGNED_ACCESS 1 +#endif + +// XXH_ACCEPT_NULL_INPUT_POINTER : +// If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. +// When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. +// This option has a very small performance cost (only measurable on small inputs). +// By default, this option is disabled. 
To enable it, uncomment below define : +// #define XXH_ACCEPT_NULL_INPUT_POINTER 1 + +// XXH_FORCE_NATIVE_FORMAT : +// By default, xxHash library provides endian-independant Hash values, based on little-endian convention. +// Results are therefore identical for little-endian and big-endian CPU. +// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. +// Should endian-independance be of no importance for your application, you may set the #define below to 1. +// It will improve speed for Big-endian CPU. +// This option has no impact on Little_Endian CPU. +#define XXH_FORCE_NATIVE_FORMAT 0 + +//************************************** +// Compiler Specific Options +//************************************** +// Disable some Visual warning messages +#ifdef _MSC_VER // Visual Studio +# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant +#endif + +#ifdef _MSC_VER // Visual Studio +# define FORCE_INLINE static __forceinline +#else +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + +//************************************** +// Includes & Memory related functions +//************************************** +#include "xxhash.h" +// Modify the local functions below should you wish to use some other memory routines +// for malloc(), free() +#include <stdlib.h> +FORCE_INLINE void* XXH_malloc(size_t s) +{ + return malloc(s); +} +FORCE_INLINE void XXH_free (void* p) +{ + free(p); +} +// for memcpy() +#include <string.h> +FORCE_INLINE void* XXH_memcpy(void* dest, const void* src, size_t size) +{ + return memcpy(dest,src,size); +} + + +//************************************** +// Basic Types +//************************************** +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 +# include <stdint.h> +typedef uint8_t BYTE; +typedef uint16_t U16; +typedef uint32_t U32; +typedef int32_t S32; +typedef uint64_t U64; +#else +typedef unsigned char BYTE; +typedef unsigned short U16; +typedef unsigned int U32; +typedef signed int S32; +typedef unsigned long long U64; +#endif + +#if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS) +# define _PACKED __attribute__ ((packed)) +#else +# define _PACKED +#endif + +#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# ifdef __IBMC__ +# pragma pack(1) +# else +# pragma pack(push, 1) +# endif +#endif + +typedef struct _U32_S +{ + U32 v; +} _PACKED U32_S; +typedef struct _U64_S +{ + U64 v; +} _PACKED U64_S; + +#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# pragma pack(pop) +#endif + +#define A32(x) (((U32_S *)(x))->v) +#define A64(x) (((U64_S *)(x))->v) + + +//*************************************** +// Compiler-specific Functions and Macros +//*************************************** +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +// Note : although _rotl exists for minGW (GCC under windows), performance seems poor +#if defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) +#endif + +#if defined(_MSC_VER) // Visual Studio +# define XXH_swap32 _byteswap_ulong +# define XXH_swap64 _byteswap_uint64 +#elif GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +# define XXH_swap64 __builtin_bswap64 +#else +static inline U32 XXH_swap32 (U32 x) +{ + return ((x << 24) & 0xff000000 ) | + ((x << 8) &
0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +static inline U64 XXH_swap64 (U64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + + +//************************************** +// Constants +//************************************** +#define PRIME32_1 2654435761U +#define PRIME32_2 2246822519U +#define PRIME32_3 3266489917U +#define PRIME32_4 668265263U +#define PRIME32_5 374761393U + +#define PRIME64_1 11400714785074694791ULL +#define PRIME64_2 14029467366897019727ULL +#define PRIME64_3 1609587929392839161ULL +#define PRIME64_4 9650029242287828579ULL +#define PRIME64_5 2870177450012600261ULL + +//************************************** +// Architecture Macros +//************************************** +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; +#ifndef XXH_CPU_LITTLE_ENDIAN // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch +static const int one = 1; +# define XXH_CPU_LITTLE_ENDIAN (*(char*)(&one)) +#endif + + +//************************************** +// Macros +//************************************** +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } // use only *after* variable declarations + + +//**************************** +// Memory reads +//**************************** +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +FORCE_INLINE U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr)); + else + return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr); +} + +FORCE_INLINE U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) +{ + return XXH_readLE32_align(ptr, endian, XXH_unaligned); +} + +FORCE_INLINE U64 XXH_readLE64_align(const U64* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? A64(ptr) : XXH_swap64(A64(ptr)); + else + return endian==XXH_littleEndian ? 
*ptr : XXH_swap64(*ptr); +} + +FORCE_INLINE U64 XXH_readLE64(const U64* ptr, XXH_endianess endian) +{ + return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} + + +//**************************** +// Simple Hash Functions +//**************************** +FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U32 h32; +#define XXH_get32bits(p) XXH_readLE32_align((const U32*)p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) + { + len=0; + bEnd=p=(const BYTE*)(size_t)16; + } +#endif + + if (len>=16) + { + const BYTE* const limit = bEnd - 16; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; + + do + { + v1 += XXH_get32bits(p) * PRIME32_2; + v1 = XXH_rotl32(v1, 13); + v1 *= PRIME32_1; + p+=4; + v2 += XXH_get32bits(p) * PRIME32_2; + v2 = XXH_rotl32(v2, 13); + v2 *= PRIME32_1; + p+=4; + v3 += XXH_get32bits(p) * PRIME32_2; + v3 = XXH_rotl32(v3, 13); + v3 *= PRIME32_1; + p+=4; + v4 += XXH_get32bits(p) * PRIME32_2; + v4 = XXH_rotl32(v4, 13); + v4 *= PRIME32_1; + p+=4; + } + while (p<=limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } + else + { + h32 = seed + PRIME32_5; + } + + h32 += (U32) len; + + while (p+4<=bEnd) + { + h32 += XXH_get32bits(p) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + p+=4; + } + + while (p<bEnd) + { + h32 += (*p) * PRIME32_5; + h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; + p++; + } + + h32 ^= h32 >> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +unsigned int XXH32 (const void* input, size_t len, unsigned seed) +{ +#if 0 + // Simple version, good for code maintenance, but unfortunately slow for small inputs + XXH32_state_t state; + XXH32_reset(&state, seed); + XXH32_update(&state, input, len); + return XXH32_digest(&state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + +# if !defined(XXH_USE_UNALIGNED_ACCESS) + if ((((size_t)input) & 3) == 0) // Input is aligned, let's leverage the speed advantage + { + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } +# endif + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + +FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U64 h64; +#define XXH_get64bits(p) XXH_readLE64_align((const U64*)p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) + { + len=0; + bEnd=p=(const BYTE*)(size_t)32; + } +#endif + + if (len>=32) + { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do + { + v1 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + v2 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + v3 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + v4 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v4 = XXH_rotl64(v4, 31); +
v4 *= PRIME64_1; + } + while (p<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + + v1 *= PRIME64_2; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + h64 ^= v1; + h64 = h64 * PRIME64_1 + PRIME64_4; + + v2 *= PRIME64_2; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + h64 ^= v2; + h64 = h64 * PRIME64_1 + PRIME64_4; + + v3 *= PRIME64_2; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + h64 ^= v3; + h64 = h64 * PRIME64_1 + PRIME64_4; + + v4 *= PRIME64_2; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + h64 ^= v4; + h64 = h64 * PRIME64_1 + PRIME64_4; + } + else + { + h64 = seed + PRIME64_5; + } + + h64 += (U64) len; + + while (p+8<=bEnd) + { + U64 k1 = XXH_get64bits(p); + k1 *= PRIME64_2; + k1 = XXH_rotl64(k1,31); + k1 *= PRIME64_1; + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) + { + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p<bEnd) + { + h64 ^= (*p) * PRIME64_5; + h64 = XXH_rotl64(h64, 11) * PRIME64_1; + p++; + } + + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) +{ +#if 0 + // Simple version, good for code maintenance, but unfortunately slow for small inputs + XXH64_state_t state; + XXH64_reset(&state, seed); + XXH64_update(&state, input, len); + return XXH64_digest(&state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + +# if !defined(XXH_USE_UNALIGNED_ACCESS) + if ((((size_t)input) & 7)==0) // Input is aligned, let's leverage the speed advantage + { + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } +# endif + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + +/**************************************************** + * Advanced Hash Functions +****************************************************/ + +/*** Allocation ***/ +typedef struct +{ + U64 total_len; + U32 seed; + U32 v1; + U32 v2; + U32 v3; + U32 v4; + U32 memsize; + char memory[16]; +} XXH_istate32_t; + +typedef struct +{ + U64 total_len; + U64 seed; + U64 v1; + U64 v2; + U64 v3; + U64 v4; + U32 memsize; + char memory[32]; +} XXH_istate64_t; + + +XXH32_state_t* XXH32_createState(void) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t)); // A compilation error here means XXH32_state_t is not large enough + return (XXH32_state_t*)malloc(sizeof(XXH32_state_t)); +} +XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + free(statePtr); + return XXH_OK; +}; + +XXH64_state_t* XXH64_createState(void) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t)); // A compilation error here means XXH64_state_t is not large enough + return (XXH64_state_t*)malloc(sizeof(XXH64_state_t)); +} +XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + free(statePtr); + return XXH_OK; +}; + + +/*** Hash feed ***/ + +XXH_errorcode XXH32_reset(XXH32_state_t* state_in, U32 seed) +{ + XXH_istate32_t* state = (XXH_istate32_t*) state_in; + state->seed = seed; + state->v1 = seed + PRIME32_1 + PRIME32_2; + state->v2 = seed + PRIME32_2; + state->v3 = seed + 0; + state->v4 = seed - PRIME32_1; +
state->total_len = 0; + state->memsize = 0; + return XXH_OK; +} + +XXH_errorcode XXH64_reset(XXH64_state_t* state_in, unsigned long long seed) +{ + XXH_istate64_t* state = (XXH_istate64_t*) state_in; + state->seed = seed; + state->v1 = seed + PRIME64_1 + PRIME64_2; + state->v2 = seed + PRIME64_2; + state->v3 = seed + 0; + state->v4 = seed - PRIME64_1; + state->total_len = 0; + state->memsize = 0; + return XXH_OK; +} + + +FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian) +{ + XXH_istate32_t* state = (XXH_istate32_t *) state_in; + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len += len; + + if (state->memsize + len < 16) // fill in tmp buffer + { + XXH_memcpy(state->memory + state->memsize, input, len); + state->memsize += (U32)len; + return XXH_OK; + } + + if (state->memsize) // some data left from previous update + { + XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize); + { + const U32* p32 = (const U32*)state->memory; + state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; + state->v1 = XXH_rotl32(state->v1, 13); + state->v1 *= PRIME32_1; + p32++; + state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; + state->v2 = XXH_rotl32(state->v2, 13); + state->v2 *= PRIME32_1; + p32++; + state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; + state->v3 = XXH_rotl32(state->v3, 13); + state->v3 *= PRIME32_1; + p32++; + state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; + state->v4 = XXH_rotl32(state->v4, 13); + state->v4 *= PRIME32_1; + p32++; + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) + { + const BYTE* const limit = bEnd - 16; + U32 v1 = state->v1; + U32 v2 = state->v2; + U32 v3 = state->v3; + U32 v4 = state->v4; + + do + { + v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; + v1 = XXH_rotl32(v1, 13); + v1 *= PRIME32_1; + p+=4; + v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; + v2 = XXH_rotl32(v2, 13); + v2 *= PRIME32_1; + p+=4; + v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; + v3 = XXH_rotl32(v3, 13); + v3 *= PRIME32_1; + p+=4; + v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; + v4 = XXH_rotl32(v4, 13); + v4 *= PRIME32_1; + p+=4; + } + while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) + { + XXH_memcpy(state->memory, p, bEnd-p); + state->memsize = (int)(bEnd-p); + } + + return XXH_OK; +} + +XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH32_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian) +{ + XXH_istate32_t* state = (XXH_istate32_t*) state_in; + const BYTE * p = (const BYTE*)state->memory; + BYTE* bEnd = (BYTE*)state->memory + state->memsize; + U32 h32; + + if (state->total_len >= 16) + { + h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); + } + else + { + h32 = state->seed + PRIME32_5; + } + + h32 += (U32) state->total_len; + + while (p+4<=bEnd) + { + h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3; + h32 = 
XXH_rotl32(h32, 17) * PRIME32_4; + p+=4; + } + + while (p<bEnd) + { + h32 += (*p) * PRIME32_5; + h32 = XXH_rotl32(h32, 11) * PRIME32_1; + p++; + } + + h32 ^= h32 >> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +U32 XXH32_digest (const XXH32_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_digest_endian(state_in, XXH_littleEndian); + else + return XXH32_digest_endian(state_in, XXH_bigEndian); +} + + +FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian) +{ + XXH_istate64_t * state = (XXH_istate64_t *) state_in; + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len += len; + + if (state->memsize + len < 32) // fill in tmp buffer + { + XXH_memcpy(state->memory + state->memsize, input, len); + state->memsize += (U32)len; + return XXH_OK; + } + + if (state->memsize) // some data left from previous update + { + XXH_memcpy(state->memory + state->memsize, input, 32-state->memsize); + { + const U64* p64 = (const U64*)state->memory; + state->v1 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v1 = XXH_rotl64(state->v1, 31); + state->v1 *= PRIME64_1; + p64++; + state->v2 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v2 = XXH_rotl64(state->v2, 31); + state->v2 *= PRIME64_1; + p64++; + state->v3 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v3 = XXH_rotl64(state->v3, 31); + state->v3 *= PRIME64_1; + p64++; + state->v4 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v4 = XXH_rotl64(state->v4, 31); + state->v4 *= PRIME64_1; + p64++; + } + p += 32-state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) + { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + do + { + v1 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + p+=8; + v2 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + p+=8; + v3 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + p+=8; + v4 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + p+=8; + } + while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) + { + XXH_memcpy(state->memory, p, bEnd-p); + state->memsize = (int)(bEnd-p); + } + + return XXH_OK; +} + +XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH64_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian) +{ + XXH_istate64_t * state = (XXH_istate64_t *) state_in; + const BYTE * p = (const BYTE*)state->memory; + BYTE* bEnd = (BYTE*)state->memory + state->memsize; + U64 h64; + + if (state->total_len >= 32) + { + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + + v1 *= PRIME64_2; + v1 = XXH_rotl64(v1, 31); + v1 *=
PRIME64_1; + h64 ^= v1; + h64 = h64*PRIME64_1 + PRIME64_4; + + v2 *= PRIME64_2; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + h64 ^= v2; + h64 = h64*PRIME64_1 + PRIME64_4; + + v3 *= PRIME64_2; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + h64 ^= v3; + h64 = h64*PRIME64_1 + PRIME64_4; + + v4 *= PRIME64_2; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + h64 ^= v4; + h64 = h64*PRIME64_1 + PRIME64_4; + } + else + { + h64 = state->seed + PRIME64_5; + } + + h64 += (U64) state->total_len; + + while (p+8<=bEnd) + { + U64 k1 = XXH_readLE64((const U64*)p, endian); + k1 *= PRIME64_2; + k1 = XXH_rotl64(k1,31); + k1 *= PRIME64_1; + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) + { + h64 ^= (U64)(XXH_readLE32((const U32*)p, endian)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p<bEnd) + { + h64 ^= (*p) * PRIME64_5; + h64 = XXH_rotl64(h64, 11) * PRIME64_1; + p++; + } + + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +unsigned long long XXH64_digest (const XXH64_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_digest_endian(state_in, XXH_littleEndian); + else + return XXH64_digest_endian(state_in, XXH_bigEndian); +} + + diff --git a/pandas/src/xxhash/xxhash.h b/pandas/src/xxhash/xxhash.h new file mode 100644 index 0000000000000..55b45015a447e --- /dev/null +++ b/pandas/src/xxhash/xxhash.h @@ -0,0 +1,156 @@ +/* + xxHash - Extremely Fast Hash algorithm + Header File + Copyright (C) 2012-2014, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - xxHash source repository : http://code.google.com/p/xxhash/ +*/ + +/* Notice extracted from xxHash homepage : + +xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite.
+ +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) + +Name Speed Q.Score Author +xxHash 5.4 GB/s 10 +CrapWow 3.2 GB/s 2 Andrew +MumurHash 3a 2.7 GB/s 10 Austin Appleby +SpookyHash 2.0 GB/s 10 Bob Jenkins +SBox 1.4 GB/s 9 Bret Mulvey +Lookup3 1.2 GB/s 9 Bob Jenkins +SuperFastHash 1.2 GB/s 1 Paul Hsieh +CityHash64 1.05 GB/s 10 Pike & Alakuijala +FNV 0.55 GB/s 5 Fowler, Noll, Vo +CRC32 0.43 GB/s 9 +MD5-32 0.33 GB/s 10 Ronald L. Rivest +SHA1-32 0.28 GB/s 10 + +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. +10 is a perfect score. +*/ + +#pragma once + +#if defined (__cplusplus) +extern "C" { +#endif + + +/***************************** + Includes +*****************************/ +#include <stddef.h> /* size_t */ + + +/***************************** + Type +*****************************/ +typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; + + + +/***************************** + Simple Hash Functions +*****************************/ + +unsigned int XXH32 (const void* input, size_t length, unsigned seed); +unsigned long long XXH64 (const void* input, size_t length, unsigned long long seed); + +/* +XXH32() : + Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". + The memory between input & input+length must be valid (allocated and read-accessible). + "seed" can be used to alter the result predictably. + This function successfully passes all SMHasher tests. + Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s +XXH64() : + Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". +*/ + + + +/***************************** + Advanced Hash Functions +*****************************/ +typedef struct { long long ll[ 6]; } XXH32_state_t; +typedef struct { long long ll[11]; } XXH64_state_t; + +/* +These structures allow static allocation of XXH states. +States must then be initialized using XXHnn_reset() before first use. + +If you prefer dynamic allocation, please refer to functions below. +*/ + +XXH32_state_t* XXH32_createState(void); +XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); + +XXH64_state_t* XXH64_createState(void); +XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); + +/* +These functions create and release memory for XXH state. +States must then be initialized using XXHnn_reset() before first use. +*/ + + +XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned seed); +XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +unsigned int XXH32_digest (const XXH32_state_t* statePtr); + +XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +unsigned long long XXH64_digest (const XXH64_state_t* statePtr); + +/* +These functions calculate the xxHash of an input provided in multiple smaller packets, +as opposed to an input provided as a single block. + +XXH state space must first be allocated, using either static or dynamic method provided above. + +Start a new hash by initializing state with a seed, using XXHnn_reset(). + +Then, feed the hash state by calling XXHnn_update() as many times as necessary. +Obviously, input must be valid, meaning allocated and read accessible. +The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. + +Finally, you can produce a hash anytime, by using XXHnn_digest().
diff --git a/setup.py b/setup.py
index a14f831613696..f9fbdae83d233 100755
--- a/setup.py
+++ b/setup.py
@@ -275,7 +275,7 @@ def initialize_options(self):
                  'JSONtoObj.c',
                  'ultrajsonenc.c',
                  'ultrajsondec.c',
-                 ]
+                 'xxhash.c']

         for root, dirs, files in os.walk('pandas'):
             for f in files:
@@ -452,7 +452,11 @@ def pxd(name):
          'pxdfiles': [],
          'depends': lib_depends},
     hashtable={'pyxfile': 'hashtable',
-               'pxdfiles': ['hashtable']},
+               'pxdfiles': ['hashtable'],
+               'depends': ['pandas/src/xxhash/xxhash.h',
+                           'pandas/src/klib/khash_python.h',
+                           'pandas/src/klib/khash.h'],
+               'sources': ['pandas/src/xxhash/xxhash.c']},
     tslib={'pyxfile': 'tslib',
            'depends': tseries_depends,
            'sources': ['pandas/src/datetime/np_datetime.c',
@@ -467,9 +471,14 @@ def pxd(name):
     parser=dict(pyxfile='parser',
                 depends=['pandas/src/parser/tokenizer.h',
                          'pandas/src/parser/io.h',
-                         'pandas/src/numpy_helper.h'],
+                         'pandas/src/numpy_helper.h',
+                         'pandas/src/xxhash/xxhash.h',
+                         'pandas/src/klib/khash_python.h',
+                         'pandas/src/klib/khash.h'],
                 sources=['pandas/src/parser/tokenizer.c',
-                         'pandas/src/parser/io.c'])
+                         'pandas/src/parser/io.c',
+                         'pandas/src/xxhash/xxhash.c'],
+                libraries=['hashtable'])
)

extensions = []
diff --git a/vb_suite/factorize.py b/vb_suite/factorize.py
new file mode 100644
index 0000000000000..21ce350d91262
--- /dev/null
+++ b/vb_suite/factorize.py
@@ -0,0 +1,62 @@
+from vbench.api import Benchmark
+from datetime import datetime
+
+START_DATE = datetime(2014, 10, 13)
+
+# GH 8524
+
+common_setup = """from pandas_vb_common import *
+from pandas import factorize
+SIZE = 1000000
+indices = np.random.randint(100, size=SIZE)
+"""
+
+
+# --- Integer array factorization
+setup = common_setup + """
+int_values_uniq = np.arange(SIZE) * 100
+"""
+factorize_int_uniq = Benchmark("factorize(int_values_uniq)", setup,
+                               start_date=START_DATE)
+setup = common_setup + """
+int_values_dup = (np.arange(SIZE) * 100).take(indices)
+"""
+factorize_int_dup = Benchmark("factorize(int_values_dup)", setup,
+                              start_date=START_DATE)
+
+
+# --- String array factorization
+setup = common_setup + """
+str_values_uniq = tm.makeStringIndex(SIZE)
+"""
+factorize_str_uniq = Benchmark("factorize(str_values_uniq)", setup=setup,
+                               start_date=START_DATE)
+setup = common_setup + """
+str_values_dup = tm.makeStringIndex(SIZE).take(indices)
+"""
+factorize_str_dup = Benchmark("factorize(str_values_dup)", setup=setup,
+                              start_date=START_DATE)
+setup = common_setup + """
+shortstr_4_dup = Index(np.take(['AA', 'BB', 'CC', 'DD'],
+                               np.random.randint(4, size=SIZE)))
+"""
+factorize_shortstr_4_dup = Benchmark("factorize(shortstr_4_dup)",
+                                     setup=setup, start_date=START_DATE)
+setup = common_setup + """
+shortstr_many_dup = tm.rands_array(2, SIZE)
+"""
+factorize_shortstr_many_dup = Benchmark("factorize(shortstr_many_dup)",
+                                        setup=setup, start_date=START_DATE)
+
+
+# --- Float array factorization
+setup = common_setup + """
+float_values_uniq = np.linspace(0., 1., num=SIZE) * 100
+"""
+factorize_float_uniq = Benchmark("factorize(float_values_uniq)", setup=setup,
+                                 start_date=START_DATE)
+setup = common_setup + """
+float_values_dup = (np.linspace(0., 1., num=SIZE) * 100).take(indices)
+"""
+factorize_float_dup = Benchmark("factorize(float_values_dup)", setup,
+                                start_date=START_DATE)
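All of the benchmarks above exercise the same call. For context, `factorize` maps each element of an array to an integer label and collects the distinct values, which is exactly the operation this PR is optimizing. A minimal check, illustrative only and not part of the diff:

import numpy as np
from pandas import factorize

values = np.array(['AA', 'BB', 'AA', 'CC', 'BB'], dtype=object)
labels, uniques = factorize(values)

# Each element is replaced by the index of its first occurrence,
# so labels is [0 1 0 2 1] and uniques holds the distinct values
# in order of first appearance: 'AA', 'BB', 'CC'.
print(labels)
print(uniques)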
Benchmark("factorize(float_values_dup)", setup, + start_date=START_DATE) diff --git a/vb_suite/suite.py b/vb_suite/suite.py index a16d183ae62e2..e9d325d2ef543 100644 --- a/vb_suite/suite.py +++ b/vb_suite/suite.py @@ -6,6 +6,7 @@ modules = ['attrs_caching', 'binary_ops', 'ctors', + 'factorize', 'frame_ctor', 'frame_methods', 'groupby',