12
12
_default_hash_key = '0123456789123456'
13
13
14
14
15
- def hash_pandas_object (obj , index = True , encoding = 'utf8' , hash_key = None ):
15
+ def reducer (arr ):
16
+ """
17
+ Return a scalar by XOR-reducing the elements of arr (np.bitwise_xor.reduce).
18
+
19
+ An empty array (len(arr) == 0) is not reduced; np.nan is returned instead.
20
+ """
21
+ if len (arr ):
22
+ return np .bitwise_xor .reduce (arr )
23
+ return np .nan
24
+
25
+
26
+ def hash_pandas_object (obj , index = True , encoding = 'utf8' , hash_key = None ,
27
+ reduce = False ):
16
28
"""
17
29
Return a data hash of the Index/Series/DataFrame
18
30
@@ -25,6 +37,8 @@ def hash_pandas_object(obj, index=True, encoding='utf8', hash_key=None):
25
37
encoding : string, default 'utf8'
26
38
encoding for data & key when strings
27
39
hash_key : string key to encode, default to _default_hash_key
40
+ reduce : boolean, default False
41
+ produce a single hash result
28
42
29
43
Returns
30
44
-------
@@ -65,10 +79,14 @@ def adder(h, hashed_to_add):
65
79
h = Series (h , index = obj .index , dtype = 'uint64' )
66
80
else :
67
81
raise TypeError ("Unexpected type for hashing %s" % type (obj ))
82
+
83
+ if reduce :
84
+ h = reducer (h .values )
85
+
68
86
return h
69
87
70
88
71
- def hash_array (vals , encoding = 'utf8' , hash_key = None ):
89
+ def hash_array (vals , encoding = 'utf8' , hash_key = None , reduce = False ):
72
90
"""
73
91
Given a 1d array, return an array of deterministic integers.
74
92
@@ -80,6 +98,8 @@ def hash_array(vals, encoding='utf8', hash_key=None):
80
98
encoding : string, default 'utf8'
81
99
encoding for data & key when strings
82
100
hash_key : string key to encode, default to _default_hash_key
101
+ reduce : boolean, default False
102
+ produce a single hash result
83
103
84
104
Returns
85
105
-------
@@ -134,4 +154,8 @@ def hash_array(vals, encoding='utf8', hash_key=None):
134
154
vals ^= vals >> 27
135
155
vals *= np .uint64 (0x94d049bb133111eb )
136
156
vals ^= vals >> 31
157
+
158
+ if reduce :
159
+ vals = reducer (vals )
160
+
137
161
return vals
0 commit comments