MAINT: Simplify return types #9

Merged 3 commits on Apr 13, 2019
120 changes: 83 additions & 37 deletions numpy/random/generator.pyx
@@ -368,26 +368,11 @@ cdef class RandomGenerator:
[ True, True]]])

"""
cdef np.npy_intp n
cdef np.ndarray randoms
cdef int64_t *randoms_data

if size is None:
with self.lock:
return random_positive_int(self._brng)

randoms = <np.ndarray>np.empty(size, dtype=np.int64)
randoms_data = <int64_t*>np.PyArray_DATA(randoms)
n = np.PyArray_SIZE(randoms)

for i in range(n):
with self.lock, nogil:
randoms_data[i] = random_positive_int(self._brng)
return randoms
return self.randint(0, np.iinfo(np.int).max + 1, dtype=np.int, size=size)

def randint(self, low, high=None, size=None, dtype=int, use_masked=True):
def randint(self, low, high=None, size=None, dtype=np.int64, use_masked=True):
"""
randint(low, high=None, size=None, dtype='l', use_masked=True)
randint(low, high=None, size=None, dtype='int64', use_masked=True)

Return random integers from `low` (inclusive) to `high` (exclusive).
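The removed loop above is replaced by a single call to `randint`, whose default dtype this PR also changes to `np.int64`. A minimal sketch of the resulting behaviour, using NumPy's released `Generator` API as a stand-in for `RandomGenerator` (an assumption of this sketch, not the PR's own class):

```python
import numpy as np

# Delegate to the integer sampler instead of looping element-by-element
# in Cython; the range mirrors randint(0, np.iinfo(np.int).max + 1).
rng = np.random.default_rng(12345)
draws = rng.integers(0, np.iinfo(np.int64).max, size=3,
                     dtype=np.int64, endpoint=True)
print(draws.dtype)  # int64 on every platform, no per-element Python loop
```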

@@ -530,9 +515,9 @@ cdef class RandomGenerator:
return self.randint(0, 4294967296, size=n_uint32, dtype=np.uint32).tobytes()[:length]

@cython.wraparound(True)
def choice(self, a, size=None, replace=True, p=None):
def choice(self, a, size=None, replace=True, p=None, axis=0):
"""
choice(a, size=None, replace=True, p=None)
choice(a, size=None, replace=True, p=None, axis=0)

Generates a random sample from a given 1-D array

@@ -553,6 +538,9 @@
The probabilities associated with each entry in a.
If not given the sample assumes a uniform distribution over all
entries in a.
axis : int, optional
The axis along which the selection is performed. The default, 0,
selects by row.
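The `axis` keyword described above lets `choice` treat slices along any axis of a multidimensional `a` as the items to sample. A hedged usage sketch, shown here with NumPy's released `Generator.choice` (which adopted the same parameter) rather than this PR's `RandomGenerator`:

```python
import numpy as np

rng = np.random.default_rng(0)
a = np.arange(8).reshape(4, 2)        # four rows, two columns

rows = rng.choice(a, size=3, axis=0)  # default: sample whole rows
cols = rng.choice(a, size=1, axis=1)  # sample one whole column instead
print(rows.shape, cols.shape)         # (3, 2) (4, 1)
```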

Returns
-------
@@ -562,11 +550,11 @@
Raises
------
ValueError
If a is an int and less than zero, if a or p are not 1-dimensional,
if a is an array-like of size 0, if p is not a vector of
If a is an int and less than zero, if p is not 1-dimensional, if
a is an array-like of size 0, if p is not a vector of
probabilities, if a and p have different lengths, or if
replace=False and the sample size is greater than the population
size
size.

See Also
--------
@@ -607,7 +595,14 @@ cdef class RandomGenerator:
dtype='<U11')

"""

cdef char* idx_ptr
cdef int64_t buf
cdef char* buf_ptr

cdef set idx_set
cdef int64_t val, t, loc, size_i, pop_size_i
cdef int64_t *idx_data
cdef np.npy_intp j
# Format and Verify input
a = np.array(a, copy=False)
if a.ndim == 0:
@@ -618,11 +613,9 @@
raise ValueError("a must be 1-dimensional or an integer")
if pop_size <= 0 and np.prod(size) != 0:
raise ValueError("a must be greater than 0 unless no samples are taken")
elif a.ndim != 1:
raise ValueError("a must be 1-dimensional")
else:
pop_size = a.shape[0]
if pop_size is 0 and np.prod(size) != 0:
pop_size = a.shape[axis]
if pop_size == 0 and np.prod(size) != 0:
raise ValueError("'a' cannot be empty unless no samples are taken")

if p is not None:
@@ -661,9 +654,9 @@ cdef class RandomGenerator:
cdf /= cdf[-1]
uniform_samples = self.random_sample(shape)
idx = cdf.searchsorted(uniform_samples, side='right')
idx = np.array(idx, copy=False) # searchsorted returns a scalar
idx = np.array(idx, copy=False, dtype=np.int64) # searchsorted returns a scalar
else:
idx = self.randint(0, pop_size, size=shape)
idx = self.randint(0, pop_size, size=shape, dtype=np.int64)
else:
if size > pop_size:
raise ValueError("Cannot take a larger sample than "
@@ -692,7 +685,39 @@ cdef class RandomGenerator:
n_uniq += new.size
idx = found
else:
idx = self.permutation(pop_size)[:size]
size_i = size
pop_size_i = pop_size
# This is a heuristic tuning; it should be improvable
if pop_size_i > 200 and (size > 200 or size > (10 * pop_size // size)):
# Tail shuffle size elements
idx = np.arange(pop_size, dtype=np.int64)
idx_ptr = np.PyArray_BYTES(<np.ndarray>idx)
buf_ptr = <char*>&buf
self._shuffle_raw(pop_size_i, max(pop_size_i - size_i,1),
8, 8, idx_ptr, buf_ptr)
# Copy to allow the potentially large array backing idx to be garbage collected
idx = idx[(pop_size - size):].copy()
else:
# Floyd's algorithm with precomputed indices
# Worst case, O(n**2) when size is close to pop_size
idx = np.empty(size, dtype=np.int64)
idx_data = <int64_t*>np.PyArray_DATA(<np.ndarray>idx)
idx_set = set()
loc = 0
# Sample indices with one pass to avoid reacquiring the lock
with self.lock:
for j in range(pop_size_i - size_i, pop_size_i):
idx_data[loc] = random_interval(self._brng, j)
loc += 1
loc = 0
while len(idx_set) < size_i:
for j in range(pop_size_i - size_i, pop_size_i):
if idx_data[loc] not in idx_set:
val = idx_data[loc]
else:
idx_data[loc] = val = j
idx_set.add(val)
loc += 1
if shape is not None:
idx.shape = shape
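The block above replaces the old `permutation(pop_size)[:size]` path with two strategies chosen by a size heuristic: a tail shuffle through `_shuffle_raw` for large draws, and Floyd's algorithm for small ones. A pure-Python sketch of Floyd's sampling, with the bit generator's `random_interval` replaced by the standard-library `random` module (an assumption of this sketch):

```python
import random

def floyd_sample(pop_size, size, rng=random):
    """Draw `size` distinct indices from range(pop_size) using O(size)
    random draws, without materializing or permuting the population."""
    chosen = set()
    out = []
    for j in range(pop_size - size, pop_size):
        t = rng.randint(0, j)   # uniform on [0, j], like random_interval
        if t in chosen:
            t = j               # collision: j itself cannot be in the set yet
        chosen.add(t)
        out.append(t)
    return out

print(floyd_sample(10_000, 5))  # five distinct indices below 10000
```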

@@ -714,7 +739,9 @@
res[()] = a[idx]
return res

return a[idx]
# asarray downcasts on 32-bit platforms, always safe
# no-op on 64-bit platforms
return a.take(np.asarray(idx, dtype=np.intp), axis=axis)
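Switching from `a[idx]` to `a.take(..., axis=axis)` is what makes the new `axis` argument effective for multidimensional `a`, since plain fancy indexing always selects along the first axis. A small illustration with plain NumPy:

```python
import numpy as np

a = np.arange(12).reshape(3, 4)
idx = np.array([2, 0], dtype=np.int64)

rows = a.take(np.asarray(idx, dtype=np.intp), axis=0)  # shape (2, 4)
cols = a.take(np.asarray(idx, dtype=np.intp), axis=1)  # shape (3, 2)
print(rows.shape, cols.shape)
```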

def uniform(self, low=0.0, high=1.0, size=None):
"""
@@ -3986,9 +4013,9 @@ cdef class RandomGenerator:
# the most common case, yielding a ~33% performance improvement.
# Note that apparently, only one branch can ever be specialized.
if itemsize == sizeof(np.npy_intp):
self._shuffle_raw(n, sizeof(np.npy_intp), stride, x_ptr, buf_ptr)
self._shuffle_raw(n, 1, sizeof(np.npy_intp), stride, x_ptr, buf_ptr)
else:
self._shuffle_raw(n, itemsize, stride, x_ptr, buf_ptr)
self._shuffle_raw(n, 1, itemsize, stride, x_ptr, buf_ptr)
elif isinstance(x, np.ndarray) and x.ndim and x.size:
buf = np.empty_like(x[0, ...])
with self.lock:
@@ -4007,10 +4034,29 @@
j = random_interval(self._brng, i)
x[i], x[j] = x[j], x[i]

cdef inline _shuffle_raw(self, np.npy_intp n, np.npy_intp itemsize,
np.npy_intp stride, char* data, char* buf):
cdef inline _shuffle_raw(self, np.npy_intp n, np.npy_intp first,
np.npy_intp itemsize, np.npy_intp stride,
char* data, char* buf):
"""
Parameters
----------
n
Number of elements in data
first
First observation to shuffle. Shuffles n-1,
n-2, ..., first, so that when first=1 the entire
array is shuffled
itemsize
Size in bytes of item
stride
Array stride
data
Location of data
buf
Location of a scratch buffer of at least itemsize bytes
"""
cdef np.npy_intp i, j
for i in reversed(range(1, n)):
for i in reversed(range(first, n)):
j = random_interval(self._brng, i)
string.memcpy(buf, data + j * stride, itemsize)
string.memcpy(data + j * stride, data + i * stride, itemsize)
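A pure-Python mirror of the index logic in `_shuffle_raw` (the real routine moves raw bytes through `buf`; the list swap below is an illustrative simplification):

```python
import random

def shuffle_raw_sketch(data, first, rng=random):
    """Swap positions n-1, n-2, ..., first each with a uniformly chosen
    position j <= i. With first=1 this is a full Fisher-Yates shuffle; with
    a larger first, positions first..n-1 end up holding a uniform sample,
    without replacement, of the whole list."""
    for i in range(len(data) - 1, first - 1, -1):
        j = rng.randint(0, i)
        data[i], data[j] = data[j], data[i]
    return data

print(shuffle_raw_sketch(list(range(8)), first=1))  # full shuffle
print(shuffle_raw_sketch(list(range(8)), first=5))  # last 3 slots hold the sample
```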
2 changes: 1 addition & 1 deletion numpy/random/src/distributions/distributions.c
@@ -1070,7 +1070,7 @@ int64_t random_zipf(brng_t *brng_state, double a) {

T = pow(1.0 + 1.0 / X, am1);
if (V * X * (T - 1.0) / (b - 1.0) <= T / b) {
return (long)X;
return (int64_t)X;
}
}
}
2 changes: 2 additions & 0 deletions numpy/random/tests/test_against_numpy.py
@@ -183,6 +183,7 @@ def test_standard_exponential(self):
self.rs.standard_exponential)
self._is_state_common_legacy()

@pytest.mark.xfail(reason='Stream broken for simplicity')
def test_tomaxint(self):
self._set_common_state()
self._is_state_common()
@@ -327,6 +328,7 @@ def test_multinomial(self):
g(100, np.array(p), size=(7, 23)))
self._is_state_common()

@pytest.mark.xfail(reason='Stream broken for performance')
def test_choice(self):
self._set_common_state()
self._is_state_common()
44 changes: 39 additions & 5 deletions numpy/random/tests/test_generator_mt19937.py
@@ -542,25 +542,25 @@ def test_random_sample_unsupported_type(self):
def test_choice_uniform_replace(self):
random.brng.seed(self.seed)
actual = random.choice(4, 4)
desired = np.array([2, 3, 2, 3])
desired = np.array([2, 3, 2, 3], dtype=np.int64)
assert_array_equal(actual, desired)

def test_choice_nonuniform_replace(self):
random.brng.seed(self.seed)
actual = random.choice(4, 4, p=[0.4, 0.4, 0.1, 0.1])
desired = np.array([1, 1, 2, 2])
desired = np.array([1, 1, 2, 2], dtype=np.int64)
assert_array_equal(actual, desired)

def test_choice_uniform_noreplace(self):
random.brng.seed(self.seed)
actual = random.choice(4, 3, replace=False)
desired = np.array([0, 1, 3])
desired = np.array([0, 2, 3], dtype=np.int64)
assert_array_equal(actual, desired)

def test_choice_nonuniform_noreplace(self):
random.brng.seed(self.seed)
actual = random.choice(4, 3, replace=False, p=[0.1, 0.3, 0.5, 0.1])
desired = np.array([2, 3, 1])
desired = np.array([2, 3, 1], dtype=np.int64)
assert_array_equal(actual, desired)

def test_choice_noninteger(self):
@@ -569,11 +569,22 @@
desired = np.array(['c', 'd', 'c', 'd'])
assert_array_equal(actual, desired)

def test_choice_multidimensional_default_axis(self):
random.brng.seed(self.seed)
actual = random.choice([[0, 1], [2, 3], [4, 5], [6, 7]], 3)
desired = np.array([[4, 5], [6, 7], [4, 5]])
assert_array_equal(actual, desired)

def test_choice_multidimensional_custom_axis(self):
random.brng.seed(self.seed)
actual = random.choice([[0, 1], [2, 3], [4, 5], [6, 7]], 1, axis=1)
desired = np.array([[0], [2], [4], [6]])
assert_array_equal(actual, desired)

def test_choice_exceptions(self):
sample = random.choice
assert_raises(ValueError, sample, -1, 3)
assert_raises(ValueError, sample, 3., 3)
assert_raises(ValueError, sample, [[1, 2], [3, 4]], 3)
assert_raises(ValueError, sample, [], 3)
assert_raises(ValueError, sample, [1, 2, 3, 4], 3,
p=[[0.25, 0.25], [0.25, 0.25]])
@@ -639,6 +650,29 @@ def test_choice_nan_probabilities(self):
p = [None, None, None]
assert_raises(ValueError, random.choice, a, p=p)

def test_choice_return_type(self):
# gh 9867
p = np.ones(4) / 4.
actual = random.choice(4, 2)
assert actual.dtype == np.int64
actual = random.choice(4, 2, replace=False)
assert actual.dtype == np.int64
actual = random.choice(4, 2, p=p)
assert actual.dtype == np.int64
actual = random.choice(4, 2, p=p, replace=False)
assert actual.dtype == np.int64

def test_choice_large_sample(self):
import hashlib

choice_hash = '6395868be877d27518c832213c17977c'
random.brng.seed(self.seed)
actual = random.choice(10000, 5000, replace=False)
if sys.byteorder != 'little':
actual = actual.byteswap()
res = hashlib.md5(actual.view(np.int8)).hexdigest()
assert_(choice_hash == res)

def test_bytes(self):
random.brng.seed(self.seed)
actual = random.bytes(10)