diff --git a/src/libextra/base64.rs b/src/libextra/base64.rs index 3960be4668698..429f7adf5f537 100644 --- a/src/libextra/base64.rs +++ b/src/libextra/base64.rs @@ -318,7 +318,7 @@ mod test { use std::vec; do 1000.times { - let times = task_rng().gen_integer_range(1u, 100); + let times = task_rng().gen_range(1u, 100); let v = vec::from_fn(times, |_| random::()); assert_eq!(v.to_base64(STANDARD).from_base64().unwrap(), v); } diff --git a/src/libextra/crypto/cryptoutil.rs b/src/libextra/crypto/cryptoutil.rs index 97b82383d8432..bb3524a7d4902 100644 --- a/src/libextra/crypto/cryptoutil.rs +++ b/src/libextra/crypto/cryptoutil.rs @@ -365,7 +365,7 @@ pub mod test { digest.reset(); while count < total_size { - let next: uint = rng.gen_integer_range(0, 2 * blocksize + 1); + let next: uint = rng.gen_range(0, 2 * blocksize + 1); let remaining = total_size - count; let size = if next > remaining { remaining } else { next }; digest.input(buffer.slice_to(size)); diff --git a/src/libextra/flate.rs b/src/libextra/flate.rs index 9d6c2e8aa8207..3f3b588e8bc18 100644 --- a/src/libextra/flate.rs +++ b/src/libextra/flate.rs @@ -113,7 +113,7 @@ mod tests { let mut r = rand::rng(); let mut words = ~[]; do 20.times { - let range = r.gen_integer_range(1u, 10); + let range = r.gen_range(1u, 10); words.push(r.gen_vec::(range)); } do 20.times { diff --git a/src/libextra/sort.rs b/src/libextra/sort.rs index d884f4f05c176..2a456f8de3ebe 100644 --- a/src/libextra/sort.rs +++ b/src/libextra/sort.rs @@ -1069,8 +1069,8 @@ mod big_tests { isSorted(arr); do 3.times { - let i1 = rng.gen_integer_range(0u, n); - let i2 = rng.gen_integer_range(0u, n); + let i1 = rng.gen_range(0u, n); + let i2 = rng.gen_range(0u, n); arr.swap(i1, i2); } tim_sort(arr); // 3sort @@ -1088,7 +1088,7 @@ mod big_tests { isSorted(arr); do (n/100).times { - let idx = rng.gen_integer_range(0u, n); + let idx = rng.gen_range(0u, n); arr[idx] = rng.gen(); } tim_sort(arr); @@ -1141,8 +1141,8 @@ mod big_tests { isSorted(arr); do 3.times { - let i1 = rng.gen_integer_range(0u, n); - let i2 = rng.gen_integer_range(0u, n); + let i1 = rng.gen_range(0u, n); + let i2 = rng.gen_range(0u, n); arr.swap(i1, i2); } tim_sort(arr); // 3sort @@ -1160,7 +1160,7 @@ mod big_tests { isSorted(arr); do (n/100).times { - let idx = rng.gen_integer_range(0u, n); + let idx = rng.gen_range(0u, n); arr[idx] = @rng.gen(); } tim_sort(arr); diff --git a/src/libextra/treemap.rs b/src/libextra/treemap.rs index ad196b32fb2ca..7ef9ba76b995d 100644 --- a/src/libextra/treemap.rs +++ b/src/libextra/treemap.rs @@ -1028,7 +1028,7 @@ mod test_treemap { } do 30.times { - let r = rng.gen_integer_range(0, ctrl.len()); + let r = rng.gen_range(0, ctrl.len()); let (key, _) = ctrl.remove(r); assert!(map.remove(&key)); check_structure(&map); diff --git a/src/libstd/rand/distributions.rs b/src/libstd/rand/distributions.rs index 0902100dca60a..e7bcf8ce5d3a7 100644 --- a/src/libstd/rand/distributions.rs +++ b/src/libstd/rand/distributions.rs @@ -8,38 +8,226 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -//! Sampling from random distributions +/*! +Sampling from random distributions. -// Some implementations use the Ziggurat method -// https://en.wikipedia.org/wiki/Ziggurat_algorithm -// -// The version used here is ZIGNOR [Doornik 2005, "An Improved -// Ziggurat Method to Generate Normal Random Samples"] which is slower -// (about double, it generates an extra random number) than the -// canonical version [Marsaglia & Tsang 2000, "The Ziggurat Method for -// Generating Random Variables"], but more robust. If one wanted, one -// could implement VIZIGNOR the ZIGNOR paper for more speed. +This is a generalization of `Rand` to allow parameters to control the +exact properties of the generated values, e.g. the mean and standard +deviation of a normal distribution. The `Sample` trait is the most +general, and allows for generating values that change some state +internally. The `IndependentSample` trait is for generating values +that do not need to record state. + +*/ +use iter::range; +use option::{Some, None}; use num; use rand::{Rng,Rand}; +use clone::Clone; + +pub use self::range::Range; + +pub mod range; + +/// Types that can be used to create a random instance of `Support`. +pub trait Sample { + /// Generate a random value of `Support`, using `rng` as the + /// source of randomness. + fn sample(&mut self, rng: &mut R) -> Support; +} + +/// `Sample`s that do not require keeping track of state. +/// +/// Since no state is recored, each sample is (statistically) +/// independent of all others, assuming the `Rng` used has this +/// property. +// XXX maybe having this separate is overkill (the only reason is to +// take &self rather than &mut self)? or maybe this should be the +// trait called `Sample` and the other should be `DependentSample`. +pub trait IndependentSample: Sample { + /// Generate a random value. + fn ind_sample(&self, &mut R) -> Support; +} + +/// A wrapper for generating types that implement `Rand` via the +/// `Sample` & `IndependentSample` traits. +pub struct RandSample; + +impl Sample for RandSample { + fn sample(&mut self, rng: &mut R) -> Sup { self.ind_sample(rng) } +} + +impl IndependentSample for RandSample { + fn ind_sample(&self, rng: &mut R) -> Sup { + rng.gen() + } +} + +/// A value with a particular weight for use with `WeightedChoice`. +pub struct Weighted { + /// The numerical weight of this item + weight: uint, + /// The actual item which is being weighted + item: T, +} + +/// A distribution that selects from a finite collection of weighted items. +/// +/// Each item has an associated weight that influences how likely it +/// is to be chosen: higher weight is more likely. +/// +/// The `Clone` restriction is a limitation of the `Sample` and +/// `IndepedentSample` traits. Note that `&T` is (cheaply) `Clone` for +/// all `T`, as is `uint`, so one can store references or indices into +/// another vector. +/// +/// # Example +/// +/// ```rust +/// use std::rand; +/// use std::rand::distributions::{Weighted, WeightedChoice, IndepedentSample}; +/// +/// fn main() { +/// let wc = WeightedChoice::new(~[Weighted { weight: 2, item: 'a' }, +/// Weighted { weight: 4, item: 'b' }, +/// Weighted { weight: 1, item: 'c' }]); +/// let rng = rand::task_rng(); +/// for _ in range(0, 16) { +/// // on average prints 'a' 4 times, 'b' 8 and 'c' twice. +/// println!("{}", wc.ind_sample(rng)); +/// } +/// } +/// ``` +pub struct WeightedChoice { + priv items: ~[Weighted], + priv weight_range: Range +} + +impl WeightedChoice { + /// Create a new `WeightedChoice`. + /// + /// Fails if: + /// - `v` is empty + /// - the total weight is 0 + /// - the total weight is larger than a `uint` can contain. + pub fn new(mut items: ~[Weighted]) -> WeightedChoice { + // strictly speaking, this is subsumed by the total weight == 0 case + assert!(!items.is_empty(), "WeightedChoice::new called with no items"); + + let mut running_total = 0u; + + // we convert the list from individual weights to cumulative + // weights so we can binary search. This *could* drop elements + // with weight == 0 as an optimisation. + for item in items.mut_iter() { + running_total = running_total.checked_add(&item.weight) + .expect("WeightedChoice::new called with a total weight larger \ + than a uint can contain"); + + item.weight = running_total; + } + assert!(running_total != 0, "WeightedChoice::new called with a total weight of 0"); + + WeightedChoice { + items: items, + // we're likely to be generating numbers in this range + // relatively often, so might as well cache it + weight_range: Range::new(0, running_total) + } + } +} + +impl Sample for WeightedChoice { + fn sample(&mut self, rng: &mut R) -> T { self.ind_sample(rng) } +} + +impl IndependentSample for WeightedChoice { + fn ind_sample(&self, rng: &mut R) -> T { + // we want to find the first element that has cumulative + // weight > sample_weight, which we do by binary since the + // cumulative weights of self.items are sorted. + + // choose a weight in [0, total_weight) + let sample_weight = self.weight_range.ind_sample(rng); + + // short circuit when it's the first item + if sample_weight < self.items[0].weight { + return self.items[0].item.clone(); + } + + let mut idx = 0; + let mut modifier = self.items.len(); + + // now we know that every possibility has an element to the + // left, so we can just search for the last element that has + // cumulative weight <= sample_weight, then the next one will + // be "it". (Note that this greatest element will never be the + // last element of the vector, since sample_weight is chosen + // in [0, total_weight) and the cumulative weight of the last + // one is exactly the total weight.) + while modifier > 1 { + let i = idx + modifier / 2; + if self.items[i].weight <= sample_weight { + // we're small, so look to the right, but allow this + // exact element still. + idx = i; + // we need the `/ 2` to round up otherwise we'll drop + // the trailing elements when `modifier` is odd. + modifier += 1; + } else { + // otherwise we're too big, so go left. (i.e. do + // nothing) + } + modifier /= 2; + } + return self.items[idx + 1].item.clone(); + } +} mod ziggurat_tables; -// inlining should mean there is no performance penalty for this -#[inline] +/// Sample a random number using the Ziggurat method (specifically the +/// ZIGNOR variant from Doornik 2005). Most of the arguments are +/// directly from the paper: +/// +/// * `rng`: source of randomness +/// * `symmetric`: whether this is a symmetric distribution, or one-sided with P(x < 0) = 0. +/// * `X`: the $x_i$ abscissae. +/// * `F`: precomputed values of the PDF at the $x_i$, (i.e. $f(x_i)$) +/// * `F_DIFF`: precomputed values of $f(x_i) - f(x_{i+1})$ +/// * `pdf`: the probability density function +/// * `zero_case`: manual sampling from the tail when we chose the +/// bottom box (i.e. i == 0) + +// the perf improvement (25-50%) is definitely worth the extra code +// size from force-inlining. +#[inline(always)] fn ziggurat(rng: &mut R, - center_u: bool, + symmetric: bool, X: ziggurat_tables::ZigTable, F: ziggurat_tables::ZigTable, F_DIFF: ziggurat_tables::ZigTable, - pdf: &'static fn(f64) -> f64, // probability density function + pdf: &'static fn(f64) -> f64, zero_case: &'static fn(&mut R, f64) -> f64) -> f64 { + static SCALE: f64 = (1u64 << 53) as f64; loop { - let u = if center_u {2.0 * rng.gen() - 1.0} else {rng.gen()}; - let i: uint = rng.gen::() & 0xff; + // reimplement the f64 generation as an optimisation suggested + // by the Doornik paper: we have a lot of precision-space + // (i.e. there are 11 bits of the 64 of a u64 to use after + // creating a f64), so we might as well reuse some to save + // generating a whole extra random number. (Seems to be 15% + // faster.) + let bits: u64 = rng.gen(); + let i = (bits & 0xff) as uint; + let f = (bits >> 11) as f64 / SCALE; + + // u is either U(-1, 1) or U(0, 1) depending on if this is a + // symmetric distribution or not. + let u = if symmetric {2.0 * f - 1.0} else {f}; let x = u * X[i]; - let test_x = if center_u {num::abs(x)} else {x}; + let test_x = if symmetric {num::abs(x)} else {x}; // algebraically equivalent to |u| < X[i+1]/X[i] (or u < X[i+1]/X[i]) if test_x < X[i + 1] { @@ -49,30 +237,25 @@ fn ziggurat(rng: &mut R, return zero_case(rng, u); } // algebraically equivalent to f1 + DRanU()*(f0 - f1) < 1 - if F[i+1] + F_DIFF[i+1] * rng.gen() < pdf(x) { + if F[i + 1] + F_DIFF[i + 1] * rng.gen() < pdf(x) { return x; } } } -/// A wrapper around an `f64` to generate N(0, 1) random numbers (a.k.a. a -/// standard normal, or Gaussian). Multiplying the generated values by the -/// desired standard deviation `sigma` then adding the desired mean `mu` will -/// give N(mu, sigma^2) distributed random numbers. +/// A wrapper around an `f64` to generate N(0, 1) random numbers +/// (a.k.a. a standard normal, or Gaussian). /// -/// Note that this has to be unwrapped before use as an `f64` (using either -/// `*` or `cast::transmute` is safe). +/// See `Normal` for the general normal distribution. That this has to +/// be unwrapped before use as an `f64` (using either `*` or +/// `cast::transmute` is safe). /// -/// # Example +/// Implemented via the ZIGNOR variant[1] of the Ziggurat method. /// -/// ``` -/// use std::rand::distributions::StandardNormal; -/// -/// fn main() { -/// let normal = 2.0 + (*rand::random::()) * 3.0; -/// println!("{} is from a N(2, 9) distribution", normal) -/// } -/// ``` +/// [1]: Jurgen A. Doornik (2005). [*An Improved Ziggurat Method to +/// Generate Normal Random +/// Samples*](http://www.doornik.com/research/ziggurat.pdf). Nuffield +/// College, Oxford pub struct StandardNormal(f64); impl Rand for StandardNormal { @@ -110,23 +293,62 @@ impl Rand for StandardNormal { } } -/// A wrapper around an `f64` to generate Exp(1) random numbers. Dividing by -/// the desired rate `lambda` will give Exp(lambda) distributed random -/// numbers. +/// The normal distribution `N(mean, std_dev**2)`. /// -/// Note that this has to be unwrapped before use as an `f64` (using either -/// `*` or `cast::transmute` is safe). +/// This uses the ZIGNOR variant of the Ziggurat method, see +/// `StandardNormal` for more details. /// /// # Example /// /// ``` -/// use std::rand::distributions::Exp1; +/// use std::rand; +/// use std::rand::distributions::{Normal, IndependentSample}; /// /// fn main() { -/// let exp2 = (*rand::random::()) * 0.5; -/// println!("{} is from a Exp(2) distribution", exp2); +/// let normal = Normal::new(2.0, 3.0); +/// let v = normal.ind_sample(rand::task_rng()); +/// println!("{} is from a N(2, 9) distribution", v) /// } /// ``` +pub struct Normal { + priv mean: f64, + priv std_dev: f64 +} + +impl Normal { + /// Construct a new `Normal` distribution with the given mean and + /// standard deviation. Fails if `std_dev < 0`. + pub fn new(mean: f64, std_dev: f64) -> Normal { + assert!(std_dev >= 0.0, "Normal::new called with `std_dev` < 0"); + Normal { + mean: mean, + std_dev: std_dev + } + } +} +impl Sample for Normal { + fn sample(&mut self, rng: &mut R) -> f64 { self.ind_sample(rng) } +} +impl IndependentSample for Normal { + fn ind_sample(&self, rng: &mut R) -> f64 { + self.mean + self.std_dev * (*rng.gen::()) + } +} + +/// A wrapper around an `f64` to generate Exp(1) random numbers. +/// +/// See `Exp` for the general exponential distribution.Note that this + // has to be unwrapped before use as an `f64` (using either +/// `*` or `cast::transmute` is safe). +/// +/// Implemented via the ZIGNOR variant[1] of the Ziggurat method. The +/// exact description in the paper was adjusted to use tables for the +/// exponential distribution rather than normal. +/// +/// [1]: Jurgen A. Doornik (2005). [*An Improved Ziggurat Method to +/// Generate Normal Random +/// Samples*](http://www.doornik.com/research/ziggurat.pdf). Nuffield +/// College, Oxford pub struct Exp1(f64); // This could be done via `-rng.gen::().ln()` but that is slower. @@ -148,3 +370,221 @@ impl Rand for Exp1 { pdf, zero_case)) } } + +/// The exponential distribution `Exp(lambda)`. +/// +/// This distribution has density function: `f(x) = lambda * +/// exp(-lambda * x)` for `x > 0`. +/// +/// # Example +/// +/// ``` +/// use std::rand; +/// use std::rand::distributions::{Exp, IndependentSample}; +/// +/// fn main() { +/// let exp = Exp::new(2.0); +/// let v = exp.ind_sample(rand::task_rng()); +/// println!("{} is from a Exp(2) distribution", v); +/// } +/// ``` +pub struct Exp { + /// `lambda` stored as `1/lambda`, since this is what we scale by. + priv lambda_inverse: f64 +} + +impl Exp { + /// Construct a new `Exp` with the given shape parameter + /// `lambda`. Fails if `lambda <= 0`. + pub fn new(lambda: f64) -> Exp { + assert!(lambda > 0.0, "Exp::new called with `lambda` <= 0"); + Exp { lambda_inverse: 1.0 / lambda } + } +} + +impl Sample for Exp { + fn sample(&mut self, rng: &mut R) -> f64 { self.ind_sample(rng) } +} +impl IndependentSample for Exp { + fn ind_sample(&self, rng: &mut R) -> f64 { + (*rng.gen::()) * self.lambda_inverse + } +} + +#[cfg(test)] +mod tests { + use rand::*; + use super::*; + use iter::range; + use option::{Some, None}; + + struct ConstRand(uint); + impl Rand for ConstRand { + fn rand(_: &mut R) -> ConstRand { + ConstRand(0) + } + } + + // 0, 1, 2, 3, ... + struct CountingRng { i: u32 } + impl Rng for CountingRng { + fn next_u32(&mut self) -> u32 { + self.i += 1; + self.i - 1 + } + fn next_u64(&mut self) -> u64 { + self.next_u32() as u64 + } + } + + #[test] + fn test_rand_sample() { + let mut rand_sample = RandSample::; + + assert_eq!(*rand_sample.sample(task_rng()), 0); + assert_eq!(*rand_sample.ind_sample(task_rng()), 0); + } + + #[test] + fn test_normal() { + let mut norm = Normal::new(10.0, 10.0); + let rng = task_rng(); + for _ in range(0, 1000) { + norm.sample(rng); + norm.ind_sample(rng); + } + } + #[test] + #[should_fail] + fn test_normal_invalid_sd() { + Normal::new(10.0, -1.0); + } + + #[test] + fn test_exp() { + let mut exp = Exp::new(10.0); + let rng = task_rng(); + for _ in range(0, 1000) { + assert!(exp.sample(rng) >= 0.0); + assert!(exp.ind_sample(rng) >= 0.0); + } + } + #[test] + #[should_fail] + fn test_exp_invalid_lambda_zero() { + Exp::new(0.0); + } + #[test] + #[should_fail] + fn test_exp_invalid_lambda_neg() { + Exp::new(-10.0); + } + + #[test] + fn test_weighted_choice() { + // this makes assumptions about the internal implementation of + // WeightedChoice, specifically: it doesn't reorder the items, + // it doesn't do weird things to the RNG (so 0 maps to 0, 1 to + // 1, internally; modulo a modulo operation). + + macro_rules! t ( + ($items:expr, $expected:expr) => {{ + let wc = WeightedChoice::new($items); + let expected = $expected; + + let mut rng = CountingRng { i: 0 }; + + for &val in expected.iter() { + assert_eq!(wc.ind_sample(&mut rng), val) + } + }} + ); + + t!(~[Weighted { weight: 1, item: 10}], ~[10]); + + // skip some + t!(~[Weighted { weight: 0, item: 20}, + Weighted { weight: 2, item: 21}, + Weighted { weight: 0, item: 22}, + Weighted { weight: 1, item: 23}], + ~[21,21, 23]); + + // different weights + t!(~[Weighted { weight: 4, item: 30}, + Weighted { weight: 3, item: 31}], + ~[30,30,30,30, 31,31,31]); + + // check that we're binary searching + // correctly with some vectors of odd + // length. + t!(~[Weighted { weight: 1, item: 40}, + Weighted { weight: 1, item: 41}, + Weighted { weight: 1, item: 42}, + Weighted { weight: 1, item: 43}, + Weighted { weight: 1, item: 44}], + ~[40, 41, 42, 43, 44]); + t!(~[Weighted { weight: 1, item: 50}, + Weighted { weight: 1, item: 51}, + Weighted { weight: 1, item: 52}, + Weighted { weight: 1, item: 53}, + Weighted { weight: 1, item: 54}, + Weighted { weight: 1, item: 55}, + Weighted { weight: 1, item: 56}], + ~[50, 51, 52, 53, 54, 55, 56]); + } + + #[test] #[should_fail] + fn test_weighted_choice_no_items() { + WeightedChoice::::new(~[]); + } + #[test] #[should_fail] + fn test_weighted_choice_zero_weight() { + WeightedChoice::new(~[Weighted { weight: 0, item: 0}, + Weighted { weight: 0, item: 1}]); + } + #[test] #[should_fail] + fn test_weighted_choice_weight_overflows() { + let x = (-1) as uint / 2; // x + x + 2 is the overflow + WeightedChoice::new(~[Weighted { weight: x, item: 0 }, + Weighted { weight: 1, item: 1 }, + Weighted { weight: x, item: 2 }, + Weighted { weight: 1, item: 3 }]); + } +} + +#[cfg(test)] +mod bench { + use extra::test::BenchHarness; + use rand::*; + use super::*; + use iter::range; + use option::{Some, None}; + use mem::size_of; + + static N: u64 = 100; + + #[bench] + fn rand_normal(bh: &mut BenchHarness) { + let mut rng = XorShiftRng::new(); + let mut normal = Normal::new(-2.71828, 3.14159); + + do bh.iter { + for _ in range(0, N) { + normal.sample(&mut rng); + } + } + bh.bytes = size_of::() as u64 * N; + } + #[bench] + fn rand_exp(bh: &mut BenchHarness) { + let mut rng = XorShiftRng::new(); + let mut exp = Exp::new(2.71828 * 3.14159); + + do bh.iter { + for _ in range(0, N) { + exp.sample(&mut rng); + } + } + bh.bytes = size_of::() as u64 * N; + } +} diff --git a/src/libstd/rand/isaac.rs b/src/libstd/rand/isaac.rs index 0068b60cfa51b..2fd58d0314e76 100644 --- a/src/libstd/rand/isaac.rs +++ b/src/libstd/rand/isaac.rs @@ -18,10 +18,15 @@ use option::{None, Some}; static RAND_SIZE_LEN: u32 = 8; static RAND_SIZE: u32 = 1 << RAND_SIZE_LEN; -/// A random number generator that uses the [ISAAC -/// algorithm](http://en.wikipedia.org/wiki/ISAAC_%28cipher%29). +/// A random number generator that uses the ISAAC algorithm[1]. /// -/// The ISAAC algorithm is suitable for cryptographic purposes. +/// The ISAAC algorithm is generally accepted as suitable for +/// cryptographic purposes, but this implementation has not be +/// verified as such. Prefer a generator like `OSRng` that defers to +/// the operating system for cases that need high security. +/// +/// [1]: Bob Jenkins, [*ISAAC: A fast cryptographic random number +/// generator*](http://www.burtleburtle.net/bob/rand/isaacafa.html) pub struct IsaacRng { priv cnt: u32, priv rsl: [u32, .. RAND_SIZE], @@ -212,11 +217,16 @@ impl<'self> SeedableRng<&'self [u32]> for IsaacRng { static RAND_SIZE_64_LEN: uint = 8; static RAND_SIZE_64: uint = 1 << RAND_SIZE_64_LEN; -/// A random number generator that uses the 64-bit variant of the -/// [ISAAC -/// algorithm](http://en.wikipedia.org/wiki/ISAAC_%28cipher%29). +/// A random number generator that uses ISAAC-64[1], the 64-bit +/// variant of the ISAAC algorithm. +/// +/// The ISAAC algorithm is generally accepted as suitable for +/// cryptographic purposes, but this implementation has not be +/// verified as such. Prefer a generator like `OSRng` that defers to +/// the operating system for cases that need high security. /// -/// The ISAAC algorithm is suitable for cryptographic purposes. +/// [1]: Bob Jenkins, [*ISAAC: A fast cryptographic random number +/// generator*](http://www.burtleburtle.net/bob/rand/isaacafa.html) pub struct Isaac64Rng { priv cnt: uint, priv rsl: [u64, .. RAND_SIZE_64], diff --git a/src/libstd/rand/mod.rs b/src/libstd/rand/mod.rs index f5c60417bacb5..a372eb1f11ad5 100644 --- a/src/libstd/rand/mod.rs +++ b/src/libstd/rand/mod.rs @@ -28,6 +28,23 @@ from an operating-system source of randomness, e.g. `/dev/urandom` on Unix systems, and will automatically reseed itself from this source after generating 32 KiB of random data. +# Cryptographic security + +An application that requires random numbers for cryptographic purposes +should prefer `OSRng`, which reads randomness from one of the source +that the operating system provides (e.g. `/dev/urandom` on +Unixes). The other random number generators provided by this module +are either known to be insecure (`XorShiftRng`), or are not verified +to be secure (`IsaacRng`, `Isaac64Rng` and `StdRng`). + +*Note*: on Linux, `/dev/random` is more secure than `/dev/urandom`, +but it is a blocking RNG, and will wait until it has determined that +it has collected enough entropy to fulfill a request for random +data. It can be used with the `Rng` trait provided by this module by +opening the file and passing it to `reader::ReaderRng`. Since it +blocks, `/dev/random` should only be used to retrieve small amounts of +randomness. + # Examples ```rust @@ -55,17 +72,20 @@ fn main () { use mem::size_of; use unstable::raw::Slice; use cast; +use cmp::Ord; use container::Container; use iter::{Iterator, range}; use local_data; use prelude::*; use str; -use u64; use vec; pub use self::isaac::{IsaacRng, Isaac64Rng}; pub use self::os::OSRng; +use self::distributions::{Range, IndependentSample}; +use self::distributions::range::SampleRange; + pub mod distributions; pub mod isaac; pub mod os; @@ -80,14 +100,6 @@ pub trait Rand { fn rand(rng: &mut R) -> Self; } -/// A value with a particular weight compared to other values -pub struct Weighted { - /// The numerical weight of this item - weight: uint, - /// The actual item which is being weighted - item: T, -} - /// A random number generator pub trait Rng { /// Return the next random u32. This rarely needs to be called @@ -218,14 +230,14 @@ pub trait Rng { vec::from_fn(len, |_| self.gen()) } - /// Generate a random primitive integer in the range [`low`, - /// `high`). Fails if `low >= high`. + /// Generate a random value in the range [`low`, `high`). Fails if + /// `low >= high`. /// - /// This gives a uniform distribution (assuming this RNG is itself - /// uniform), even for edge cases like `gen_integer_range(0u8, - /// 170)`, which a naive modulo operation would return numbers - /// less than 85 with double the probability to those greater than - /// 85. + /// This is a convenience wrapper around + /// `distributions::Range`. If this function will be called + /// repeatedly with the same arguments, one should use `Range`, as + /// that will amortize the computations that allow for perfect + /// uniformity, as they only happen on initialization. /// /// # Example /// @@ -235,22 +247,15 @@ pub trait Rng { /// /// fn main() { /// let mut rng = rand::task_rng(); - /// let n: uint = rng.gen_integer_range(0u, 10); + /// let n: uint = rng.gen_range(0u, 10); /// println!("{}", n); - /// let m: int = rng.gen_integer_range(-40, 400); + /// let m: float = rng.gen_range(-40.0, 1.3e5); /// println!("{}", m); /// } /// ``` - fn gen_integer_range(&mut self, low: T, high: T) -> T { - assert!(low < high, "RNG.gen_integer_range called with low >= high"); - let range = (high - low).to_u64().unwrap(); - let accept_zone = u64::max_value - u64::max_value % range; - loop { - let rand = self.gen::(); - if rand < accept_zone { - return low + NumCast::from(rand % range).unwrap(); - } - } + fn gen_range(&mut self, low: T, high: T) -> T { + assert!(low < high, "Rng.gen_range called with low >= high"); + Range::new(low, high).ind_sample(self) } /// Return a bool with a 1 in n chance of true @@ -267,7 +272,7 @@ pub trait Rng { /// } /// ``` fn gen_weighted_bool(&mut self, n: uint) -> bool { - n == 0 || self.gen_integer_range(0, n) == 0 + n == 0 || self.gen_range(0, n) == 0 } /// Return a random string of the specified length composed of @@ -317,93 +322,8 @@ pub trait Rng { if values.is_empty() { None } else { - Some(&values[self.gen_integer_range(0u, values.len())]) - } - } - - /// Choose an item respecting the relative weights, failing if the sum of - /// the weights is 0 - /// - /// # Example - /// - /// ```rust - /// use std::rand; - /// use std::rand::Rng; - /// - /// fn main() { - /// let mut rng = rand::rng(); - /// let x = [rand::Weighted {weight: 4, item: 'a'}, - /// rand::Weighted {weight: 2, item: 'b'}, - /// rand::Weighted {weight: 2, item: 'c'}]; - /// println!("{}", rng.choose_weighted(x)); - /// } - /// ``` - fn choose_weighted(&mut self, v: &[Weighted]) -> T { - self.choose_weighted_option(v).expect("Rng.choose_weighted: total weight is 0") - } - - /// Choose Some(item) respecting the relative weights, returning none if - /// the sum of the weights is 0 - /// - /// # Example - /// - /// ```rust - /// use std::rand; - /// use std::rand::Rng; - /// - /// fn main() { - /// let mut rng = rand::rng(); - /// let x = [rand::Weighted {weight: 4, item: 'a'}, - /// rand::Weighted {weight: 2, item: 'b'}, - /// rand::Weighted {weight: 2, item: 'c'}]; - /// println!("{:?}", rng.choose_weighted_option(x)); - /// } - /// ``` - fn choose_weighted_option(&mut self, v: &[Weighted]) - -> Option { - let mut total = 0u; - for item in v.iter() { - total += item.weight; - } - if total == 0u { - return None; - } - let chosen = self.gen_integer_range(0u, total); - let mut so_far = 0u; - for item in v.iter() { - so_far += item.weight; - if so_far > chosen { - return Some(item.item.clone()); - } - } - unreachable!(); - } - - /// Return a vec containing copies of the items, in order, where - /// the weight of the item determines how many copies there are - /// - /// # Example - /// - /// ```rust - /// use std::rand; - /// use std::rand::Rng; - /// - /// fn main() { - /// let mut rng = rand::rng(); - /// let x = [rand::Weighted {weight: 4, item: 'a'}, - /// rand::Weighted {weight: 2, item: 'b'}, - /// rand::Weighted {weight: 2, item: 'c'}]; - /// println!("{}", rng.weighted_vec(x)); - /// } - /// ``` - fn weighted_vec(&mut self, v: &[Weighted]) -> ~[T] { - let mut r = ~[]; - for item in v.iter() { - for _ in range(0u, item.weight) { - r.push(item.item.clone()); - } + Some(&values[self.gen_range(0u, values.len())]) } - r } /// Shuffle a vec @@ -447,7 +367,7 @@ pub trait Rng { // invariant: elements with index >= i have been locked in place. i -= 1u; // lock element i in place. - values.swap(i, self.gen_integer_range(0u, i + 1u)); + values.swap(i, self.gen_range(0u, i + 1u)); } } @@ -473,7 +393,7 @@ pub trait Rng { continue } - let k = self.gen_integer_range(0, i + 1); + let k = self.gen_range(0, i + 1); if k < reservoir.len() { reservoir[k] = elem } @@ -520,8 +440,8 @@ pub trait SeedableRng: Rng { /// Create a random number generator with a default algorithm and seed. /// -/// It returns the cryptographically-safest `Rng` algorithm currently -/// available in Rust. If you require a specifically seeded `Rng` for +/// It returns the strongest `Rng` algorithm currently implemented in +/// pure Rust. If you require a specifically seeded `Rng` for /// consistency over time you should pick one algorithm and create the /// `Rng` yourself. /// @@ -596,12 +516,16 @@ pub fn weak_rng() -> XorShiftRng { XorShiftRng::new() } -/// An [Xorshift random number -/// generator](http://en.wikipedia.org/wiki/Xorshift). +/// An Xorshift[1] random number +/// generator. /// /// The Xorshift algorithm is not suitable for cryptographic purposes /// but is very fast. If you do not know for sure that it fits your -/// requirements, use a more secure one such as `IsaacRng`. +/// requirements, use a more secure one such as `IsaacRng` or `OSRng`. +/// +/// [1]: Marsaglia, George (July 2003). ["Xorshift +/// RNGs"](http://www.jstatsoft.org/v08/i14/paper). *Journal of +/// Statistical Software*. Vol. 8 (Issue 14). pub struct XorShiftRng { priv x: u32, priv y: u32, @@ -760,36 +684,36 @@ mod test { } #[test] - fn test_gen_integer_range() { + fn test_gen_range() { let mut r = rng(); for _ in range(0, 1000) { - let a = r.gen_integer_range(-3i, 42); + let a = r.gen_range(-3i, 42); assert!(a >= -3 && a < 42); - assert_eq!(r.gen_integer_range(0, 1), 0); - assert_eq!(r.gen_integer_range(-12, -11), -12); + assert_eq!(r.gen_range(0, 1), 0); + assert_eq!(r.gen_range(-12, -11), -12); } for _ in range(0, 1000) { - let a = r.gen_integer_range(10, 42); + let a = r.gen_range(10, 42); assert!(a >= 10 && a < 42); - assert_eq!(r.gen_integer_range(0, 1), 0); - assert_eq!(r.gen_integer_range(3_000_000u, 3_000_001), 3_000_000); + assert_eq!(r.gen_range(0, 1), 0); + assert_eq!(r.gen_range(3_000_000u, 3_000_001), 3_000_000); } } #[test] #[should_fail] - fn test_gen_integer_range_fail_int() { + fn test_gen_range_fail_int() { let mut r = rng(); - r.gen_integer_range(5i, -2); + r.gen_range(5i, -2); } #[test] #[should_fail] - fn test_gen_integer_range_fail_uint() { + fn test_gen_range_fail_uint() { let mut r = rng(); - r.gen_integer_range(5u, 2u); + r.gen_range(5u, 2u); } #[test] @@ -843,44 +767,6 @@ mod test { assert_eq!(r.choose_option(v), Some(&i)); } - #[test] - fn test_choose_weighted() { - let mut r = rng(); - assert!(r.choose_weighted([ - Weighted { weight: 1u, item: 42 }, - ]) == 42); - assert!(r.choose_weighted([ - Weighted { weight: 0u, item: 42 }, - Weighted { weight: 1u, item: 43 }, - ]) == 43); - } - - #[test] - fn test_choose_weighted_option() { - let mut r = rng(); - assert!(r.choose_weighted_option([ - Weighted { weight: 1u, item: 42 }, - ]) == Some(42)); - assert!(r.choose_weighted_option([ - Weighted { weight: 0u, item: 42 }, - Weighted { weight: 1u, item: 43 }, - ]) == Some(43)); - let v: Option = r.choose_weighted_option([]); - assert!(v.is_none()); - } - - #[test] - fn test_weighted_vec() { - let mut r = rng(); - let empty: ~[int] = ~[]; - assert_eq!(r.weighted_vec([]), empty); - assert!(r.weighted_vec([ - Weighted { weight: 0u, item: 3u }, - Weighted { weight: 1u, item: 2u }, - Weighted { weight: 2u, item: 1u }, - ]) == ~[2u, 1u, 1u]); - } - #[test] fn test_shuffle() { let mut r = rng(); @@ -894,7 +780,7 @@ mod test { let mut r = task_rng(); r.gen::(); assert_eq!(r.shuffle(~[1, 1, 1]), ~[1, 1, 1]); - assert_eq!(r.gen_integer_range(0u, 1u), 0u); + assert_eq!(r.gen_range(0u, 1u), 0u); } #[test] @@ -953,41 +839,53 @@ mod bench { use extra::test::BenchHarness; use rand::*; use mem::size_of; + use iter::range; + use option::{Some, None}; + + static N: u64 = 100; #[bench] fn rand_xorshift(bh: &mut BenchHarness) { let mut rng = XorShiftRng::new(); do bh.iter { - rng.gen::(); + for _ in range(0, N) { + rng.gen::(); + } } - bh.bytes = size_of::() as u64; + bh.bytes = size_of::() as u64 * N; } #[bench] fn rand_isaac(bh: &mut BenchHarness) { let mut rng = IsaacRng::new(); do bh.iter { - rng.gen::(); + for _ in range(0, N) { + rng.gen::(); + } } - bh.bytes = size_of::() as u64; + bh.bytes = size_of::() as u64 * N; } #[bench] fn rand_isaac64(bh: &mut BenchHarness) { let mut rng = Isaac64Rng::new(); do bh.iter { - rng.gen::(); + for _ in range(0, N) { + rng.gen::(); + } } - bh.bytes = size_of::() as u64; + bh.bytes = size_of::() as u64 * N; } #[bench] fn rand_std(bh: &mut BenchHarness) { let mut rng = StdRng::new(); do bh.iter { - rng.gen::(); + for _ in range(0, N) { + rng.gen::(); + } } - bh.bytes = size_of::() as u64; + bh.bytes = size_of::() as u64 * N; } #[bench] diff --git a/src/libstd/rand/os.rs b/src/libstd/rand/os.rs index 4c8cf06c55e87..5ed8d6b10d1be 100644 --- a/src/libstd/rand/os.rs +++ b/src/libstd/rand/os.rs @@ -30,8 +30,12 @@ type HCRYPTPROV = c_long; // assume they work when we call them. /// A random number generator that retrieves randomness straight from -/// the operating system. On Unix-like systems this reads from -/// `/dev/urandom`, on Windows this uses `CryptGenRandom`. +/// the operating system. Platform sources: +/// +/// - Unix-like systems (Linux, Android, Mac OSX): read directly from +/// `/dev/urandom`. +/// - Windows: calls `CryptGenRandom`, using the default cryptographic +/// service provider with the `PROV_RSA_FULL` type. /// /// This does not block. #[cfg(unix)] @@ -39,8 +43,12 @@ pub struct OSRng { priv inner: ReaderRng } /// A random number generator that retrieves randomness straight from -/// the operating system. On Unix-like systems this reads from -/// `/dev/urandom`, on Windows this uses `CryptGenRandom`. +/// the operating system. Platform sources: +/// +/// - Unix-like systems (Linux, Android, Mac OSX): read directly from +/// `/dev/urandom`. +/// - Windows: calls `CryptGenRandom`, using the default cryptographic +/// service provider with the `PROV_RSA_FULL` type. /// /// This does not block. #[cfg(windows)] diff --git a/src/libstd/rand/range.rs b/src/libstd/rand/range.rs new file mode 100644 index 0000000000000..1b805a0b8f763 --- /dev/null +++ b/src/libstd/rand/range.rs @@ -0,0 +1,235 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Generating numbers between two others. + +// this is surprisingly complicated to be both generic & correct + +use cmp::Ord; +use num::Bounded; +use rand::Rng; +use rand::distributions::{Sample, IndependentSample}; + +/// Sample values uniformly between two bounds. +/// +/// This gives a uniform distribution (assuming the RNG used to sample +/// it is itself uniform & the `SampleRange` implementation for the +/// given type is correct), even for edge cases like `low = 0u8`, +/// `high = 170u8`, for which a naive modulo operation would return +/// numbers less than 85 with double the probability to those greater +/// than 85. +/// +/// Types should attempt to sample in `[low, high)`, i.e., not +/// including `high`, but this may be very difficult. All the +/// primitive integer types satisfy this property, and the float types +/// normally satisfy it, but rounding may mean `high` can occur. +/// +/// # Example +/// +/// ```rust +/// use std::rand; +/// use std::rand::distributions::{IndependentSample, Range}; +/// +/// fn main() { +/// let between = Range::new(10u, 10000u); +/// let rng = rand::task_rng(); +/// let mut sum = 0; +/// for _ in range(0, 1000) { +/// sum += between.ind_sample(rng); +/// } +/// println!("{}", sum); +/// } +/// ``` +pub struct Range { + priv low: X, + priv range: X, + priv accept_zone: X +} + +impl Range { + /// Create a new `Range` instance that samples uniformly from + /// `[low, high)`. Fails if `low >= high`. + pub fn new(low: X, high: X) -> Range { + assert!(low < high, "Range::new called with `low >= high`"); + SampleRange::construct_range(low, high) + } +} + +impl Sample for Range { + #[inline] + fn sample(&mut self, rng: &mut R) -> Sup { self.ind_sample(rng) } +} +impl IndependentSample for Range { + fn ind_sample(&self, rng: &mut R) -> Sup { + SampleRange::sample_range(self, rng) + } +} + +/// The helper trait for types that have a sensible way to sample +/// uniformly between two values. This should not be used directly, +/// and is only to facilitate `Range`. +pub trait SampleRange { + /// Construct the `Range` object that `sample_range` + /// requires. This should not ever be called directly, only via + /// `Range::new`, which will check that `low < high`, so this + /// function doesn't have to repeat the check. + fn construct_range(low: Self, high: Self) -> Range; + + /// Sample a value from the given `Range` with the given `Rng` as + /// a source of randomness. + fn sample_range(r: &Range, rng: &mut R) -> Self; +} + +macro_rules! integer_impl { + ($ty:ty, $unsigned:ty) => { + impl SampleRange for $ty { + // we play free and fast with unsigned vs signed here + // (when $ty is signed), but that's fine, since the + // contract of this macro is for $ty and $unsigned to be + // "bit-equal", so casting between them is a no-op & a + // bijection. + + fn construct_range(low: $ty, high: $ty) -> Range<$ty> { + let range = high as $unsigned - low as $unsigned; + let unsigned_max: $unsigned = Bounded::max_value(); + + // this is the largest number that fits into $unsigned + // that `range` divides evenly, so, if we've sampled + // `n` uniformly from this region, then `n % range` is + // uniform in [0, range) + let zone = unsigned_max - unsigned_max % range; + + Range { + low: low, + range: range as $ty, + accept_zone: zone as $ty + } + } + #[inline] + fn sample_range(r: &Range<$ty>, rng: &mut R) -> $ty { + loop { + // rejection sample + let v = rng.gen::<$unsigned>(); + // until we find something that fits into the + // region which r.range evenly divides (this will + // be uniformly distributed) + if v < r.accept_zone as $unsigned { + // and return it, with some adjustments + return r.low + (v % r.range as $unsigned) as $ty; + } + } + } + } + } +} + +integer_impl! { i8, u8 } +integer_impl! { i16, u16 } +integer_impl! { i32, u32 } +integer_impl! { i64, u64 } +integer_impl! { int, uint } +integer_impl! { u8, u8 } +integer_impl! { u16, u16 } +integer_impl! { u32, u32 } +integer_impl! { u64, u64 } +integer_impl! { uint, uint } + +macro_rules! float_impl { + ($ty:ty) => { + impl SampleRange for $ty { + fn construct_range(low: $ty, high: $ty) -> Range<$ty> { + Range { + low: low, + range: high - low, + accept_zone: 0.0 // unused + } + } + fn sample_range(r: &Range<$ty>, rng: &mut R) -> $ty { + r.low + r.range * rng.gen() + } + } + } +} + +float_impl! { f32 } +float_impl! { f64 } + +#[cfg(test)] +mod tests { + use super::*; + use rand::*; + use num::Bounded; + use iter::range; + use option::{Some, None}; + use vec::ImmutableVector; + + #[should_fail] + #[test] + fn test_range_bad_limits_equal() { + Range::new(10, 10); + } + #[should_fail] + #[test] + fn test_range_bad_limits_flipped() { + Range::new(10, 5); + } + + #[test] + fn test_integers() { + let rng = task_rng(); + macro_rules! t ( + ($($ty:ty),*) => {{ + $( + let v: &[($ty, $ty)] = [(0, 10), + (10, 127), + (Bounded::min_value(), Bounded::max_value())]; + for &(low, high) in v.iter() { + let mut sampler: Range<$ty> = Range::new(low, high); + for _ in range(0, 1000) { + let v = sampler.sample(rng); + assert!(low <= v && v < high); + let v = sampler.ind_sample(rng); + assert!(low <= v && v < high); + } + } + )* + }} + ); + t!(i8, i16, i32, i64, int, + u8, u16, u32, u64, uint) + } + + #[test] + fn test_floats() { + let rng = task_rng(); + macro_rules! t ( + ($($ty:ty),*) => {{ + $( + let v: &[($ty, $ty)] = [(0.0, 100.0), + (-1e35, -1e25), + (1e-35, 1e-25), + (-1e35, 1e35)]; + for &(low, high) in v.iter() { + let mut sampler: Range<$ty> = Range::new(low, high); + for _ in range(0, 1000) { + let v = sampler.sample(rng); + assert!(low <= v && v < high); + let v = sampler.ind_sample(rng); + assert!(low <= v && v < high); + } + } + )* + }} + ); + + t!(f32, f64) + } + +} diff --git a/src/libstd/rt/comm.rs b/src/libstd/rt/comm.rs index 4eae8bdc9a822..967dedd94a620 100644 --- a/src/libstd/rt/comm.rs +++ b/src/libstd/rt/comm.rs @@ -1117,7 +1117,7 @@ mod test { let total = stress_factor() + 10; let mut rng = rand::rng(); do total.times { - let msgs = rng.gen_integer_range(0u, 10); + let msgs = rng.gen_range(0u, 10); let pipe_clone = pipe.clone(); let end_chan_clone = end_chan.clone(); do spawntask_random { diff --git a/src/libstd/rt/sched.rs b/src/libstd/rt/sched.rs index 336d2518e437c..c1090d360106e 100644 --- a/src/libstd/rt/sched.rs +++ b/src/libstd/rt/sched.rs @@ -431,7 +431,7 @@ impl Scheduler { fn try_steals(&mut self) -> Option<~Task> { let work_queues = &mut self.work_queues; let len = work_queues.len(); - let start_index = self.rng.gen_integer_range(0, len); + let start_index = self.rng.gen_range(0, len); for index in range(0, len).map(|i| (i + start_index) % len) { match work_queues[index].steal() { Some(task) => { diff --git a/src/rt/rust_builtin.cpp b/src/rt/rust_builtin.cpp index 17f05b5909055..7702053766160 100644 --- a/src/rt/rust_builtin.cpp +++ b/src/rt/rust_builtin.cpp @@ -672,6 +672,8 @@ extern "C" CDECL void rust_win32_rand_acquire(HCRYPTPROV* phProv) { win32_require (_T("CryptAcquireContext"), + // changes to the parameters here should be reflected in the docs of + // std::rand::os::OSRng CryptAcquireContext(phProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT|CRYPT_SILENT)); diff --git a/src/test/bench/core-std.rs b/src/test/bench/core-std.rs index f549f747ef797..dbd1edffe7827 100644 --- a/src/test/bench/core-std.rs +++ b/src/test/bench/core-std.rs @@ -90,7 +90,7 @@ fn vec_plus() { let mut v = ~[]; let mut i = 0; while i < 1500 { - let rv = vec::from_elem(r.gen_integer_range(0u, i + 1), i); + let rv = vec::from_elem(r.gen_range(0u, i + 1), i); if r.gen() { v.push_all_move(rv); } else { @@ -106,7 +106,7 @@ fn vec_append() { let mut v = ~[]; let mut i = 0; while i < 1500 { - let rv = vec::from_elem(r.gen_integer_range(0u, i + 1), i); + let rv = vec::from_elem(r.gen_range(0u, i + 1), i); if r.gen() { v = vec::append(v, rv); } @@ -122,7 +122,7 @@ fn vec_push_all() { let mut v = ~[]; for i in range(0u, 1500) { - let mut rv = vec::from_elem(r.gen_integer_range(0u, i + 1), i); + let mut rv = vec::from_elem(r.gen_range(0u, i + 1), i); if r.gen() { v.push_all(rv); }