Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/Random123.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ using RandomNumbers
export set_counter!
include("common.jl")

export Threefry2x, Threefry4x
export Threefry2x, Threefry4x, threefry
include("threefry.jl")

export Philox2x, Philox4x
export Philox2x, Philox4x, philox
include("philox.jl")

export R123_USE_AESNI
Expand All @@ -46,8 +46,8 @@ catch e
end

@static if R123_USE_AESNI
export AESNI1x, AESNI4x
export ARS1x, ARS4x
export AESNI1x, AESNI4x, aesni
export ARS1x, ARS4x, ars
include("./aesni_common.jl")
include("./aesni.jl")
include("./ars.jl")
Expand Down
53 changes: 39 additions & 14 deletions src/aesni.jl
Original file line number Diff line number Diff line change
Expand Up @@ -201,26 +201,51 @@ copy(src::AESNI4x) = copyto!(AESNI4x(), src)
# Equality for AESNI4x: the `unsafe_compare` check over two `UInt128` words
# must pass, and the `key` and `p` fields must agree.
function ==(r1::AESNI4x, r2::AESNI4x)
    words_match = unsafe_compare(r1, r2, UInt128, 2)
    return words_match && r1.key == r2.key && r1.p == r2.p
end

function aesni1xm128i(input::__m128i, key::AESNIKey)
x = key.key1 ⊻ input
x = _aes_enc(x, key.key2)
x = _aes_enc(x, key.key3)
x = _aes_enc(x, key.key4)
x = _aes_enc(x, key.key5)
x = _aes_enc(x, key.key6)
x = _aes_enc(x, key.key7)
x = _aes_enc(x, key.key8)
x = _aes_enc(x, key.key9)
x = _aes_enc(x, key.key10)
x = _aes_enc_last(x, key.key11)
# Collect the eleven `key1` … `key11` fields of the generator's `key` object
# into a tuple, in field-number order.
function get_key__m128i(o::Union{AESNI1x, AESNI4x})::NTuple{11, __m128i}
    sched = o.key
    return (
        sched.key1, sched.key2, sched.key3, sched.key4,
        sched.key5, sched.key6, sched.key7, sched.key8,
        sched.key9, sched.key10, sched.key11,
    )
end
# Counter of an AESNI4x generator as a 1-tuple of `__m128i`.
function get_ctr__m128i(o::AESNI4x)::Tuple{__m128i}
    return (o.ctr1,)
end

# Counter of an AESNI1x generator as a 1-tuple of `__m128i`.
function get_ctr__m128i(o::AESNI1x)::Tuple{__m128i}
    return (o.ctr,)
end

# Key fields of an AESNI generator, each converted to `UInt128`.
function get_key(o::Union{AESNI1x, AESNI4x})::NTuple{11,UInt128}
    raw_key = get_key__m128i(o)
    return map(UInt128, raw_key)
end

# Counter of an AESNI generator, converted to `UInt128`.
function get_ctr(o::Union{AESNI1x, AESNI4x})::Tuple{UInt128}
    raw_ctr = get_ctr__m128i(o)
    return map(UInt128, raw_ctr)
end

# Functional AESNI kernel on `__m128i` values: XOR the counter with the first
# key entry, apply `_aes_enc` with entries 2–10 in order, and finish with
# `_aes_enc_last` using entry 11. Returns the result as a 1-tuple.
@inline function aesni(key::NTuple{11,__m128i}, ctr::Tuple{__m128i})::Tuple{__m128i}
    (k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11) = key
    state = k1 ⊻ only(ctr)
    # Nine `_aes_enc` steps, written as three nested groups of three.
    state = _aes_enc(_aes_enc(_aes_enc(state, k2), k3), k4)
    state = _aes_enc(_aes_enc(_aes_enc(state, k5), k6), k7)
    state = _aes_enc(_aes_enc(_aes_enc(state, k8), k9), k10)
    return (_aes_enc_last(state, k11),)
end

"""
aesni(key::NTuple{11,UInt128}, ctr::Tuple{UInt128})::Tuple{UInt128}

Functional variant of [`AESNI1x`](@ref) and [`AESNI4x`](@ref).
This function is free of mutability and side effects.
"""
@inline function aesni(key::NTuple{11,UInt128}, ctr::Tuple{UInt128})::Tuple{UInt128}
    # Convert both inputs to `__m128i`, run the `__m128i` method, and convert
    # the single output block back to `UInt128`.
    simd_out = aesni(map(__m128i, key), map(__m128i, ctr))
    return map(UInt128, simd_out)
end


# Compute the next output block of an AESNI1x generator from its key and
# counter, store it in `r.x`, and return it as a 1-tuple of `UInt128`.
@inline function random123_r(r::AESNI1x)
    # The earlier `r.x = aesni1xm128i(r.ctr, r.key)` store was overwritten
    # immediately by this assignment, so only the functional `aesni` call is kept.
    r.x = only(aesni(get_key__m128i(r), get_ctr__m128i(r)))
    return (UInt128(r.x),)
end

# Compute the next output block of an AESNI4x generator from its key and
# counter, store it in `r.x`, and return it split into `UInt32` pieces via
# `split_uint`.
@inline function random123_r(r::AESNI4x)
    # The earlier `r.x = aesni1xm128i(r.ctr1, r.key)` store was overwritten
    # immediately by this assignment, so only the functional `aesni` call is kept.
    r.x = only(aesni(get_key__m128i(r), get_ctr__m128i(r)))
    return split_uint(UInt128(r.x), UInt32)
end
43 changes: 35 additions & 8 deletions src/ars.jl
Original file line number Diff line number Diff line change
Expand Up @@ -98,19 +98,15 @@ copy(src::ARS4x{R}) where R = ARS4x{R}(src.x, src.ctr1, src.key, src.p)

# Equality for ARS4x with the same round count: the `unsafe_compare` check
# over three `UInt128` words must pass and the `p` fields must be identical (`≡`).
function ==(r1::ARS4x{R}, r2::ARS4x{R}) where R
    words_match = unsafe_compare(r1, r2, UInt128, 3)
    return words_match && r1.p ≡ r2.p
end

@generated function ars1xm128i(r::Union{ARS1x{R}, ARS4x{R}}) where R
function expr_ars1xm128i(expr_key, expr_ctr, R)
@assert R isa Int && 1 ≤ R ≤ 10
rounds = [quote
kk += kweyl
v = _aes_enc(v, kk)
end for _ in 2:R]
ctr = :(r.ctr)
if r <: ARS4x
ctr.args[2] = :(:ctr1)
end
quote
ctr = $ctr
key = r.key
ctr = $(expr_ctr)
key = $(expr_key)
kweyl = __m128i(0xbb67ae8584caa73b, 0x9e3779b97f4a7c15)
kk = key
v = ctr ⊻ kk
Expand All @@ -122,12 +118,43 @@ copy(src::ARS4x{R}) where R = ARS4x{R}(src.x, src.ctr1, src.key, src.p)
end
end

# Generated ARS kernel for the stateful generators. Inside a generated
# function `r` is the argument *type*, so the counter field is chosen at
# code-generation time: `ctr` for ARS1x, `ctr1` for ARS4x.
@generated function ars1xm128i(r::Union{ARS1x{R}, ARS4x{R}}) where R
    ctr_access =
        r <: ARS1x ? :(r.ctr) :
        r <: ARS4x ? :(r.ctr1) :
        :(error("Unreachable"))
    return expr_ars1xm128i(:(r.key), ctr_access, R)
end

# Functional ARS kernel on `__m128i` values. The round count is taken from the
# `Val{R}` type parameter, and the kernel result is returned as a 1-tuple.
@generated function ars(key::Tuple{__m128i}, ctr::Tuple{__m128i}, ::Val{R})::Tuple{__m128i} where {R}
    kernel = expr_ars1xm128i(:(only(key)), :(only(ctr)), R)
    # `Expr(:tuple, kernel)` is the expression form of `(kernel,)`.
    return Expr(:tuple, kernel)
end

"""
ars(key::Tuple{UInt128}, ctr::Tuple{UInt128}, rounds::Val{R})::Tuple{UInt128} where {R}

Functional variant of [`ARS1x`](@ref) and [`ARS4x`](@ref).
This function is free of mutability and side effects.
"""
function ars(key::Tuple{UInt128}, ctr::Tuple{UInt128}, rounds::Val{R})::Tuple{UInt128} where {R}
    # Convert both inputs to `__m128i`, run the `__m128i` method with the same
    # round count, and convert the output back to `UInt128`.
    simd_out = ars(map(__m128i, key), map(__m128i, ctr), rounds)
    return map(UInt128, simd_out)
end

# Key of an ARS generator as a 1-tuple of `UInt128`.
function get_key(r::Union{ARS1x, ARS4x})
    return (UInt128(r.key),)
end

# Counter of an ARS1x generator as a 1-tuple of `UInt128`.
function get_ctr(r::ARS1x)
    return (UInt128(r.ctr),)
end

# Counter of an ARS4x generator as a 1-tuple of `UInt128`.
function get_ctr(r::ARS4x)
    return (UInt128(r.ctr1),)
end

# Compute the next output block of an ARS1x generator, store it in `r.x`, and
# return it as a 1-tuple of `UInt128`.
@inline function random123_r(r::ARS1x{R}) where R
    block = ars1xm128i(r)
    r.x = block
    return (UInt128(r.x),)
end


@inline function random123_r(r::ARS4x{R}) where R
r.x = ars1xm128i(r)
split_uint(UInt128(r.x), UInt32)
Expand Down
32 changes: 27 additions & 5 deletions src/philox.jl
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,24 @@ end
key + PHILOX_W_0(T)
end

# Key of a Philox2x generator as a 1-tuple.
@inline function get_key(r::Philox2x)
    return (r.key,)
end

# Both counter words of a Philox2x generator as a 2-tuple.
@inline function get_ctr(r::Philox2x)
    return (r.ctr1, r.ctr2)
end

# Compute the next output pair of a Philox2x generator by calling the
# functional `philox` with the generator's key, counter and round count `R`,
# and store the pair in `r.x1`, `r.x2`. The value of the function is that pair.
@inline function random123_r(r::Philox2x{T, R}) where {T <: Union{UInt32, UInt64}, R}
    # The locals `ctr1, ctr2, key` were assigned but never read; key and
    # counter are now read through `get_key` / `get_ctr` only.
    r.x1, r.x2 = philox(get_key(r), get_ctr(r), Val(R))
end

"""
philox(key::NTuple{1,T}, ctr::NTuple{2,T}, ::Val{R})::NTuple{2,T}
philox(key::NTuple{2,T}, ctr::NTuple{4,T}, ::Val{R})::NTuple{4,T}

Functional variant of [`Philox2x`](@ref) and [`Philox4x`](@ref).
Produces a pseudorandom output of type `T = UInt64` or `T = UInt32` from the inputs.
This function is free of mutability and side effects.
"""
@inline function philox(key_::Tuple{T}, ctr::NTuple{2,T}, ::Val{R}) where {T,R}
key = first(key_)
ctr1, ctr2 = ctr
if R > 0 ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
if R > 1 key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
if R > 2 key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
Expand All @@ -107,7 +123,7 @@ end
if R > 13 key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
if R > 14 key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
if R > 15 key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
r.x1, r.x2 = ctr1, ctr2
ctr1, ctr2
end

"""
Expand Down Expand Up @@ -183,9 +199,15 @@ end
key1 + PHILOX_W_0(T), key2 + PHILOX_W_1(T)
end

# All four counter words of a Philox4x generator as a 4-tuple.
@inline function get_ctr(r::Philox4x)
    return (r.ctr1, r.ctr2, r.ctr3, r.ctr4)
end

# Both key words of a Philox4x generator as a 2-tuple.
@inline function get_key(r::Philox4x)
    return (r.key1, r.key2)
end
# Compute the next four outputs of a Philox4x generator by calling the
# functional `philox` with the generator's key, counter and round count `R`,
# and store them in `r.x1` … `r.x4`. The value of the function is that 4-tuple.
@inline function random123_r(r::Philox4x{T, R}) where {T <: Union{UInt32, UInt64}, R}
    # The locals `ctr1..ctr4` and `key1, key2` were assigned but never read;
    # key and counter are now read through `get_key` / `get_ctr` only.
    r.x1, r.x2, r.x3, r.x4 = philox(get_key(r), get_ctr(r), Val(R))
end

@inline function philox(key::NTuple{2,T}, ctr::NTuple{4,T}, ::Val{R}) where {T <:Union{UInt32, UInt64}, R}
ctr1, ctr2, ctr3, ctr4 = ctr
key1, key2 = key
if R > 0
ctr1, ctr2, ctr3, ctr4 = philox4x_round(ctr1, ctr2, ctr3, ctr4, key1, key2);
end
Expand Down Expand Up @@ -249,5 +271,5 @@ end
key1, key2 = philox4x_bumpkey(key1, key2);
ctr1, ctr2, ctr3, ctr4 = philox4x_round(ctr1, ctr2, ctr3, ctr4, key1, key2);
end
r.x1, r.x2, r.x3, r.x4 = ctr1, ctr2, ctr3, ctr4
ctr1, ctr2, ctr3, ctr4
end
41 changes: 27 additions & 14 deletions src/threefry.jl
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,25 @@ copy(src::Threefry2x{T, R}) where {T, R} = Threefry2x{T, R}(src.x1, src.x2, src.

# Equality for Threefry2x with matching parameters: the `unsafe_compare` check
# over six words of type `T` must pass and the `p` fields must be equal.
function ==(r1::Threefry2x{T, R}, r2::Threefry2x{T, R}) where {T, R}
    words_match = unsafe_compare(r1, r2, T, 6)
    return words_match && r1.p == r2.p
end

# Both key words of a Threefry2x generator as a 2-tuple.
@inline function get_key(r::Threefry2x)
    return (r.key1, r.key2)
end

# Both counter words of a Threefry2x generator as a 2-tuple.
@inline function get_ctr(r::Threefry2x)
    return (r.ctr1, r.ctr2)
end
# Compute the next output pair of a Threefry2x generator with the functional
# `threefry`, store it in `r.x1`, `r.x2`, and return the pair.
@inline function random123_r(r::Threefry2x{T, R}) where {T <: Union{UInt32, UInt64}, R}
    key = get_key(r)
    ctr = get_ctr(r)
    out = threefry(key, ctr, Val(R))
    r.x1, r.x2 = out
    return out
end

"""
threefry(key::NTuple{2,T}, ctr::NTuple{2,T}, ::Val{R})::NTuple{2,T}
threefry(key::NTuple{4,T}, ctr::NTuple{4,T}, ::Val{R})::NTuple{4,T}

Functional variant of [`Threefry2x`](@ref) and [`Threefry4x`](@ref).
Produces a pseudorandom output of type `T = UInt64` or `T = UInt32` from the inputs.
This function is free of mutability and side effects.
"""
@inline function threefry(key::NTuple{2,T}, ctr::NTuple{2,T}, ::Val{R})::NTuple{2,T} where {T <: Union{UInt32, UInt64}, R}
ks2 = SKEIN_KS_PARITY(T)
ks0 = r.key1
x0 = r.ctr1
x0,x1 = ctr
ks0,ks1 = key
ks2 ⊻= ks0
ks1 = r.key2
x1 = r.ctr2
ks2 ⊻= ks1
x0 += ks0
x1 += ks1
Expand Down Expand Up @@ -191,7 +203,7 @@ copy(src::Threefry2x{T, R}) where {T, R} = Threefry2x{T, R}(src.x1, src.x2, src.
x0 += ks2; x1 += ks0;
x1 += 8 % T;
end
r.x1, r.x2 = x0, x1
x0, x1
end

"""
Expand Down Expand Up @@ -257,19 +269,20 @@ copy(src::Threefry4x{T, R}) where {T, R} = Threefry4x{T, R}(src.x1, src.x2, src.

# Equality for Threefry4x with matching parameters: the `unsafe_compare` check
# over twelve words of type `T` must pass and the `p` fields must be equal.
function ==(r1::Threefry4x{T, R}, r2::Threefry4x{T, R}) where {T, R}
    words_match = unsafe_compare(r1, r2, T, 12)
    return words_match && r1.p == r2.p
end

# All four key words of a Threefry4x generator as a 4-tuple.
@inline function get_key(r::Threefry4x)
    return (r.key1, r.key2, r.key3, r.key4)
end

# All four counter words of a Threefry4x generator as a 4-tuple.
@inline function get_ctr(r::Threefry4x)
    return (r.ctr1, r.ctr2, r.ctr3, r.ctr4)
end

# Compute the next four outputs of a Threefry4x generator with the functional
# `threefry`, store them in `r.x1` … `r.x4`, and return the 4-tuple.
@inline function random123_r(r::Threefry4x{T, R}) where {T <: Union{UInt32, UInt64}, R}
    key = get_key(r)
    ctr = get_ctr(r)
    out = threefry(key, ctr, Val(R))
    r.x1, r.x2, r.x3, r.x4 = out
    return out
end

@inline function threefry(key::NTuple{4,T},ctr::NTuple{4,T}, rounds::Val{R})::NTuple{4,T} where {T <: Union{UInt32, UInt64}, R}
ks4 = SKEIN_KS_PARITY(T)
ks0 = r.key1
x0 = r.ctr1
ks0,ks1,ks2,ks3 = key
x0,x1,x2,x3 = ctr
ks4 ⊻= ks0
ks1 = r.key2
x1 = r.ctr2
ks4 ⊻= ks1
ks2 = r.key3
x2 = r.ctr3
ks4 ⊻= ks2
ks3 = r.key4
x3 = r.ctr4
ks4 ⊻= ks3
x0 += ks0; x1 += ks1; x2 += ks2; x3 += ks3;

Expand Down Expand Up @@ -633,5 +646,5 @@ copy(src::Threefry4x{T, R}) where {T, R} = Threefry4x{T, R}(src.x1, src.x2, src.
x0 += ks3; x1 += ks4; x2 += ks0; x3 += ks1;
x3 += 18 % T;
end
r.x1, r.x2, r.x3, r.x4 = x0, x1, x2, x3
x0, x1, x2, x3
end
Loading