
Commit f6d3e50

Merge a2d569d into d7167b8 (2 parents: d7167b8 + a2d569d)

File tree: 32 files changed, +359 / -221 lines

NDTensors/ext/NDTensorsCUDAExt/permutedims.jl

Lines changed: 13 additions & 0 deletions
@@ -5,3 +5,16 @@ function Base.permutedims!(
   copyto!(expose(parent(Edest)), expose(Aperm))
   return unexpose(Edest)
 end
+
+## Found an issue in CUDA where `.=` can fail if Edest is a reshaped Adjoint,
+## so instead force Esrc into the shape of parent(Edest).
+function Base.permutedims!(
+  Edest::Exposed{<:CuArray,<:Base.ReshapedArray{<:Any,<:Any,<:Adjoint}},
+  Esrc::Exposed{<:CuArray},
+  perm,
+  f,
+)
+  Aperm = reshape(permutedims(Esrc, perm), size(parent(Edest)))
+  parent(Edest) .= f.(parent(Edest), Aperm)
+  return unexpose(Edest)
+end
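
For context (not part of the diff), a minimal sketch of the case the new CUDA method targets. The `expose` import path and the exact wrapper construction are assumptions here:

## Hypothetical usage sketch: destination is a Base.ReshapedArray wrapping an Adjoint of a CuArray.
using CUDA: cu
using NDTensors: expose

A = cu(randn(Float32, 3, 4))
dest = reshape(adjoint(A), 2, 6)   # ReshapedArray{<:Any,<:Any,<:Adjoint} over a CuArray
src = cu(randn(Float32, 6, 2))
# The four-argument form broadcasts f over the destination's parent and the permuted source:
permutedims!(expose(dest), expose(src), (2, 1), (x, y) -> x + y)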

NDTensors/ext/NDTensorsMetalExt/mul.jl

Lines changed: 19 additions & 0 deletions
@@ -19,3 +19,22 @@ function LinearAlgebra.mul!(
   mul!(CM', BM', AM', α, β)
   return unexpose(CM)
 end
+
+## Fix an issue in Metal.jl where it cannot recognize a Transpose{Reshape{Adjoint{MtlArray}}}
+## as an MtlArray and falls back to generic matmul.
+function LinearAlgebra.mul!(
+  CM::Exposed{<:MtlArray},
+  AM::Exposed{<:MtlArray},
+  BM::Exposed{
+    <:MtlArray,
+    <:LinearAlgebra.Transpose{
+      <:Any,<:Base.ReshapedArray{<:Any,<:Any,<:LinearAlgebra.Adjoint}
+    },
+  },
+  α,
+  β,
+)
+  B = copy(expose(parent(BM)))
+  mul!(CM, AM, expose(transpose(B)), α, β)
+  return unexpose(CM)
+end
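
For context (not part of the diff), a hedged sketch of the wrapper combination the new method handles; the `expose` import path is an assumption:

## Hypothetical usage sketch: B is a Transpose of a ReshapedArray of an Adjoint of an MtlArray.
using LinearAlgebra: mul!
using Metal: mtl
using NDTensors: expose

A = mtl(randn(Float32, 2, 6))
B0 = mtl(randn(Float32, 3, 4))
B = transpose(reshape(adjoint(B0), 2, 6))   # the wrapped type Metal.jl mistook for a generic array
C = mtl(zeros(Float32, 2, 2))
# Dispatches to the new method, which materializes the reshaped adjoint before calling mul! again:
mul!(expose(C), expose(A), expose(B), true, false)
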
Lines changed: 30 additions & 0 deletions
@@ -1,7 +1,37 @@
+## There's an issue in Metal where `ReshapedArray`-wrapped arrays cannot be permuted using
+## permutedims (Metal falls back to scalar indexing).
+## These functions address the problem for the different methods of permutedims.
+function Base.permutedims(E::Exposed{<:MtlArray,<:Base.ReshapedArray}, perm)
+  A = copy(E)
+  return permutedims(A, perm)
+end
+
 function Base.permutedims!(
   Edest::Exposed{<:MtlArray,<:Base.ReshapedArray}, Esrc::Exposed{<:MtlArray}, perm
 )
   Aperm = permutedims(Esrc, perm)
   copyto!(expose(parent(Edest)), expose(Aperm))
   return unexpose(Edest)
 end
+
+function Base.permutedims!(
+  Edest::Exposed{<:MtlArray}, Esrc::Exposed{<:MtlArray,<:Base.ReshapedArray}, perm
+)
+  Aperm = permutedims(Esrc, perm)
+  copyto!(Edest, expose(Aperm))
+  return unexpose(Edest)
+end
+
+## To get around the Metal issue here we copy and permute Esrc,
+## then reshape it to the size of Edest's parent
+## and broadcast into the parent.
+function Base.permutedims!(
+  Edest::Exposed{<:MtlArray,<:Base.ReshapedArray},
+  Esrc::Exposed{<:MtlArray,<:Base.ReshapedArray},
+  perm,
+  f,
+)
+  Aperm = reshape(permutedims(Esrc, perm), size(parent(Edest)))
+  parent(Edest) .= f.(parent(Edest), Aperm)
+  return unexpose(Edest)
+end
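
For context (not part of the diff), a short sketch of the reshaped-wrapper case these methods cover; the `expose` import path is an assumption:

## Hypothetical usage sketch: permuting a Base.ReshapedArray over an MtlArray without scalar indexing.
using Metal: mtl
using NDTensors: expose

M = reshape(adjoint(mtl(randn(Float32, 4, 3))), 2, 6)   # Base.ReshapedArray over an Adjoint of an MtlArray
P = permutedims(expose(M), (2, 1))                      # copies M first, then permutes the copy
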
Lines changed: 28 additions & 33 deletions
@@ -1,13 +1,14 @@
-using CUDA
 using NDTensors
-
-using ITensors
-using Test
-
-using Zygote
+using CUDA: CUDA, CuVector, cu, reshape
+using ITensors:
+  Index, ITensor, randomMPO, randomMPS, inner, orthogonalize, qr, siteinds, svd
+using Test: @test
+using Zygote: gradient

 function main()
   # using ITensorGPU
+  cpu = NDTensors.cpu
+  gpu = NDTensors.cu
   # Here is an example of how to utilize NDTensors based tensors with CUDA datatypes
   i = Index(2)
   j = Index(5)
@@ -18,10 +19,9 @@ function main()
   dim2 = (j, k)

   # Create 2 ITensors with CUDA backends (these will be made simpler by randomITensor(CuVector) soon)
-  A = ITensor(NDTensors.generic_randn(CuVector, dim(dim1)), dim1)
-  B = ITensor(NDTensors.generic_randn(CuVector, dim(dim2)), dim2)
+  A = ITensor(randomTensor(CuVector, dim1))
+  B = ITensor(randomTensor(CuVector, dim2))
   # Contract the two tensors
-  cpu = NDTensors.cpu
   C = A * B
   A = cpu(A)
   B = cpu(B)
@@ -36,8 +36,8 @@ function main()
   fill!(B, randn())

   # Convert the ITensors to GPU
-  cA = NDTensors.cu(A)
-  cB = NDTensors.cu(B)
+  cA = gpu(A)
+  cB = gpu(B)

   # Check that the backend of the contraction is GPU
   @test A * A ≈ cpu(cA * cA)
@@ -47,11 +47,8 @@ function main()

   dim3 = (l, k)
   dim4 = (i,)
-  cC = ITensor(
-    NDTensors.generic_randn(CuVector{Float64,CUDA.Mem.DeviceBuffer}, dim(dim3)), dim3
-  )
-  cC = NDTensors.cu(ITensor(NDTensors.generic_randn(Vector{Float64}, dim(dim3)), dim3))
-  cD = ITensor(Tensor(CuVector, dim4))
+  cC = ITensor(randomTensor(CuVector{Float64,CUDA.Mem.DeviceBuffer}, dim3))
+  cD = ITensor(Tensor(CuVector{Float32}, dim4))
   fill!(cD, randn())

   # Create a function of 4 tensors on GPU
@@ -61,20 +58,18 @@ function main()
   # Currently this code fails with CUDA.allowscalar(false)
   # because outer calls the _gemm! function, which falls back to a
   # generic implementation
-  @allowscalar grad = gradient(f, cA, cB, cC, cD)
-  @allowscalar @test NDTensors.cpu(cB * cC * cD) ≈ NDTensors.cpu(grad[1])
-  @allowscalar @test (cB * cC * cD) ≈ grad[1]
+  grad = gradient(f, cA, cB, cC, cD)
+  @test cpu(cB * cC * cD) ≈ cpu(grad[1])
+  @test (cB * cC * cD) ≈ grad[1]
   # Create a tuple of indices
-  decomp = (
-    dim(NDTensors.ind(grad[1], 1)),
-    dim(NDTensors.ind(grad[1], 2)) * dim(NDTensors.ind(grad[1], 3)),
-  )
+  dims = size(grad[1])
+  decomp = (dims[1], dims[2] * dims[3])
   # Reshape the CuVector of data into a matrix
-  cuTensor_data = CUDA.reshape(NDTensors.data(storage(grad[1])), decomp)
+  cuTensor_data = reshape(array(grad[1]), decomp)
   # Use cuBLAS to compute the SVD of the data
   U, S, V = svd(cuTensor_data)
-  decomp = (dim(NDTensors.ind(grad[2], 1)), dim(NDTensors.ind(grad[2], 2)))
-  cuTensor_data = CUDA.reshape(NDTensors.data(storage(grad[2])), decomp)
+  decomp = size(array(grad[2]))
+  cuTensor_data = reshape(array(grad[2]), decomp)
   U, S, V = svd(cuTensor_data)

   # These things can take up lots of memory; look at memory usage here
@@ -87,33 +82,33 @@ function main()
   CUDA.memory_status()

   # It's possible to compute the QR of a GPU tensor
-  cq = ITensors.qr(cA, (i,), (j, l))
-  q = ITensors.qr(A, (i,), (j, l))
+  cq = qr(cA, (i,), (j, l))
   A ≈ cpu(cq[1]) * cpu(cq[2])

   ## SVD does not yet work with the CUDA backend; see above on
   ## converting ITensors to vectors and calling the CUDA svd function
   ## on CuVectors...
   #ITensors.svd(A, (i,), (j, l))

-  s = ITensors.siteinds("S=1/2", 8)
+  s = siteinds("S=1/2", 8)
   m = randomMPS(s; linkdims=4)
-  cm = NDTensors.cu(m)
+  cm = gpu(m)

   @test inner(cm', cm) ≈ inner(m', m)

   H = randomMPO(s)
-  cH = NDTensors.cu(H)
+  cH = gpu(H)
   @test inner(cm', cH, cm) ≈ inner(m', H, m)

   m = orthogonalize(m, 1)
-  cm = NDTensors.cu(orthogonalize(cm, 1))
+  cm = gpu(orthogonalize(cm, 1))
   @test inner(m', m) ≈ inner(cm', cm)

   H = orthogonalize(H, 1)
-  cH = NDTensors.cu(cH)
+  cH = gpu(cH)

   @test inner(cm', cH, cm) ≈ inner(m', H, m)
 end

+## Run the main function with Float64
 main()
Lines changed: 13 additions & 12 deletions
@@ -1,11 +1,13 @@
-using Metal
+using Metal: MtlVector, mtl
 using NDTensors

-using ITensors
-using Test
-using Zygote
+using ITensors: ITensor, Index, randomITensor
+using Test: @test
+using Zygote: gradient

 function main()
+  cpu = NDTensors.cpu
+  gpu = NDTensors.mtl
   # Here is an example of how to utilize NDTensors based tensors with Metal datatypes
   i = Index(20)
   j = Index(5)
@@ -15,27 +17,26 @@ function main()
   dim1 = (i, j, l)
   dim2 = (j, k)

-  cA = ITensor(NDTensors.generic_randn(MtlVector{Float32}, dim(dim1)), dim1)
-  cB = ITensor(NDTensors.generic_randn(MtlVector{Float32}, dim(dim2)), dim2)
+  ## MtlArrays only support Float32 arithmetic
+  cA = ITensor(randomTensor(MtlVector{Float32}, dim1))
+  cB = ITensor(randomTensor(MtlVector{Float32}, dim2))
   cC = cA * cB

-  cpu = NDTensors.cpu
   A = cpu(cA)
   B = cpu(cB)

   @test A * B ≈ cpu(cC)

-  #C = A * B
-
   dim3 = (l, k)
   dim4 = (i,)

-  cC = mtl(randomITensor(Float32, dim3))
-  cD = mtl(randomITensor(Float32, dim4))
+  cC = gpu(randomITensor(Float32, dim3))
+  cD = gpu(randomITensor(Float32, dim4))

   f(A, B, C, D) = (A * B * C * D)[]

-  return grad = gradient(f, cA, cB, cC, cD)
+  grad = gradient(f, cA, cB, cC, cD)
+  @test grad[2] ≈ cA * cC * cD
 end

 main()

NDTensors/src/blocksparse/blocksparsetensor.jl

Lines changed: 11 additions & 5 deletions
@@ -49,7 +49,7 @@ Construct a block sparse tensor with uninitialized memory
 from indices and locations of non-zero blocks.
 """
 function BlockSparseTensor(::UndefInitializer, blockoffsets, inds)
-  return BlockSparseTensor(Float64, undef, blockoffsets, inds)
+  return BlockSparseTensor(default_eltype(), undef, blockoffsets, inds)
 end

 function BlockSparseTensor(
@@ -65,15 +65,15 @@ function BlockSparseTensor(eltype::Type{<:Number}, blockoffsets::BlockOffsets, i
 end

 function BlockSparseTensor(blockoffsets::BlockOffsets, inds)
-  return BlockSparseTensor(Float64, blockoffsets, inds)
+  return BlockSparseTensor(default_eltype(), blockoffsets, inds)
 end

 """
     BlockSparseTensor(inds)

 Construct a block sparse tensor with no blocks.
 """
-BlockSparseTensor(inds) = BlockSparseTensor(Float64, inds)
+BlockSparseTensor(inds) = BlockSparseTensor(default_eltype(), inds)

 function BlockSparseTensor(datatype::Type{<:AbstractArray}, inds)
   return BlockSparseTensor(datatype, BlockOffsets{length(inds)}(), inds)
@@ -99,7 +99,7 @@ Construct a block sparse tensor with the specified blocks.
 Defaults to setting structurally non-zero blocks to zero.
 """
 function BlockSparseTensor(blocks::Vector{BlockT}, inds) where {BlockT<:Union{Block,NTuple}}
-  return BlockSparseTensor(Float64, blocks, inds)
+  return BlockSparseTensor(default_eltype(), blocks, inds)
 end

 function BlockSparseTensor(
@@ -160,7 +160,7 @@ function randomBlockSparseTensor(blocks::Vector, inds)
 end

 function randomBlockSparseTensor(rng::AbstractRNG, blocks::Vector, inds)
-  return randomBlockSparseTensor(rng, Float64, blocks, inds)
+  return randomBlockSparseTensor(rng, default_eltype(), blocks, inds)
 end

 """
@@ -176,6 +176,12 @@ function BlockSparseTensor(
   return BlockSparseTensor(blocks, inds)
 end

+function BlockSparseTensor{ElT}(
+  blocks::Vector{BlockT}, inds::Vararg{BlockDim,N}
+) where {ElT<:Number,BlockT<:Union{Block{N},NTuple{N,<:Integer}}} where {N}
+  return BlockSparseTensor(ElT, blocks, inds)
+end
+
 function zeros(
   tensor::BlockSparseTensor{ElT,N}, blockoffsets::BlockOffsets{N}, inds
 ) where {ElT,N}
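
Not part of the diff: the point of this hunk is that these constructors now take their element type from default_eltype() instead of hard-coding Float64, plus a new BlockSparseTensor{ElT} method. A heavily hedged sketch; the exported names and the block/dimension format are assumptions based only on the signatures in this file:

## Hypothetical usage sketch (names and index format assumed, not verified):
using NDTensors: NDTensors, BlockSparseTensor

inds = ([2, 3], [2, 3])                              # two indices, each with blocks of size 2 and 3
T = BlockSparseTensor([(1, 1)], inds)                # eltype(T) == NDTensors.default_eltype()
Tf = BlockSparseTensor{Float32}([(1, 1)], inds...)   # the {ElT} constructor added in this hunk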

NDTensors/src/dense/tensoralgebra/outer.jl

Lines changed: 3 additions & 6 deletions
@@ -21,12 +21,9 @@ function outer!(
   v1 = data(T1)
   v2 = data(T2)
   RM = reshape(R, length(v1), length(v2))
-  ## Potential fix is call reshape on array
-  #RM = reshape(array(R), length(v1), length(v2))
-  #RM .= v1 .* transpose(v2)
-  #mul!(RM, v1, transpose(v2))
-  _gemm!('N', 'T', one(ElR), v1, v2, zero(ElR), RM)
-  #mul!!(RM, v1, transpose(v2), one(ElR), zero(ElR))
+  ## There is no _gemm! defined for CUDA or Metal, so that path falls back to
+  ## generic matmul. Use mul!! instead to dispatch to the correct mul! (ger).
+  mul!!(array(RM), v1, transpose(v2), one(ElR), zero(ElR))
   return R
 end

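
Not part of the diff: the replaced _gemm! call is an outer-product (rank-1) update; per the new comment, routing it through mul!! lets each backend pick its native ger-style kernel. A CPU-only sketch of the same update in plain LinearAlgebra terms:

## Equivalent rank-1 update with plain LinearAlgebra (no GPU types involved):
using LinearAlgebra: mul!

v1 = randn(3)
v2 = randn(4)
RM = zeros(3, 4)
mul!(RM, v1, transpose(v2), 1.0, 0.0)   # RM = 1.0 * v1 * transpose(v2) + 0.0 * RM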

NDTensors/src/lib/BlockSparseArrays/test/runtests.jl

Lines changed: 2 additions & 0 deletions
@@ -1,3 +1,4 @@
+@eval module $(gensym())
 using Test: @test, @testset, @test_broken
 using BlockArrays: BlockArrays, BlockRange, blocksize
 using Compat: allequal
@@ -101,3 +102,4 @@ include("TestBlockSparseArraysUtils.jl")
     @test Hermitian(Matrix(a)) * Matrix(u) ≈ Matrix(u) * Diagonal(Vector(d))
   end
 end
+end
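
Not part of the diff: the pattern added here (and in the two test files below) wraps each test file in an anonymous module, so its imports and helper definitions do not leak into other test files include()-d into the same session. A minimal self-contained sketch:

## Each evaluation creates a fresh, uniquely named module:
@eval module $(gensym())
using Test: @test
helper(x) = x + 1   # stays local to this throwaway module
@test helper(1) == 2
end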

NDTensors/src/lib/SetParameters/test/runtests.jl

Lines changed: 3 additions & 1 deletion
@@ -1,4 +1,5 @@
-using Test
+@eval module $(gensym())
+using Test: @inferred, @test, @testset
 using NDTensors.SetParameters

 @testset "Test NDTensors.SetParameters" begin
@@ -152,3 +153,4 @@ using NDTensors.SetParameters
     Array{Float64,1}
   end
 end
+end

NDTensors/src/lib/SmallVectors/test/runtests.jl

Lines changed: 3 additions & 1 deletion
@@ -1,5 +1,6 @@
+@eval module $(gensym())
 using NDTensors.SmallVectors
-using Test
+using Test: @inferred, @test, @testset, @test_broken

 using NDTensors.SmallVectors:
   setindex,
@@ -153,3 +154,4 @@ end
 # @testset "SmallVectors" test_smallvectors()
 # (new in Julia 1.9)
 test_smallvectors()
+end
