-using CUDA
 using NDTensors
-
-using ITensors
-using Test
-
-using Zygote
+using CUDA: CUDA, CuVector, cu, reshape
+using ITensors:
+  Index, ITensor, randomMPO, randomMPS, inner, orthogonalize, qr, siteinds, svd
+using Test: @test
+using Zygote: gradient
 
 function main()
   # using ITensorGPU
+  cpu = NDTensors.cpu
+  gpu = NDTensors.cu
   # Here is an example of how to use NDTensors-based tensors with CUDA datatypes
   i = Index(2)
   j = Index(5)
@@ -18,10 +19,9 @@ function main()
   dim2 = (j, k)
 
   # Create 2 ITensors with CUDA backends (These will be made simpler by randomITensor(CuVector) soon)
-  A = ITensor(NDTensors.generic_randn(CuVector, dim(dim1)), dim1)
-  B = ITensor(NDTensors.generic_randn(CuVector, dim(dim2)), dim2)
+  A = ITensor(randomTensor(CuVector, dim1))
+  B = ITensor(randomTensor(CuVector, dim2))
   # Contract the two tensors
-  cpu = NDTensors.cpu
   C = A * B
   A = cpu(A)
   B = cpu(B)
@@ -36,8 +36,8 @@ function main()
   fill!(B, randn())
 
   # Convert the ITensors to GPU
-  cA = NDTensors.cu(A)
-  cB = NDTensors.cu(B)
+  cA = gpu(A)
+  cB = gpu(B)
 
   # Check that the backend of the contraction is the GPU
   @test A * A ≈ cpu(cA * cA)
@@ -47,11 +47,8 @@ function main()
 
   dim3 = (l, k)
   dim4 = (i,)
-  cC = ITensor(
-    NDTensors.generic_randn(CuVector{Float64,CUDA.Mem.DeviceBuffer}, dim(dim3)), dim3
-  )
-  cC = NDTensors.cu(ITensor(NDTensors.generic_randn(Vector{Float64}, dim(dim3)), dim3))
-  cD = ITensor(Tensor(CuVector, dim4))
+  cC = ITensor(randomTensor(CuVector{Float64,CUDA.Mem.DeviceBuffer}, dim3))
+  cD = ITensor(Tensor(CuVector{Float32}, dim4))
   fill!(cD, randn())
 
   # Create a function of 4 tensors on GPU
@@ -61,20 +58,18 @@ function main()
   # Currently this code fails with CUDA.allowscalar(false)
   # because outer calls the _gemm! function, which falls back to a
   # generic implementation
-  @allowscalar grad = gradient(f, cA, cB, cC, cD)
-  @allowscalar @test NDTensors.cpu(cB * cC * cD) ≈ NDTensors.cpu(grad[1])
-  @allowscalar @test (cB * cC * cD) ≈ grad[1]
+  grad = gradient(f, cA, cB, cC, cD)
+  @test cpu(cB * cC * cD) ≈ cpu(grad[1])
+  @test (cB * cC * cD) ≈ grad[1]
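+  # For reference, a minimal sketch of the scalar-indexing workaround the
+  # comment above refers to: wrap the failing call in CUDA.@allowscalar
+  # (as earlier revisions of this example did), e.g.
+  #   CUDA.@allowscalar grad = gradient(f, cA, cB, cC, cD)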
   # Create a tuple of dimensions to reshape the gradient into a matrix
-  decomp = (
-    dim(NDTensors.ind(grad[1], 1)),
-    dim(NDTensors.ind(grad[1], 2)) * dim(NDTensors.ind(grad[1], 3)),
-  )
+  dims = size(grad[1])
+  decomp = (dims[1], dims[2] * dims[3])
   # Reshape the CuVector of data into a matrix
-  cuTensor_data = CUDA.reshape(NDTensors.data(storage(grad[1])), decomp)
+  cuTensor_data = reshape(array(grad[1]), decomp)
   # Use cuBLAS to compute the SVD of the data
   U, S, V = svd(cuTensor_data)
-  decomp = (dim(NDTensors.ind(grad[2], 1)), dim(NDTensors.ind(grad[2], 2)))
-  cuTensor_data = CUDA.reshape(NDTensors.data(storage(grad[2])), decomp)
+  decomp = size(array(grad[2]))
+  cuTensor_data = reshape(array(grad[2]), decomp)
   U, S, V = svd(cuTensor_data)
 
   # These operations can use a lot of GPU memory; check the memory usage here
@@ -87,33 +82,33 @@ function main()
   CUDA.memory_status()
 
   # It's possible to compute the QR decomposition of a GPU tensor
-  cq = ITensors.qr(cA, (i,), (j, l))
-  q = ITensors.qr(A, (i,), (j, l))
+  cq = qr(cA, (i,), (j, l))
   A ≈ cpu(cq[1]) * cpu(cq[2])
 
   ## SVD does not yet work with the CUDA backend; see above on
   ## converting ITensors to vectors and calling the CUDA svd function on
   ## CuVectors...
   #ITensors.svd(A, (i,), (j, l))
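+  # One possible workaround (an untested sketch that uses only functions
+  # already shown above): flatten the GPU tensor to a matrix and call svd
+  # on that directly, e.g. for cA, whose indices are (i, j, l):
+  #   U, S, V = svd(reshape(array(cA), (dim(i), dim(j) * dim(l))))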
 
-  s = ITensors.siteinds("S=1/2", 8)
+  s = siteinds("S=1/2", 8)
   m = randomMPS(s; linkdims=4)
-  cm = NDTensors.cu(m)
+  cm = gpu(m)
 
   @test inner(cm', cm) ≈ inner(m', m)
 
   H = randomMPO(s)
-  cH = NDTensors.cu(H)
+  cH = gpu(H)
   @test inner(cm', cH, cm) ≈ inner(m', H, m)
 
   m = orthogonalize(m, 1)
-  cm = NDTensors.cu(orthogonalize(cm, 1))
+  cm = gpu(orthogonalize(cm, 1))
   @test inner(m', m) ≈ inner(cm', cm)
 
   H = orthogonalize(H, 1)
-  cH = NDTensors.cu(cH)
+  cH = gpu(cH)
 
   @test inner(cm', cH, cm) ≈ inner(m', H, m)
 end
 
+## running the main function with Float64
 main()