diff --git a/NDTensors/ext/NDTensorsCUDAExt/adapt.jl b/NDTensors/ext/NDTensorsCUDAExt/adapt.jl
index 165b5405ce..75162a8844 100644
--- a/NDTensors/ext/NDTensorsCUDAExt/adapt.jl
+++ b/NDTensors/ext/NDTensorsCUDAExt/adapt.jl
@@ -16,7 +16,8 @@ buffertype(::NDTensorCuArrayAdaptor{B}) where {B} = B
 function Adapt.adapt_storage(adaptor::NDTensorCuArrayAdaptor, xs::AbstractArray)
   ElT = eltype(xs)
   BufT = buffertype(adaptor)
-  return isbits(xs) ? xs : adapt(CuArray{ElT,1,BufT}, xs)
+  N = ndims(xs)
+  return isbits(xs) ? xs : adapt(CuArray{ElT,N,BufT}, xs)
 end
 
 function NDTensors.adapt_storagetype(
diff --git a/NDTensors/ext/NDTensorsCUDAExt/indexing.jl b/NDTensors/ext/NDTensorsCUDAExt/indexing.jl
index 1593264780..2f0f6790e0 100644
--- a/NDTensors/ext/NDTensorsCUDAExt/indexing.jl
+++ b/NDTensors/ext/NDTensorsCUDAExt/indexing.jl
@@ -7,14 +7,12 @@ function setindex!(E::Exposed{<:CuArray}, x::Number)
   return unexpose(E)
 end
 
-function Base.getindex(E::Exposed{<:CuArray,<:Adjoint}, I...)
-  Ap = parent(E)
-  return expose(Ap)[I...]
+function Base.getindex(E::Exposed{<:CuArray,<:Adjoint}, i, j)
+  return (expose(parent(E))[j, i])'
 end
 
 function Base.copy(E::Exposed{<:CuArray,<:Base.ReshapedArray})
-  Ap = parent(E)
-  return copy(expose(Ap))
+  return reshape(copy(expose(parent(E))), size(unexpose(E)))
 end
 
 Base.any(f, E::Exposed{<:CuArray,<:NDTensors.Tensor}) = any(f, data(unexpose(E)))
diff --git a/NDTensors/ext/examples/NDTensorCUDA.jl b/NDTensors/ext/examples/NDTensorCUDA.jl
index 28ecbbf91e..ca4ffbe23e 100644
--- a/NDTensors/ext/examples/NDTensorCUDA.jl
+++ b/NDTensors/ext/examples/NDTensorCUDA.jl
@@ -58,6 +58,9 @@ function main()
   f(A, B, C, D) = (A * B * C * D)[]
 
   #Use Zygote to take the gradient of the four tensors on GPU
+  #Currently this code fails with CUDA.allowscalar(false)
+  # because `outer` calls the `_gemm!` function, which falls back to
+  # a generic implementation
   grad = gradient(f, cA, cB, cC, cD)
   @test NDTensors.cpu(cB * cC * cD) ≈ NDTensors.cpu(grad[1])
   @test (cB * cC * cD) ≈ grad[1]
diff --git a/NDTensors/src/Unwrap/src/Unwrap.jl b/NDTensors/src/Unwrap/src/Unwrap.jl
index c83f05cd71..00b6448852 100644
--- a/NDTensors/src/Unwrap/src/Unwrap.jl
+++ b/NDTensors/src/Unwrap/src/Unwrap.jl
@@ -20,6 +20,4 @@ include("functions/permutedims.jl")
 export IsWrappedArray,
   is_wrapped_array, parenttype, unwrap_type, expose, Exposed, unexpose, cpu
 
-## TODO write exposed based functions in the NDTensors Extensions when necessary
-
 end
diff --git a/NDTensors/src/Unwrap/test/runtests.jl b/NDTensors/src/Unwrap/test/runtests.jl
new file mode 100644
index 0000000000..0cbf4006c7
--- /dev/null
+++ b/NDTensors/src/Unwrap/test/runtests.jl
@@ -0,0 +1,90 @@
+using Test
+using NDTensors.Unwrap
+using NDTensors
+using LinearAlgebra
+
+include("../../../test/device_list.jl")
+@testset "Testing Unwrap" for dev in devices_list(ARGS)
+  v = dev(Vector{Float64}(undef, 10))
+  vt = transpose(v)
+  va = v'
+
+  E = expose(v)
+  Et = expose(vt)
+  Ea = expose(va)
+  v_type = typeof(v)
+  e_type = eltype(v)
+  @test typeof(E) == Exposed{v_type,v_type}
+  @test typeof(Et) == Exposed{v_type,LinearAlgebra.Transpose{e_type,v_type}}
+  @test typeof(Ea) == Exposed{v_type,LinearAlgebra.Adjoint{e_type,v_type}}
+
+  @test parent(E) == v
+  @test parent(Et) == v
+  @test parent(Ea) == v
+  @test transpose(E) == vt
+  @test cpu(E) == v
+  @test cpu(Et) == vt
+
+  m = reshape(v, (5, 2))
+  mt = transpose(m)
+  ma = m'
+  E = expose(m)
+  Et = expose(mt)
+  Ea = expose(ma)
+
+  m_type = typeof(m)
+  @test typeof(E) == Exposed{m_type,m_type}
+  @test typeof(Et) == Exposed{m_type,LinearAlgebra.Transpose{e_type,m_type}}
+  @test typeof(Ea) == Exposed{m_type,LinearAlgebra.Adjoint{e_type,m_type}}
+
+  o = dev(Vector{Float32})(undef, 1)
+  expose(o)[] = 2
+  @test expose(o)[] == 2
+
+  fill!(m, 0)
+  @test any(!Base.isinf, expose(m))
+
+  mp = copy(Ea)
+  @test mp == ma
+  fill!(ma, 2.0)
+  copyto!(expose(mp), expose(ma))
+  @test mp == ma
+
+  q, r = qr(expose(mp))
+  @test q * r ≈ mp
+
+  q, r = Unwrap.qr_positive(expose(mp))
+  @test q * r ≈ mp
+
+  square = dev(rand(Float64, (10, 10)))
+  square = (square + transpose(square)) ./ 2.0
+  ## CUDA only supports Hermitian or Symmetric eigendecompositions,
+  ## so symmetrize square and wrap it in Symmetric here
+  l, U = eigen(expose(Symmetric(square)))
+  @test square * U ≈ U * Diagonal(l)
+
+  U, S, V, = svd(expose(mp))
+  @test U * Diagonal(S) * V' ≈ mp
+
+  cm = dev(fill!(Matrix{Float64}(undef, (2, 2)), 0.0))
+  mul!(expose(cm), expose(mp), expose(mp'), 1.0, 0.0)
+  @test cm ≈ mp * mp'
+
+  @test permutedims(expose(mp), (2, 1)) == transpose(mp)
+  fill!(mt, 3.0)
+  permutedims!(expose(m), expose(mt), (2, 1))
+  @test norm(m) == sqrt(3^2 * 10)
+  @test size(m) == (5, 2)
+  permutedims!(expose(m), expose(mt), (2, 1), +)
+  @test size(m) == (5, 2)
+  @test norm(m) == sqrt(6^2 * 10)
+
+  m = reshape(m, (5, 2, 1))
+  mt = fill!(similar(m), 3.0)
+  m = permutedims(expose(m), (2, 1, 3))
+  @test size(m) == (2, 5, 1)
+  permutedims!(expose(m), expose(mt), (2, 1, 3))
+  @test norm(m) == sqrt(3^2 * 10)
+  permutedims!(expose(m), expose(mt), (2, 1, 3), -)
+  @test norm(m) == 0
+end
diff --git a/NDTensors/src/dense/tensoralgebra/outer.jl b/NDTensors/src/dense/tensoralgebra/outer.jl
index 1aa43b3e7e..591b3bc160 100644
--- a/NDTensors/src/dense/tensoralgebra/outer.jl
+++ b/NDTensors/src/dense/tensoralgebra/outer.jl
@@ -21,9 +21,12 @@ function outer!(
   v1 = data(T1)
   v2 = data(T2)
   RM = reshape(R, length(v1), length(v2))
+  ## A potential fix is to call reshape on array(R) instead:
+  #RM = reshape(array(R), length(v1), length(v2))
   #RM .= v1 .* transpose(v2)
   #mul!(RM, v1, transpose(v2))
   _gemm!('N', 'T', one(ElR), v1, v2, zero(ElR), RM)
+  #mul!!(RM, v1, transpose(v2), one(ElR), zero(ElR))
   return R
 end
 
diff --git a/NDTensors/src/linearalgebra/linearalgebra.jl b/NDTensors/src/linearalgebra/linearalgebra.jl
index 6dd793eba8..b2a79febe6 100644
--- a/NDTensors/src/linearalgebra/linearalgebra.jl
+++ b/NDTensors/src/linearalgebra/linearalgebra.jl
@@ -157,7 +157,7 @@ function svd(
   spec = Spectrum(P, truncerr)
   dS = length(P)
   if dS < length(MS)
-    MU = MU[:, 1:dS]
+    MU = expose(MU)[:, 1:dS]
     # Fails on some GPU backends like Metal.
     # resize!(MS, dS)
     MS = MS[1:dS]
@@ -187,6 +187,7 @@ function eigen(
   matrixT = matrix(T)
   ## TODO Here I am calling parent to ensure that the correct `any` function
   ## is envoked for non-cpu matrices
+  ## TODO use expose here
   if any(!isfinite, parent(matrixT))
     throw(
       ArgumentError(
@@ -195,6 +196,7 @@
     )
   end
 
+  ### What do we do if DM is full of NaN or Inf?
  DM, VM = eigen(expose(matrixT))
 
   # Sort by largest to smallest eigenvalues
diff --git a/NDTensors/test/ITensors/TestITensorDMRG/TestITensorDMRG.jl b/NDTensors/test/ITensors/TestITensorDMRG/TestITensorDMRG.jl
new file mode 100644
index 0000000000..667e2a1f8a
--- /dev/null
+++ b/NDTensors/test/ITensors/TestITensorDMRG/TestITensorDMRG.jl
@@ -0,0 +1,16 @@
+## This test is still a work in progress; no need to review it yet.
+## It fails for CUDA mostly in eigen (there appears to be some noise in the
+## CUBLAS eigendecomposition that gives slightly different answers than BLAS).
+module TestITensorDMRG
+using Test
+using ITensors
+using NDTensors
+using Random
+
+reference_energies = Dict([
+  (4, -1.6160254037844384), (8, -3.374932598687889), (10, -4.258035207282885)
+])
+
+default_rtol(elt::Type) = 10^(0.75 * log10(eps(real(elt))))
+include("dmrg.jl")
+end
diff --git a/NDTensors/test/ITensors/TestITensorDMRG/dmrg.jl b/NDTensors/test/ITensors/TestITensorDMRG/dmrg.jl
new file mode 100644
index 0000000000..2c5a77eddf
--- /dev/null
+++ b/NDTensors/test/ITensors/TestITensorDMRG/dmrg.jl
@@ -0,0 +1,24 @@
+function test_dmrg(elt, N::Integer, dev::Function)
+  sites = siteinds("S=1/2", N)
+
+  os = OpSum()
+  for j in 1:(N - 1)
+    os += "Sz", j, "Sz", j + 1
+    os += 0.5, "S+", j, "S-", j + 1
+    os += 0.5, "S-", j, "S+", j + 1
+  end
+
+  Random.seed!(1234)
+  psi0 = dev(randomMPS(Float64, sites; linkdims=4))
+  H = dev(MPO(elt, os, sites))
+
+  nsweeps = 3
+  cutoff = [1e-3, 1e-13]
+  noise = [1e-12, 0]
+  ## When these problems are run with nsweeps = 100 and no maxdim,
+  ## none of them exceeds a maxlinkdim of 32.
+  maxdim = 32
+
+  energy, psi = dmrg(H, psi0; nsweeps, cutoff, maxdim, noise, outputlevel=0)
+  @test energy ≈ reference_energies[N] rtol = default_rtol(elt)
+end
diff --git a/NDTensors/test/ITensors/TestITensorDMRG/runtests.jl b/NDTensors/test/ITensors/TestITensorDMRG/runtests.jl
new file mode 100644
index 0000000000..ee6712e9d6
--- /dev/null
+++ b/NDTensors/test/ITensors/TestITensorDMRG/runtests.jl
@@ -0,0 +1,14 @@
+using Test
+using NDTensors
+include("TestITensorDMRG.jl")
+
+include("../../device_list.jl")
+
+@testset "Testing DMRG different backends" begin
+  for dev in devices_list(ARGS),
+    N in [4, 10],
+    elt in (Float32, ComplexF32, Float64, ComplexF64)
+
+    TestITensorDMRG.test_dmrg(elt, N, dev)
+  end
+end
diff --git a/NDTensors/test/ITensors/runtests.jl b/NDTensors/test/ITensors/runtests.jl
new file mode 100644
index 0000000000..00ecc4aa31
--- /dev/null
+++ b/NDTensors/test/ITensors/runtests.jl
@@ -0,0 +1 @@
+include("TestITensorDMRG/runtests.jl")
diff --git a/NDTensors/test/Project.toml b/NDTensors/test/Project.toml
index c1c309317a..824ce0a350 100644
--- a/NDTensors/test/Project.toml
+++ b/NDTensors/test/Project.toml
@@ -6,6 +6,7 @@ ITensors = "9136182c-28ba-11e9-034c-db9fb085ebd5"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Octavian = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
 TBLIS = "48530278-0828-4a49-9772-0f3830dfa1e9"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/NDTensors/test/Unwrap.jl b/NDTensors/test/Unwrap.jl
new file mode 100644
index 0000000000..a7a49a2a17
--- /dev/null
+++ b/NDTensors/test/Unwrap.jl
@@ -0,0 +1,4 @@
+using Test
+using NDTensors
+
+include(joinpath(pkgdir(NDTensors), "src", "Unwrap", "test", "runtests.jl"))
diff --git a/NDTensors/test/blocksparse.jl b/NDTensors/test/blocksparse.jl
index 11711aaa30..c8d0224725 100644
--- a/NDTensors/test/blocksparse.jl
+++ b/NDTensors/test/blocksparse.jl
@@ -1,12 +1,6 @@
 using NDTensors
 using LinearAlgebra
 using Test
-if "cuda" in ARGS || "all" in ARGS
-  using CUDA
-end
-if "metal" in ARGS || "all" in ARGS
-  using Metal
-end
 
 @testset "BlockSparseTensor basic functionality" begin
   C = nothing
diff --git a/NDTensors/test/combiner.jl b/NDTensors/test/combiner.jl
index f8853ff8ff..e422e69fa0 100644
--- a/NDTensors/test/combiner.jl
+++ b/NDTensors/test/combiner.jl
@@ -5,13 +5,6 @@ using Test
 
 # Testing generic block indices
 using ITensors: QN, Index
-if "cuda" in ARGS || "all" in ARGS
-  using CUDA
-end
-if "metal" in ARGS || "all" in ARGS
-  using Metal
-end
-
 @testset "CombinerTensor basic functionality" begin
   include("device_list.jl")
   devs = devices_list(copy(ARGS))
diff --git a/NDTensors/test/dense.jl b/NDTensors/test/dense.jl
index 80be64c0ed..eb6678c689 100644
--- a/NDTensors/test/dense.jl
+++ b/NDTensors/test/dense.jl
@@ -1,11 +1,5 @@
 using NDTensors
 using Test
-if "cuda" in ARGS || "all" in ARGS
-  using CUDA
-end
-if "metal" in ARGS || "all" in ARGS
-  using Metal
-end
 
 @testset "Dense Tensors" begin
   include("device_list.jl")
diff --git a/NDTensors/test/linearalgebra.jl b/NDTensors/test/linearalgebra.jl
index de746ed830..7d0a1c1250 100644
--- a/NDTensors/test/linearalgebra.jl
+++ b/NDTensors/test/linearalgebra.jl
@@ -2,13 +2,6 @@ using NDTensors
 using LinearAlgebra
 using Test
 
-if "cuda" in ARGS || "all" in ARGS
-  using CUDA
-end
-if "metal" in ARGS || "all" in ARGS
-  using Metal
-end
-
 @testset "random_orthog" begin
   n, m = 10, 4
   O1 = random_orthog(n, m)
diff --git a/NDTensors/test/runtests.jl b/NDTensors/test/runtests.jl
index 5f743e2397..256c55d055 100644
--- a/NDTensors/test/runtests.jl
+++ b/NDTensors/test/runtests.jl
@@ -25,6 +25,7 @@ end
     "SmallVectors.jl",
     "SortedSets.jl",
     "TagSets.jl",
+    "Unwrap.jl",
     "linearalgebra.jl",
     "dense.jl",
     "blocksparse.jl",
@@ -34,6 +35,7 @@
     "emptystorage.jl",
     "combiner.jl",
     "arraytensor/arraytensor.jl",
+    "ITensors/runtests.jl",
   ]
   println("Running $filename")
   include(filename)
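
## ---------------------------------------------------------------------------
## A minimal CPU-only usage sketch (not part of the diff above) of the
## `expose`/`Exposed` API that the new Unwrap tests exercise. It only uses
## calls the new test file itself covers (`expose`, `unexpose`, `parent`,
## `copy`, `qr`); the variable names are illustrative.
using NDTensors
using NDTensors.Unwrap
using LinearAlgebra

v = randn(10)
m = reshape(v, 5, 2)  # a plain Matrix backed by v
ma = m'               # an Adjoint wrapper around that Matrix

Ea = expose(ma)       # Exposed{Matrix{Float64},Adjoint{Float64,Matrix{Float64}}}
@assert unexpose(Ea) == ma  # unexpose returns the wrapped array unchanged
@assert parent(Ea) == m     # parent unwraps to the underlying storage

mp = copy(Ea)               # copy dispatches on both the storage and wrapper types
q, r = qr(expose(mp))       # linear algebra routines accept the exposed array
@assert q * r ≈ mp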