diff --git a/NDTensors/Project.toml b/NDTensors/Project.toml index a28621d734..208364fe04 100644 --- a/NDTensors/Project.toml +++ b/NDTensors/Project.toml @@ -11,6 +11,7 @@ Dictionaries = "85a47980-9c8c-11e8-2b9f-f7ca1fa99fb4" FLoops = "cc61a311-1640-44b5-9fba-1b764f453329" Folds = "41a02a25-b8f0-4f67-bc48-60067656b558" Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196" +GPUArraysCore="46192b85-c4d5-4398-a991-12ede77f4527" HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" InlineStrings = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" @@ -44,6 +45,7 @@ Dictionaries = "0.3.5" FLoops = "0.2.1" Folds = "0.2.8" Functors = "0.2, 0.3, 0.4" +GPUArraysCore = "0.1" HDF5 = "0.14, 0.15, 0.16, 0.17" InlineStrings = "1" LinearAlgebra = "1.6" diff --git a/NDTensors/ext/NDTensorsCUDAExt/NDTensorsCUDAExt.jl b/NDTensors/ext/NDTensorsCUDAExt/NDTensorsCUDAExt.jl index f9e8813152..cd2a83827f 100644 --- a/NDTensors/ext/NDTensorsCUDAExt/NDTensorsCUDAExt.jl +++ b/NDTensors/ext/NDTensorsCUDAExt/NDTensorsCUDAExt.jl @@ -5,16 +5,19 @@ using NDTensors.SetParameters using NDTensors.Unwrap using Adapt using Functors -using LinearAlgebra +using LinearAlgebra: LinearAlgebra, Adjoint, Transpose, mul!, svd using CUDA using CUDA.CUBLAS using CUDA.CUSOLVER include("imports.jl") include("default_kwargs.jl") +include("copyto.jl") include("set_types.jl") include("iscu.jl") include("adapt.jl") include("indexing.jl") include("linearalgebra.jl") +include("mul.jl") +include("permutedims.jl") end diff --git a/NDTensors/ext/NDTensorsCUDAExt/copyto.jl b/NDTensors/ext/NDTensorsCUDAExt/copyto.jl new file mode 100644 index 0000000000..73867192c4 --- /dev/null +++ b/NDTensors/ext/NDTensorsCUDAExt/copyto.jl @@ -0,0 +1,26 @@ +# Same definition as `MtlArray`. +function Base.copy(src::Exposed{<:CuArray,<:Base.ReshapedArray}) + return reshape(copy(parent(src)), size(unexpose(src))) +end + +function Base.copy( + src::Exposed{ + <:CuArray,<:SubArray{<:Any,<:Any,<:Base.ReshapedArray{<:Any,<:Any,<:Adjoint}} + }, +) + return copy(@view copy(expose(parent(src)))[parentindices(unexpose(src))...]) +end + +# Catches a bug in `copyto!` in CUDA backend. +function Base.copyto!(dest::Exposed{<:CuArray}, src::Exposed{<:CuArray,<:SubArray}) + copyto!(dest, expose(copy(src))) + return unexpose(dest) +end + +# Catches a bug in `copyto!` in CUDA backend. 
+function Base.copyto!( + dest::Exposed{<:CuArray}, src::Exposed{<:CuArray,<:Base.ReshapedArray} +) + copyto!(dest, expose(parent(src))) + return unexpose(dest) +end diff --git a/NDTensors/ext/NDTensorsCUDAExt/imports.jl b/NDTensors/ext/NDTensorsCUDAExt/imports.jl index 96b8383c74..a9e49db657 100644 --- a/NDTensors/ext/NDTensorsCUDAExt/imports.jl +++ b/NDTensors/ext/NDTensorsCUDAExt/imports.jl @@ -2,5 +2,3 @@ import NDTensors: cu, set_ndims, set_eltype, set_eltype_if_unspecified, similart import NDTensors: ContractionProperties, _contract!, GemmBackend, auto_select_backend, _gemm!, iscu import NDTensors.SetParameters: nparameters, get_parameter, set_parameter, default_parameter - -import .CUDA: CuArrayAdaptor diff --git a/NDTensors/ext/NDTensorsCUDAExt/indexing.jl b/NDTensors/ext/NDTensorsCUDAExt/indexing.jl index 2f0f6790e0..263e2442a6 100644 --- a/NDTensors/ext/NDTensorsCUDAExt/indexing.jl +++ b/NDTensors/ext/NDTensorsCUDAExt/indexing.jl @@ -2,7 +2,7 @@ function Base.getindex(E::Exposed{<:CuArray}) return CUDA.@allowscalar unexpose(E)[] end -function setindex!(E::Exposed{<:CuArray}, x::Number) +function Base.setindex!(E::Exposed{<:CuArray}, x::Number) CUDA.@allowscalar unexpose(E)[] = x return unexpose(E) end @@ -11,10 +11,6 @@ function Base.getindex(E::Exposed{<:CuArray,<:Adjoint}, i, j) return (expose(parent(E))[j, i])' end -function Base.copy(E::Exposed{<:CuArray,<:Base.ReshapedArray}) - return reshape(copy(expose(parent(E))), size(unexpose(E))) -end - Base.any(f, E::Exposed{<:CuArray,<:NDTensors.Tensor}) = any(f, data(unexpose(E))) function Base.print_array(io::IO, E::Exposed{<:CuArray}) diff --git a/NDTensors/ext/NDTensorsCUDAExt/mul.jl b/NDTensors/ext/NDTensorsCUDAExt/mul.jl new file mode 100644 index 0000000000..b3751b5fc1 --- /dev/null +++ b/NDTensors/ext/NDTensorsCUDAExt/mul.jl @@ -0,0 +1,43 @@ +# This was calling generic matrix multiplication. +# TODO: Raise an issue with `CUDA.jl`. +function LinearAlgebra.mul!( + CM::Exposed{<:CuArray,<:LinearAlgebra.Transpose}, + AM::Exposed{<:CuArray}, + BM::Exposed{<:CuArray}, + α, + β, +) + mul!(transpose(CM), transpose(BM), transpose(AM), α, β) + return unexpose(CM) +end + +# This was calling generic matrix multiplication. +# TODO: Raise an issue with `CUDA.jl`. 
+function LinearAlgebra.mul!( + CM::Exposed{<:CuArray,<:LinearAlgebra.Adjoint}, + AM::Exposed{<:CuArray}, + BM::Exposed{<:CuArray}, + α, + β, +) + mul!(CM', BM', AM', α, β) + return unexpose(CM) +end + +## Fix issue in CUDA.jl where it cannot distinguish Transpose{Reshape{Adjoint{CuArray}}} +## as a CuArray and calls generic matmul +function LinearAlgebra.mul!( + CM::Exposed{<:CuArray}, + AM::Exposed{<:CuArray}, + BM::Exposed{ + <:CuArray, + <:LinearAlgebra.Transpose{ + <:Any,<:Base.ReshapedArray{<:Any,<:Any,<:LinearAlgebra.Adjoint} + }, + }, + α, + β, +) + mul!(CM, AM, expose(transpose(copy(expose(parent(BM))))), α, β) + return unexpose(CM) +end diff --git a/NDTensors/ext/NDTensorsCUDAExt/permutedims.jl b/NDTensors/ext/NDTensorsCUDAExt/permutedims.jl new file mode 100644 index 0000000000..2dba8286bb --- /dev/null +++ b/NDTensors/ext/NDTensorsCUDAExt/permutedims.jl @@ -0,0 +1,7 @@ +function Base.permutedims!( + Edest::Exposed{<:CuArray,<:Base.ReshapedArray}, Esrc::Exposed{<:CuArray}, perm +) + Aperm = permutedims(Esrc, perm) + copyto!(expose(parent(Edest)), expose(Aperm)) + return unexpose(Edest) +end diff --git a/NDTensors/ext/NDTensorsMetalExt/copyto.jl b/NDTensors/ext/NDTensorsMetalExt/copyto.jl index ba111fb099..c32b5f2f01 100644 --- a/NDTensors/ext/NDTensorsMetalExt/copyto.jl +++ b/NDTensors/ext/NDTensorsMetalExt/copyto.jl @@ -12,12 +12,14 @@ end # Catches a bug in `copyto!` in Metal backend. function Base.copyto!(dest::Exposed{<:MtlArray}, src::Exposed{<:MtlArray,<:SubArray}) - return copyto!(dest, expose(copy(src))) + copyto!(dest, expose(copy(src))) + return unexpose(dest) end # Catches a bug in `copyto!` in Metal backend. function Base.copyto!( dest::Exposed{<:MtlArray}, src::Exposed{<:MtlArray,<:Base.ReshapedArray} ) - return copyto!(dest, expose(parent(src))) + copyto!(dest, expose(parent(src))) + return unexpose(dest) end diff --git a/NDTensors/ext/NDTensorsMetalExt/mul.jl b/NDTensors/ext/NDTensorsMetalExt/mul.jl index f5268fdd7f..7c388a0fc8 100644 --- a/NDTensors/ext/NDTensorsMetalExt/mul.jl +++ b/NDTensors/ext/NDTensorsMetalExt/mul.jl @@ -10,3 +10,12 @@ function LinearAlgebra.mul!( mul!(transpose(CM), transpose(BM), transpose(AM), α, β) return unexpose(CM) end + +# This was calling generic matrix multiplication. +# TODO: Raise an issue with `Metal.jl`. +function LinearAlgebra.mul!( + CM::Exposed{<:MtlArray,<:Adjoint}, AM::Exposed{<:MtlArray}, BM::Exposed{<:MtlArray}, α, β +) + mul!(CM', BM', AM', α, β) + return unexpose(CM) +end diff --git a/NDTensors/ext/examples/NDTensorCUDA.jl b/NDTensors/ext/examples/NDTensorCUDA.jl index ca4ffbe23e..6a9efe749f 100644 --- a/NDTensors/ext/examples/NDTensorCUDA.jl +++ b/NDTensors/ext/examples/NDTensorCUDA.jl @@ -61,9 +61,9 @@ function main() #Currently this code fails with CUDA.allowscalar(false) # Because of outer calling the _gemm! 
function which calls a # generic implementation - grad = gradient(f, cA, cB, cC, cD) - @test NDTensors.cpu(cB * cC * cD) ≈ NDTensors.cpu(grad[1]) - @test (cB * cC * cD) ≈ grad[1] + @allowscalar grad = gradient(f, cA, cB, cC, cD) + @allowscalar @test NDTensors.cpu(cB * cC * cD) ≈ NDTensors.cpu(grad[1]) + @allowscalar @test (cB * cC * cD) ≈ grad[1] # Create a tuple of indices decomp = ( dim(NDTensors.ind(grad[1], 1)), diff --git a/NDTensors/src/NDTensors.jl b/NDTensors/src/NDTensors.jl index aee947e2aa..2dd3735997 100644 --- a/NDTensors/src/NDTensors.jl +++ b/NDTensors/src/NDTensors.jl @@ -6,6 +6,7 @@ using Compat using Dictionaries using FLoops using Folds +using GPUArraysCore using InlineStrings using Random using LinearAlgebra diff --git a/NDTensors/src/Unwrap/src/functions/abstractarray.jl b/NDTensors/src/Unwrap/src/functions/abstractarray.jl index db98228d98..2ef6833807 100644 --- a/NDTensors/src/Unwrap/src/functions/abstractarray.jl +++ b/NDTensors/src/Unwrap/src/functions/abstractarray.jl @@ -2,6 +2,8 @@ parent(E::Exposed) = parent(unexpose(E)) transpose(E::Exposed) = transpose(unexpose(E)) +adjoint(E::Exposed) = adjoint(unexpose(E)) + cpu(E::Exposed) = cpu(unexpose(E)) getindex(E::Exposed) = unexpose(E)[] diff --git a/NDTensors/src/Unwrap/src/import.jl b/NDTensors/src/Unwrap/src/import.jl index 742cda3b2e..66c1235995 100644 --- a/NDTensors/src/Unwrap/src/import.jl +++ b/NDTensors/src/Unwrap/src/import.jl @@ -1,4 +1,5 @@ import Base: + adjoint, permutedims, permutedims!, copy, diff --git a/NDTensors/src/Unwrap/test/runtests.jl b/NDTensors/src/Unwrap/test/runtests.jl index 127f85cfae..68cbb74e18 100644 --- a/NDTensors/src/Unwrap/test/runtests.jl +++ b/NDTensors/src/Unwrap/test/runtests.jl @@ -2,6 +2,7 @@ using Test using NDTensors.Unwrap using NDTensors using LinearAlgebra +using GPUArraysCore: @allowscalar include("../../../test/device_list.jl") @testset "Testing Unwrap $dev, $elt" for dev in devices_list(ARGS), @@ -24,8 +25,8 @@ include("../../../test/device_list.jl") @test parent(Et) == v @test parent(Ea) == v @test transpose(E) == vt - @test cpu(E) == v - @test cpu(Et) == vt + @test cpu(E) == cpu(v) + @test cpu(Et) == cpu(vt) m = reshape(v, (5, 2)) mt = transpose(m) @@ -125,17 +126,68 @@ include("../../../test/device_list.jl") y = dev(randn(elt, 16)) x = reshape(dev(randn(elt, 4, 4))', 16) copyto!(expose(y), expose(x)) - @test y == x - @test copy(x) == x + @allowscalar begin + @test y == x + @test copy(x) == x + end y = dev(randn(elt, 8)) x = @view reshape(dev(randn(elt, 8, 8))', 64)[1:8] copyto!(expose(y), expose(x)) - @test y == x - @test copy(x) == x + @allowscalar begin + @test y == x + @test copy(x) == x + end y = Base.ReshapedArray(dev(randn(elt, 16)), (4, 4), ()) x = dev(randn(elt, 4, 4)) permutedims!(expose(y), expose(x), (2, 1)) @test NDTensors.cpu(y) == transpose(NDTensors.cpu(x)) + + ########################################## + ### Testing an issue with CUDA&Metal transpose/adjoint mul + A = dev(randn(elt, (3, 2))) + B = dev(randn(elt, (3, 4))) + C = dev(randn(elt, (4, 2))) + Cp = copy(C) + + ## This fails with scalar indexing + if dev != NDTensors.cpu + @test_broken mul!(transpose(C), transpose(A), B, true, false) + end + mul!(C, transpose(B), A, true, false) + mul!(expose(transpose(Cp)), expose(transpose(A)), expose(B), true, false) + @test C ≈ Cp + Cp = zero(C) + ## Try calling mul!! 
with transposes to verify that code works + Cpt = NDTensors.mul!!(transpose(Cp), transpose(A), B, true, false) + @test transpose(Cpt) ≈ C + + Cp = zero(C) + ## This fails with scalar indexing + if dev != NDTensors.cpu + @test_broken mul!(C', A', B, true, false) + end + mul!(C, B', A, true, false) + mul!(expose(Cp'), expose(A'), expose(B), true, false) + @test C ≈ Cp + Cp = zero(C) + Cpt = NDTensors.mul!!(Cp', A', B, true, false) + @test Cpt' ≈ C + + ################################## + ### Add test for transpose(reshape(adjoint )) failure in CUDA + + A = dev(transpose(reshape(randn(elt, 2, 12)', (12, 2)))) + B = dev(randn(elt, 2, 2)) + C = dev(zeros(elt, 2, 12)) + NDTensors.mul!(expose(C), expose(B), expose(A), true, false) + Cp = NDTensors.cpu(similar(C)) + NDTensors.mul!( + expose(Cp), expose(NDTensors.cpu(B)), expose(NDTensors.cpu(A)), true, false + ) + @test NDTensors.cpu(C) ≈ Cp + NDTensors.zero(C) + NDTensors.mul!!(C, B, A, true, false) + @test NDTensors.cpu(C) ≈ Cp end diff --git a/NDTensors/src/blocksparse/blocksparsetensor.jl b/NDTensors/src/blocksparse/blocksparsetensor.jl index 65f824a48d..f59238f955 100644 --- a/NDTensors/src/blocksparse/blocksparsetensor.jl +++ b/NDTensors/src/blocksparse/blocksparsetensor.jl @@ -335,11 +335,15 @@ view(T::BlockSparseTensor, b::Block) = blockview(T, b) # convert to Dense function dense(T::TensorT) where {TensorT<:BlockSparseTensor} R = zeros(dense(TensorT), inds(T)) + ## Here this failed with scalar indexing (R[blockindices] = blockview) + ## We can fix this by using copyto the arrays + r = array(R) for block in keys(blockoffsets(T)) # TODO: make sure this assignment is efficient - R[blockindices(T, block)] = blockview(T, block) + rview = @view r[blockindices(T, block)] + copyto!(expose(rview), expose(array(blockview(T, block)))) end - return R + return tensor(Dense(r), inds(T)) end # diff --git a/NDTensors/src/blocksparse/linearalgebra.jl b/NDTensors/src/blocksparse/linearalgebra.jl index f3a830ccf7..bc69bb1954 100644 --- a/NDTensors/src/blocksparse/linearalgebra.jl +++ b/NDTensors/src/blocksparse/linearalgebra.jl @@ -200,7 +200,7 @@ function svd( if (sV * sVP) == -1 Vb *= -1 end - copyto!(expose(blockview(V, blockV)), expose(Vb)) + copyto!(blockview(V, blockV), Vb) end return U, S, V, Spectrum(d, truncerr) end diff --git a/NDTensors/test/Project.toml b/NDTensors/test/Project.toml index 824ce0a350..0970083593 100644 --- a/NDTensors/test/Project.toml +++ b/NDTensors/test/Project.toml @@ -2,6 +2,7 @@ BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Dictionaries = "85a47980-9c8c-11e8-2b9f-f7ca1fa99fb4" +GPUArraysCore="46192b85-c4d5-4398-a991-12ede77f4527" ITensors = "9136182c-28ba-11e9-034c-db9fb085ebd5" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Octavian = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4" diff --git a/NDTensors/test/blocksparse.jl b/NDTensors/test/blocksparse.jl index c8d0224725..e85580ab80 100644 --- a/NDTensors/test/blocksparse.jl +++ b/NDTensors/test/blocksparse.jl @@ -1,6 +1,7 @@ using NDTensors using LinearAlgebra using Test +using GPUArraysCore: @allowscalar @testset "BlockSparseTensor basic functionality" begin C = nothing @@ -45,26 +46,29 @@ using Test @test nnzblocks(A) == 2 @test nnzblocks(B) == 2 - A[1, 5] = 15 - A[2, 5] = 25 - - @test A[1, 1] == 0 - @test A[1, 5] == 15 - @test A[2, 5] == 25 + @allowscalar begin + A[1, 5] = 15 + A[2, 5] = 25 + @test A[1, 1] == 0 + @test A[1, 5] == 15 + @test A[2, 5] == 25 + end D = dense(A) - @test D == A + @allowscalar begin + @test 
D == A - for I in eachindex(A) - @test D[I] == A[I] + for I in eachindex(A) + @test D[I] == A[I] + end end A12 = blockview(A, (1, 2)) @test dims(A12) == (2, 5) - for I in eachindex(A12) + @allowscalar for I in eachindex(A12) @test A12[I] == A[I + CartesianIndex(0, 4)] end @@ -73,7 +77,7 @@ using Test C = A + B - for I in eachindex(C) + @allowscalar for I in eachindex(C) @test C[I] == A[I] + B[I] end @@ -84,7 +88,7 @@ using Test @test nnz(A) == nnz(Ap) @test nnzblocks(A) == nnzblocks(Ap) - for I in eachindex(C) + @allowscalar for I in eachindex(C) @test A[I] == Ap[NDTensors.permute(I, (2, 1))] end @@ -159,7 +163,7 @@ using Test Ap = NDTensors.permutedims(A, (3, 2, 1)) - for (bAp, bB) in zip(eachnzblock(Ap), eachnzblock(B)) + @allowscalar for (bAp, bB) in zip(eachnzblock(Ap), eachnzblock(B)) blockAp = blockview(Ap, bAp) blockB = blockview(B, bB) @test reshape(blockAp, size(blockB)) == blockB @@ -170,7 +174,7 @@ using Test @testset "BlockSparseTensor setindex! add block" begin T = BlockSparseTensor([2, 3], [4, 5]) - for I in eachindex(T) + @allowscalar for I in eachindex(T) @test T[I] == 0.0 end @test nnz(T) == 0 @@ -226,35 +230,45 @@ using Test A = dev(BlockSparseTensor([(2, 1), (1, 2)], [2, 2], [2, 2])) randn!(A) U, S, V = svd(A) - @test isapprox(norm(array(U) * array(S) * array(V)' - array(A)), 0; atol=1e-14) + @test @allowscalar isapprox( + norm(array(U) * array(S) * array(V)' - array(A)), 0; atol=1e-14 + ) end @testset "svd example 2" begin A = dev(BlockSparseTensor([(1, 2), (2, 3)], [2, 2], [3, 2, 3])) randn!(A) U, S, V = svd(A) - @test isapprox(norm(array(U) * array(S) * array(V)' - array(A)), 0.0; atol=1e-14) + @test @allowscalar isapprox( + norm(array(U) * array(S) * array(V)' - array(A)), 0.0; atol=1e-14 + ) end @testset "svd example 3" begin A = dev(BlockSparseTensor([(2, 1), (3, 2)], [3, 2, 3], [2, 2])) randn!(A) U, S, V = svd(A) - @test isapprox(norm(array(U) * array(S) * array(V)' - array(A)), 0.0; atol=1e-14) + @test @allowscalar isapprox( + norm(array(U) * array(S) * array(V)' - array(A)), 0.0; atol=1e-14 + ) end @testset "svd example 4" begin A = dev(BlockSparseTensor([(2, 1), (3, 2)], [2, 3, 4], [5, 6])) randn!(A) U, S, V = svd(A) - @test isapprox(norm(array(U) * array(S) * array(V)' - array(A)), 0.0; atol=1e-13) + @test @allowscalar isapprox( + norm(array(U) * array(S) * array(V)' - array(A)), 0.0; atol=1e-13 + ) end @testset "svd example 5" begin A = dev(BlockSparseTensor([(1, 2), (2, 3)], [5, 6], [2, 3, 4])) randn!(A) U, S, V = svd(A) - @test isapprox(norm(array(U) * array(S) * array(V)' - array(A)), 0.0; atol=1e-13) + @test @allowscalar isapprox( + norm(array(U) * array(S) * array(V)' - array(A)), 0.0; atol=1e-13 + ) end end diff --git a/NDTensors/test/combiner.jl b/NDTensors/test/combiner.jl index e422e69fa0..52e1cae5b4 100644 --- a/NDTensors/test/combiner.jl +++ b/NDTensors/test/combiner.jl @@ -1,6 +1,7 @@ using NDTensors using LinearAlgebra using Test +using GPUArraysCore: @allowscalar # Testing generic block indices using ITensors: QN, Index @@ -21,13 +22,13 @@ using ITensors: QN, Index output_tensor = contract(input_tensor, (1, -1, -2), combiner_tensor, (2, -1, -2)) @test output_tensor isa DenseTensor @test dims(output_tensor) == output_tensor_inds - for i in 1:length(input_tensor) + @allowscalar for i in 1:length(input_tensor) @test input_tensor[i] == output_tensor[i] end # Test uncombining new_input_tensor = contract(output_tensor, (1, -1), combiner_tensor, (-1, 2, 3)) - @test new_input_tensor == input_tensor + @test NDTensors.cpu(new_input_tensor) == 
NDTensors.cpu(input_tensor) # Catch invalid combining input_tensor_inds = (d,) @@ -61,14 +62,14 @@ using ITensors: QN, Index @test output_tensor isa BlockSparseTensor @test dims(output_tensor) == dims(output_tensor_inds) output_tensor = permutedims(output_tensor, (2, 1)) - for i in 1:length(input_tensor) + @allowscalar for i in 1:length(input_tensor) @test input_tensor[i] == output_tensor[i] end # Test uncombining. Broken for inds that are not `Index`. new_input_tensor = contract(output_tensor, (1, -1), combiner_tensor, (-1, 2, 3)) new_input_tensor = permutedims(new_input_tensor, (3, 1, 2)) - @test new_input_tensor == input_tensor + @test NDTensors.cpu(new_input_tensor) == NDTensors.cpu(input_tensor) # Catch invalid combining invalid_input_tensor_inds = (k,) diff --git a/NDTensors/test/dense.jl b/NDTensors/test/dense.jl index eb6678c689..e716c2b4f0 100644 --- a/NDTensors/test/dense.jl +++ b/NDTensors/test/dense.jl @@ -1,5 +1,6 @@ using NDTensors using Test +using GPUArraysCore: @allowscalar @testset "Dense Tensors" begin include("device_list.jl") @@ -10,11 +11,11 @@ using Test # Testing with GPU and CPU backends @testset "DenseTensor basic functionality" begin A = dev(Tensor(elt, (3, 4))) - for I in eachindex(A) + @allowscalar for I in eachindex(A) @test A[I] == 0 end - @test A[2, 1] isa elt + @test @allowscalar A[2, 1] isa elt @test dims(A[1:2, 1]) == (2,) @test dims(A[1:2, 2]) == (2,) @test dims(A[2:3, 2]) == (2,) @@ -25,49 +26,58 @@ using Test randn!(A) - for I in eachindex(A) - @test A[I] != 0 - end - - for I in eachindex(A) - @test A[I] != 0 - end - @test ndims(A) == 2 @test dims(A) == (3, 4) @test inds(A) == (3, 4) - A[1, 1] = 11 + Aview = A[2:3, 2:3] + @test dims(Aview) == (2, 2) - @test A[1, 1] == 11 + B = dev(Tensor(undef, (3, 4))) + randn!(B) + C = copy(A) + C = permutedims!!(C, B, (1, 2), +) - Aview = A[2:3, 2:3] + Ap = permutedims(A, (2, 1)) + @allowscalar begin + for I in eachindex(A) + @test A[I] != 0 + end - @test dims(Aview) == (2, 2) - @test A[2, 2] == Aview[1, 1] + for I in eachindex(A) + @test A[I] != 0 + end - @test A * 2.0 == 2.0 * A + ## TODO Currently this fails with scalar indexing on CUDA + ## Because A + B calls + ## +(A::DenseTensor{Float64, 2, Tuple{Int64, Int64}, Dense{Float64, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}, B::DenseTensor{Float64, 2, Tuple{Int64, Int64}, Dense{Float64, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}) + ## @ Base ./arraymath.jl:8 + #C = A + B - Asim = similar(data(A), 10) - @test eltype(Asim) == elt - @test length(Asim) == 10 + for I in eachindex(C) + @test C[I] == A[I] + B[I] + end - B = dev(Tensor(undef, (3, 4))) - randn!(B) + for I in eachindex(A) + @test A[I] == Ap[NDTensors.permute(I, (2, 1))] + end - C = A + B + A[1, 1] = 11 + @test A[1, 1] == 11 - for I in eachindex(C) - @test C[I] == A[I] + B[I] + @test A[2, 2] == Aview[1, 1] end - Ap = permutedims(A, (2, 1)) + ## Right now this treats the `Tensor` type as an abstract Array + ## And calls getindex instead of CUDA.==. 
Can fix by converting to CPU or + ## Just looking at the data + @test data(A * 2.0) == data(2.0 * A) - for I in eachindex(A) - @test A[I] == Ap[NDTensors.permute(I, (2, 1))] - end + Asim = similar(data(A), 10) + @test eltype(Asim) == elt + @test length(Asim) == 10 - t = Tensor(complex(elt), (100, 100)) + t = dev(Tensor(complex(elt), (100, 100))) randn!(t) @test conj(data(store(t))) == data(store(conj(t))) @test typeof(conj(t)) <: DenseTensor @@ -75,33 +85,33 @@ using Test @test Dense(complex(elt)) == Dense{complex(elt)}() @test Dense(complex(elt)) == complex(Dense(elt)) - D = Tensor(complex(elt), (100, 100)) + D = dev(Tensor(complex(elt), (100, 100))) @test eltype(D) == complex(elt) @test ndims(D) == 2 @test dim(D) == 100^2 - E = Tensor(complex(elt), undef, (100, 100)) + E = dev(Tensor(complex(elt), undef, (100, 100))) @test eltype(E) == complex(elt) @test ndims(E) == 2 @test dim(E) == 100^2 - F = Tensor(elt, (100, 100)) + F = dev(Tensor(elt, (100, 100))) @test eltype(F) == elt @test ndims(F) == 2 @test dim(F) == 100^2 - G = Tensor(elt, undef, (100, 100)) + G = dev(Tensor(elt, undef, (100, 100))) @test eltype(G) == elt @test ndims(G) == 2 @test dim(G) == 100^2 - H = Tensor(complex(elt), undef, (100, 100)) + H = dev(Tensor(complex(elt), undef, (100, 100))) @test eltype(H) == complex(elt) @test ndims(H) == 2 @test dim(H) == 100^2 - I_arr = rand(elt, 10, 10, 10) - I = Tensor(I_arr, (10, 10, 10)) + I_arr = dev(rand(elt, 10, 10, 10)) + I = dev(Tensor(I_arr, (10, 10, 10))) @test eltype(I) == elt @test dim(I) == 1000 @test Array(I) == I_arr @@ -115,13 +125,13 @@ using Test T = dev(randomTensor(elt, (2, 2))) @test dims(T) == (2, 2) @test eltype(T) == elt - @test T[1, 1] ≉ 0 + @test @allowscalar T[1, 1] ≉ 0 @test norm(T) ≉ 0 Tc = dev(randomTensor(complex(elt), (2, 2))) @test dims(Tc) == (2, 2) @test eltype(Tc) == complex(elt) - @test Tc[1, 1] ≉ 0 + @test @allowscalar Tc[1, 1] ≉ 0 @test norm(Tc) ≉ 0 end @@ -133,7 +143,7 @@ using Test iT = imag(T) cT = conj(T) - for n1 in 1:d1, n2 in 1:d2, n3 in 1:d3 + @allowscalar for n1 in 1:d1, n2 in 1:d2, n3 in 1:d3 @test rT[n1, n2, n3] ≈ real(T[n1, n2, n3]) @test iT[n1, n2, n3] ≈ imag(T[n1, n2, n3]) @test cT[n1, n2, n3] ≈ conj(T[n1, n2, n3]) @@ -153,8 +163,10 @@ using Test @test dims(T) == (2, 3, 4) @test ndims(T) == 3 @test inds(T) == (MyInd(2), MyInd(3), MyInd(4)) - T[2, 1, 2] = 1.21 - @test T[2, 1, 2] == elt(1.21) + @allowscalar begin + T[2, 1, 2] = 1.21 + @test T[2, 1, 2] == elt(1.21) + end @test norm(T) == elt(1.21) T = dev(randomTensor(complex(elt), (MyInd(4), MyInd(3)))) @@ -200,23 +212,23 @@ using Test @testset "No permutation" begin R = dev(Tensor(complex(elt), (2, 2, 1))) fill!(R, NaN) - @test any(isnan, R) + @test @allowscalar any(isnan, R) T1 = dev(randomTensor((2, 2, 1))) T2 = dev(randomTensor(complex(elt), (1, 1))) NDTensors.contract!(R, (1, 2, 3), T1, (1, 2, -1), T2, (-1, 1)) - @test !any(isnan, R) - @test convert(Array, R) ≈ convert(Array, T1) * T2[1] + @test @allowscalar !any(isnan, R) + @test convert(Array, R) ≈ convert(Array, T1) * T2[] end @testset "Permutation" begin R = dev(Tensor(complex(elt), (2, 2, 1))) fill!(R, NaN) - @test any(isnan, R) + @test @allowscalar any(isnan, R) T1 = dev(randomTensor((2, 2, 1))) T2 = dev(randomTensor(complex(elt), (1, 1))) NDTensors.contract!(R, (2, 1, 3), T1, (1, 2, -1), T2, (-1, 1)) - @test !any(isnan, R) - @test convert(Array, R) ≈ permutedims(convert(Array, T1), (2, 1, 3)) * T2[1] + @test @allowscalar !any(isnan, R) + @test convert(Array, R) ≈ permutedims(convert(Array, T1), (2, 1, 3)) * T2[] end end end 
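The test updates above rely on two idioms that recur throughout this change: scalar indexing into device arrays is wrapped in `GPUArraysCore.@allowscalar`, and operations that need the extension overloads (`mul!`, `copyto!`, `permutedims!`) are routed through `expose`. A minimal sketch of both idioms, not part of the patch, using illustrative 4x4 sizes and the CPU device function (any entry of `devices_list`, e.g. `NDTensors.cu`, is used the same way):

    using NDTensors
    using NDTensors.Unwrap
    using GPUArraysCore: @allowscalar
    using LinearAlgebra: mul!

    dev = NDTensors.cpu          # illustrative; swap in a GPU device function
    A = dev(randn(4, 4))
    B = dev(randn(4, 4))
    C = dev(zeros(4, 4))

    # Wrapping the arrays in `expose` lets dispatch reach the Adjoint/Transpose
    # `mul!` methods defined in the CUDA/Metal extensions rather than falling
    # back to generic matrix multiplication.
    mul!(expose(C'), expose(A'), expose(B), true, false)

    # Reading a single element is a scalar operation on GPU arrays, so it is
    # wrapped explicitly.
    @allowscalar C[1, 1]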
diff --git a/NDTensors/test/device_list.jl b/NDTensors/test/device_list.jl index 6433e170b3..021a20a75b 100644 --- a/NDTensors/test/device_list.jl +++ b/NDTensors/test/device_list.jl @@ -12,7 +12,6 @@ function devices_list(test_args) end if "cuda" in test_args || "all" in test_args - CUDA.allowscalar() if CUDA.functional() push!(devs, NDTensors.cu) else diff --git a/NDTensors/test/diag.jl b/NDTensors/test/diag.jl index a712afbc70..168049a9ef 100644 --- a/NDTensors/test/diag.jl +++ b/NDTensors/test/diag.jl @@ -1,5 +1,6 @@ using NDTensors using Test +using GPUArraysCore: @allowscalar @testset "DiagTensor basic functionality" begin include("device_list.jl") @@ -18,7 +19,7 @@ using Test d = rand(real(elt), 10) D = dev(Diag{elt}(d)) @test eltype(D) == elt - @test dev(Array(dense(D))) == convert.(elt, d) + @test @allowscalar dev(Array(dense(D))) == convert.(elt, d) simD = similar(D) @test length(simD) == length(D) @test eltype(simD) == eltype(D) @@ -32,15 +33,26 @@ using Test d = 3 vr = rand(elt, d) D = dev(tensor(Diag(vr), (d, d))) - @test Array(D) == NDTensors.LinearAlgebra.diagm(0 => vr) - @test matrix(D) == NDTensors.LinearAlgebra.diagm(0 => vr) - @test permutedims(D, (2, 1)) == D + Da = Array(D) + Dm = Matrix(D) + @allowscalar begin + @test Da == NDTensors.LinearAlgebra.diagm(0 => vr) + @test Da == NDTensors.LinearAlgebra.diagm(0 => vr) + + ## TODO Currently this permutedims requires scalar indexing on GPU. + Da = permutedims(D, (2, 1)) + @test Da == D + end # Regression test for https://github.com/ITensor/ITensors.jl/issues/1199 S = dev(tensor(Diag(randn(elt, 2)), (2, 2))) + ## This was creating a `Dense{ReshapedArray{Adjoint{Matrix}}}` which, in mul!, was + ## becoming a Transpose{ReshapedArray{Adjoint{Matrix}}} which was causing issues on + ## dispatching GPU mul! V = dev(tensor(Dense(randn(elt, 12, 2)'), (3, 4, 2))) - @test contract(S, (2, -1), V, (3, 4, -1)) ≈ - contract(dense(S), (2, -1), copy(V), (3, 4, -1)) + S1 = contract(S, (2, -1), V, (3, 4, -1)) + S2 = contract(dense(S), (2, -1), copy(V), (3, 4, -1)) + @test @allowscalar S1 ≈ S2 end end @testset "DiagTensor contractions" begin diff --git a/NDTensors/test/linearalgebra.jl b/NDTensors/test/linearalgebra.jl index 7d0a1c1250..11e94dd362 100644 --- a/NDTensors/test/linearalgebra.jl +++ b/NDTensors/test/linearalgebra.jl @@ -1,6 +1,7 @@ using NDTensors using LinearAlgebra using Test +using GPUArraysCore: @allowscalar @testset "random_orthog" begin n, m = 10, 4 @@ -44,16 +45,17 @@ devs = devices_list(copy(ARGS)) # A = dev(randomTensor(elt, (n, m))) # We want to test 0.0 on the diagonal. We need to make all rows equal to gaurantee this with numerical roundoff. - if singular + @allowscalar if singular for i in 2:n A[i, :] = A[1, :] end end Q, X = qx(A; positive=positive) #X is R or L. - @test A ≈ Q * X atol = eps - @test array(Q)' * array(Q) ≈ Id atol = eps - @test array(Q) * array(Q)' ≈ Id atol = eps - if positive + Ap = Q * X + @test NDTensors.cpu(A) ≈ NDTensors.cpu(Ap) atol = eps + @test NDTensors.cpu(array(Q)' * array(Q)) ≈ Id atol = eps + @test NDTensors.cpu(array(Q) * array(Q)') ≈ Id atol = eps + @allowscalar if positive nr, nc = size(X) dr = qx == ql ? Base.max(0, nc - nr) : 0 diagX = diag(X[:, (1 + dr):end]) #location of diag(L) is shifted dr columns over the right. @@ -65,15 +67,16 @@ devs = devices_list(copy(ARGS)) # A = dev(randomTensor(elt, (m, n))) #Tall array # We want to test 0.0 on the diagonal. We need make all rows equal to gaurantee this with numerical roundoff. 
- if singular + @allowscalar if singular for i in 2:m A[i, :] = A[1, :] end end Q, X = qx(A; positive=positive) - @test A ≈ Q * X atol = eps - @test array(Q)' * array(Q) ≈ Id atol = eps - if positive + Ap = Q * X + @test NDTensors.cpu(A) ≈ NDTensors.cpu(Ap) atol = eps + @test NDTensors.cpu(array(Q)' * array(Q)) ≈ Id atol = eps + @allowscalar if positive nr, nc = size(X) dr = qx == ql ? Base.max(0, nc - nr) : 0 diagX = diag(X[:, (1 + dr):end]) #location of diag(L) is shifted dr columns over the right.
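The qr/ql tests above follow the comparison idiom used elsewhere in this change: reconstructions are compared on CPU copies, and any per-element checks run under `@allowscalar`. A short sketch of that idiom, not part of the patch, with a hypothetical 6x4 tensor and `qr` standing in for the `qx` factorization in the loop above:

    using NDTensors
    using LinearAlgebra
    using Test
    using GPUArraysCore: @allowscalar

    dev = NDTensors.cpu          # or NDTensors.cu when CUDA is functional
    A = dev(randomTensor(Float64, (6, 4)))

    Q, R = qr(A; positive=true)

    # Compare on the CPU; elementwise `≈` between device-backed tensors can
    # otherwise fall back to scalar indexing.
    @test NDTensors.cpu(A) ≈ NDTensors.cpu(Q * R)

    # Reading individual entries (here the first diagonal element of R) is a
    # scalar operation, so it is wrapped explicitly.
    @allowscalar @test real(R[1, 1]) > 0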