From a1e7ec5793e85cd69e244b2f792cb2ec98fba886 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Thu, 20 Jun 2024 20:16:19 -0400 Subject: [PATCH] [NDTensors] Fix scalar indexing issue for Diag broadcast on GPU (#1497) --- .../NDTensorsGPUArraysCoreExt.jl | 1 + .../blocksparsetensor.jl | 26 +++++++++++ .../src/blocksparse/blocksparsetensor.jl | 26 ++++++++++- NDTensors/src/dense/densetensor.jl | 6 +++ NDTensors/src/diag/diagtensor.jl | 43 +++++-------------- NDTensors/src/linearalgebra/linearalgebra.jl | 1 - NDTensors/src/tensor/tensor.jl | 16 +++++-- NDTensors/test/test_blocksparse.jl | 8 ++++ NDTensors/test/test_dense.jl | 4 ++ NDTensors/test/test_diag.jl | 24 ++++++++++- jenkins/Jenkinsfile | 6 +-- 11 files changed, 117 insertions(+), 44 deletions(-) create mode 100644 NDTensors/ext/NDTensorsGPUArraysCoreExt/blocksparsetensor.jl diff --git a/NDTensors/ext/NDTensorsGPUArraysCoreExt/NDTensorsGPUArraysCoreExt.jl b/NDTensors/ext/NDTensorsGPUArraysCoreExt/NDTensorsGPUArraysCoreExt.jl index 1ead107b45..c9e183cd52 100644 --- a/NDTensors/ext/NDTensorsGPUArraysCoreExt/NDTensorsGPUArraysCoreExt.jl +++ b/NDTensors/ext/NDTensorsGPUArraysCoreExt/NDTensorsGPUArraysCoreExt.jl @@ -1,3 +1,4 @@ module NDTensorsGPUArraysCoreExt include("contract.jl") +include("blocksparsetensor.jl") end diff --git a/NDTensors/ext/NDTensorsGPUArraysCoreExt/blocksparsetensor.jl b/NDTensors/ext/NDTensorsGPUArraysCoreExt/blocksparsetensor.jl new file mode 100644 index 0000000000..1073bd2495 --- /dev/null +++ b/NDTensors/ext/NDTensorsGPUArraysCoreExt/blocksparsetensor.jl @@ -0,0 +1,26 @@ +using GPUArraysCore: @allowscalar, AbstractGPUArray +using NDTensors: NDTensors, BlockSparseTensor, dense, diag, map_diag! +using NDTensors.DiagonalArrays: diaglength +using NDTensors.Expose: Exposed, unexpose + +## TODO to circumvent issues with blocksparse and scalar indexing +## convert blocksparse GPU tensors to dense tensors and call diag +## copying will probably have some impact on timing but this code +## currently isn't used in the main code, just in tests. +function NDTensors.diag(ETensor::Exposed{<:AbstractGPUArray,<:BlockSparseTensor}) + return diag(dense(unexpose(ETensor))) +end + +## TODO scalar indexing is slow here +function NDTensors.map_diag!( + f::Function, + exposed_t_destination::Exposed{<:AbstractGPUArray,<:BlockSparseTensor}, + exposed_t_source::Exposed{<:AbstractGPUArray,<:BlockSparseTensor}, +) + t_destination = unexpose(exposed_t_destination) + t_source = unexpose(exposed_t_source) + @allowscalar for i in 1:diaglength(t_destination) + NDTensors.setdiagindex!(t_destination, f(NDTensors.getdiagindex(t_source, i)), i) + end + return t_destination +end diff --git a/NDTensors/src/blocksparse/blocksparsetensor.jl b/NDTensors/src/blocksparse/blocksparsetensor.jl index 6ff06ab0ca..77ed73b40b 100644 --- a/NDTensors/src/blocksparse/blocksparsetensor.jl +++ b/NDTensors/src/blocksparse/blocksparsetensor.jl @@ -256,7 +256,7 @@ end # Returns the offset of the new block added. # XXX rename to insertblock!, no need to return offset using .TypeParameterAccessors: unwrap_array_type -using .Expose: expose +using .Expose: Exposed, expose, unexpose function insertblock_offset!(T::BlockSparseTensor{ElT,N}, newblock::Block{N}) where {ElT,N} newdim = blockdim(T, newblock) newoffset = nnz(T) @@ -356,6 +356,30 @@ function dense(T::TensorT) where {TensorT<:BlockSparseTensor} return tensor(Dense(r), inds(T)) end +function diag(ETensor::Exposed{<:AbstractArray,<:BlockSparseTensor}) + tensor = unexpose(ETensor) + tensordiag = NDTensors.similar( + dense(typeof(tensor)), eltype(tensor), (diaglength(tensor),) + ) + for j in 1:diaglength(tensor) + @inbounds tensordiag[j] = getdiagindex(tensor, j) + end + return tensordiag +end + +## TODO currently this fails on GPU with scalar indexing +function map_diag!( + f::Function, + exposed_t_destination::Exposed{<:AbstractArray,<:BlockSparseTensor}, + exposed_t_source::Exposed{<:AbstractArray,<:BlockSparseTensor}, +) + t_destination = unexpose(exposed_t_destination) + t_source = unexpose(exposed_t_source) + for i in 1:diaglength(t_destination) + NDTensors.setdiagindex!(t_destination, f(NDTensors.getdiagindex(t_source, i)), i) + end + return t_destination +end # # Operations # diff --git a/NDTensors/src/dense/densetensor.jl b/NDTensors/src/dense/densetensor.jl index d73234a6a9..8e938cc387 100644 --- a/NDTensors/src/dense/densetensor.jl +++ b/NDTensors/src/dense/densetensor.jl @@ -68,6 +68,12 @@ convert(::Type{Array}, T::DenseTensor) = reshape(data(storage(T)), dims(inds(T)) # Useful for using Base Array functions array(T::DenseTensor) = convert(Array, T) +using .DiagonalArrays: DiagonalArrays, diagview + +function DiagonalArrays.diagview(T::DenseTensor) + return diagview(array(T)) +end + function Array{ElT,N}(T::DenseTensor{ElT,N}) where {ElT,N} return copy(array(T)) end diff --git a/NDTensors/src/diag/diagtensor.jl b/NDTensors/src/diag/diagtensor.jl index 7a5cddff0e..bfde30a397 100644 --- a/NDTensors/src/diag/diagtensor.jl +++ b/NDTensors/src/diag/diagtensor.jl @@ -1,4 +1,4 @@ -using .DiagonalArrays: diaglength +using .DiagonalArrays: diaglength, diagview const DiagTensor{ElT,N,StoreT,IndsT} = Tensor{ElT,N,StoreT,IndsT} where {StoreT<:Diag} const NonuniformDiagTensor{ElT,N,StoreT,IndsT} = @@ -9,9 +9,7 @@ const UniformDiagTensor{ElT,N,StoreT,IndsT} = function diag(tensor::DiagTensor) tensor_diag = NDTensors.similar(dense(typeof(tensor)), (diaglength(tensor),)) # TODO: Define `eachdiagindex`. - for j in 1:diaglength(tensor) - tensor_diag[j] = getdiagindex(tensor, j) - end + diagview(tensor_diag) .= diagview(tensor) return tensor_diag end @@ -33,6 +31,10 @@ function Array(T::DiagTensor{ElT,N}) where {ElT,N} return Array{ElT,N}(T) end +function DiagonalArrays.diagview(T::NonuniformDiagTensor) + return data(T) +end + function zeros(tensortype::Type{<:DiagTensor}, inds) return tensor(generic_zeros(storagetype(tensortype), mindim(inds)), inds) end @@ -110,32 +112,11 @@ end using .TypeParameterAccessors: unwrap_array_type # convert to Dense function dense(T::DiagTensor) - return dense(unwrap_array_type(T), T) -end - -# CPU version -function dense(::Type{<:Array}, T::DiagTensor) R = zeros(dense(typeof(T)), inds(T)) - for i in 1:diaglength(T) - setdiagindex!(R, getdiagindex(T, i), i) - end + diagview(R) .= diagview(T) return R end -# GPU version -function dense(::Type{<:AbstractArray}, T::DiagTensor) - D_cpu = dense(Array, cpu(T)) - return adapt(unwrap_array_type(T), D_cpu) -end - -# UniformDiag version -# TODO: Delete once new DiagonalArray is designed. -# TODO: This creates a tensor on CPU by default so may cause -# problems for GPU. -function dense(::Type{<:Number}, T::DiagTensor) - return dense(Tensor(Diag(fill(getdiagindex(T, 1), diaglength(T))), inds(T))) -end - denseblocks(T::DiagTensor) = dense(T) function permutedims!( @@ -145,16 +126,14 @@ function permutedims!( f::Function=(r, t) -> t, ) where {N} # TODO: check that inds(R)==permute(inds(T),perm)? - for i in 1:diaglength(R) - @inbounds setdiagindex!(R, f(getdiagindex(R, i), getdiagindex(T, i)), i) - end + diagview(R) .= f.(diagview(R), diagview(T)) return R end function permutedims( T::DiagTensor{<:Number,N}, perm::NTuple{N,Int}, f::Function=identity ) where {N} - R = NDTensors.similar(T, permute(inds(T), perm)) + R = NDTensors.similar(T) g(r, t) = f(t) permutedims!(R, T, perm, g) return R @@ -193,9 +172,7 @@ end function permutedims!( R::DenseTensor{ElR,N}, T::DiagTensor{ElT,N}, perm::NTuple{N,Int}, f::Function=(r, t) -> t ) where {ElR,ElT,N} - for i in 1:diaglength(T) - @inbounds setdiagindex!(R, f(getdiagindex(R, i), getdiagindex(T, i)), i) - end + diagview(R) .= f.(diagview(R), diagview(T)) return R end diff --git a/NDTensors/src/linearalgebra/linearalgebra.jl b/NDTensors/src/linearalgebra/linearalgebra.jl index bd1690d127..f038659475 100644 --- a/NDTensors/src/linearalgebra/linearalgebra.jl +++ b/NDTensors/src/linearalgebra/linearalgebra.jl @@ -369,7 +369,6 @@ matrix is unique. Returns a tuple (Q,R). function qr_positive(M::AbstractMatrix) sparseQ, R = qr(M) Q = convert(typeof(R), sparseQ) - nc = size(Q, 2) signs = nonzero_sign.(diag(R)) Q = Q * Diagonal(signs) R = Diagonal(conj.(signs)) * R diff --git a/NDTensors/src/tensor/tensor.jl b/NDTensors/src/tensor/tensor.jl index 4dfa483030..ac6fee9a6d 100644 --- a/NDTensors/src/tensor/tensor.jl +++ b/NDTensors/src/tensor/tensor.jl @@ -361,16 +361,18 @@ function getdiagindex(T::Tensor{<:Number,N}, ind::Int) where {N} return getindex(T, CartesianIndex(ntuple(_ -> ind, Val(N)))) end +using .Expose: Exposed, expose, unexpose # TODO: add support for off-diagonals, return # block sparse vector instead of dense. -function diag(tensor::Tensor) +diag(tensor::Tensor) = diag(expose(tensor)) + +function diag(ETensor::Exposed) + tensor = unexpose(ETensor) ## d = NDTensors.similar(T, ElT, (diaglength(T),)) tensordiag = NDTensors.similar( dense(typeof(tensor)), eltype(tensor), (diaglength(tensor),) ) - for n in 1:diaglength(tensor) - tensordiag[n] = tensor[n, n] - end + array(tensordiag) .= diagview(tensor) return tensordiag end @@ -384,6 +386,12 @@ function setdiagindex!(T::Tensor{<:Number,N}, val, ind::Int) where {N} return T end +function map_diag!(f::Function, exposed_t_destination::Exposed, exposed_t_source::Exposed) + diagview(unexpose(exposed_t_destination)) .= f.(diagview(unexpose(exposed_t_source))) + return unexpose(exposed_t_destination) +end +map_diag(f::Function, t::Tensor) = map_diag!(f, expose(copy(t)), expose(t)) + # # Some generic contraction functionality # diff --git a/NDTensors/test/test_blocksparse.jl b/NDTensors/test/test_blocksparse.jl index 3ed4379eba..b230effadc 100644 --- a/NDTensors/test/test_blocksparse.jl +++ b/NDTensors/test/test_blocksparse.jl @@ -10,6 +10,8 @@ using NDTensors: blockview, data, dense, + diag, + diaglength, dims, eachnzblock, inds, @@ -52,6 +54,8 @@ using Test: @test, @test_throws, @testset @test isblocknz(A, (1, 2)) @test !isblocknz(A, (1, 1)) @test !isblocknz(A, (2, 2)) + dA = diag(A) + @test @allowscalar dA ≈ diag(dense(A)) # Test different ways of getting nnz @test nnz(blockoffsets(A), inds(A)) == nnz(A) @@ -104,6 +108,10 @@ using Test: @test, @test_throws, @testset @allowscalar for I in eachindex(C) @test C[I] == A[I] + B[I] end + Cp = NDTensors.map_diag(i -> 2 * i, C) + @allowscalar for i in 1:diaglength(Cp) + @test Cp[i, i] == 2 * C[i, i] + end Ap = permutedims(A, (2, 1)) diff --git a/NDTensors/test/test_dense.jl b/NDTensors/test/test_dense.jl index 78368d2841..94c52f4132 100644 --- a/NDTensors/test/test_dense.jl +++ b/NDTensors/test/test_dense.jl @@ -48,6 +48,10 @@ NDTensors.dim(i::MyInd) = i.dim randn!(B) C = copy(A) C = permutedims!!(C, B, (1, 2), +) + Cp = NDTensors.map_diag(i -> 2 * i, C) + @allowscalar for i in 1:diaglength(Cp) + @test Cp[i, i] == 2 * C[i, i] + end Ap = permutedims(A, (2, 1)) @allowscalar begin diff --git a/NDTensors/test/test_diag.jl b/NDTensors/test/test_diag.jl index f6390a733f..1876919117 100644 --- a/NDTensors/test/test_diag.jl +++ b/NDTensors/test/test_diag.jl @@ -34,19 +34,39 @@ using LinearAlgebra: dot D = Tensor(Diag(1), (2, 2)) @test norm(D) == √2 d = 3 + ## TODO this fails because uniform diag tensors are immutable + #S = NDTensors.map_diag((i->i * 2), dev(D)) + # @allowscalar for i in 1:diaglength(S) + # @test S[i,i] == 2.0 * D[i,i] + # end + vr = rand(elt, d) D = dev(tensor(Diag(vr), (d, d))) Da = Array(D) Dm = Matrix(D) + Da = permutedims(D, (2, 1)) @allowscalar begin @test Da == NDTensors.LinearAlgebra.diagm(0 => vr) @test Da == NDTensors.LinearAlgebra.diagm(0 => vr) - ## TODO Currently this permutedims requires scalar indexing on GPU. - Da = permutedims(D, (2, 1)) @test Da == D end + # This if statement corresponds to the reported bug: + # https://github.com/JuliaGPU/Metal.jl/issues/364 + if !(dev == NDTensors.mtl && elt === ComplexF32) + S = permutedims(dev(D), (1, 2), sqrt) + @allowscalar begin + for i in 1:diaglength(S) + @test S[i, i] ≈ sqrt(D[i, i]) + end + end + end + S = NDTensors.map_diag(i -> 2 * i, dev(D)) + @allowscalar for i in 1:diaglength(S) + @test S[i, i] == 2 * D[i, i] + end + # Regression test for https://github.com/ITensor/ITensors.jl/issues/1199 S = dev(tensor(Diag(randn(elt, 2)), (2, 2))) ## This was creating a `Dense{ReshapedArray{Adjoint{Matrix}}}` which, in mul!, was diff --git a/jenkins/Jenkinsfile b/jenkins/Jenkinsfile index f1254db10f..c50a7c874c 100644 --- a/jenkins/Jenkinsfile +++ b/jenkins/Jenkinsfile @@ -27,7 +27,7 @@ pipeline { } steps { sh ''' - julia -e 'using Pkg; Pkg.activate(temp=true); Pkg.develop(path="./NDTensors"); Pkg.develop(path="."); Pkg.test("NDTensors"; test_args=["cuda"])' + julia -e 'using Pkg; Pkg.Registry.update(); Pkg.update(); Pkg.activate(temp=true); Pkg.develop(path="./NDTensors"); Pkg.develop(path="."); Pkg.test("NDTensors"; test_args=["cuda"])' ''' } } @@ -51,7 +51,7 @@ pipeline { } steps { sh ''' - julia -e 'using Pkg; Pkg.activate(temp=true); Pkg.develop(path="./NDTensors"); Pkg.develop(path="."); Pkg.test("NDTensors"; test_args=["cuda"])' + julia -e 'using Pkg; Pkg.Registry.update(); Pkg.update(); Pkg.activate(temp=true); Pkg.develop(path="./NDTensors"); Pkg.develop(path="."); Pkg.test("NDTensors"; test_args=["cuda"])' ''' } } @@ -75,7 +75,7 @@ pipeline { } steps { sh ''' - julia -e 'using Pkg; Pkg.activate(temp=true); Pkg.develop(path="./NDTensors"); Pkg.develop(path="."); Pkg.test("NDTensors"; test_args=["cutensor"])' + julia -e 'using Pkg; Pkg.Registry.update(); Pkg.update(); Pkg.activate(temp=true); Pkg.develop(path="./NDTensors"); Pkg.develop(path="."); Pkg.test("NDTensors"; test_args=["cutensor"])' ''' } }