[NDTensorsCUDAExt] Fix bug in slicing Adjoint CuMatrix (#1236)
kmp5VT authored Nov 8, 2023
1 parent 3b1fa88 commit 8ba0e17
Showing 18 changed files with 166 additions and 35 deletions.
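For context, a minimal sketch of the bug being fixed, assuming CUDA.jl and the `expose` wrapper from `NDTensors.Unwrap` (the array names here are illustrative, not from the commit):

```julia
using CUDA
using NDTensors.Unwrap: expose

CUDA.allowscalar(false)

A = CUDA.rand(Float64, 3, 4)

# Slicing an Adjoint-wrapped CuMatrix through expose previously forwarded
# the indices to the parent unchanged, giving wrong shapes / scalar indexing.
# With this commit the slice is taken on the parent with swapped indices
# and then re-adjointed, staying on the GPU:
B = expose(A')[1:2, 1:3]
@assert B ≈ (A[1:3, 1:2])'
```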
3 changes: 2 additions & 1 deletion NDTensors/ext/NDTensorsCUDAExt/adapt.jl
@@ -16,7 +16,8 @@ buffertype(::NDTensorCuArrayAdaptor{B}) where {B} = B
function Adapt.adapt_storage(adaptor::NDTensorCuArrayAdaptor, xs::AbstractArray)
ElT = eltype(xs)
BufT = buffertype(adaptor)
- return isbits(xs) ? xs : adapt(CuArray{ElT,1,BufT}, xs)
+ N = ndims(xs)
+ return isbits(xs) ? xs : adapt(CuArray{ElT,N,BufT}, xs)
end

function NDTensors.adapt_storagetype(
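The adaptor change above matters because the old method hard-coded a rank-1 `CuArray`, discarding the rank of matrix inputs. A hedged sketch of the same pattern with plain Adapt.jl:

```julia
using Adapt, CUDA

x = rand(Float64, 5, 2)

# Hard-coding the rank, as the old code effectively did, cannot represent
# a 5×2 matrix as a CuArray{Float64,1}.
# Preserving ndims(x) round-trips the shape correctly:
y = adapt(CuArray{Float64,ndims(x)}, x)
@assert y isa CuMatrix{Float64} && size(y) == (5, 2)
```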
8 changes: 3 additions & 5 deletions NDTensors/ext/NDTensorsCUDAExt/indexing.jl
@@ -7,14 +7,12 @@ function setindex!(E::Exposed{<:CuArray}, x::Number)
return unexpose(E)
end

- function Base.getindex(E::Exposed{<:CuArray,<:Adjoint}, I...)
- Ap = parent(E)
- return expose(Ap)[I...]
+ function Base.getindex(E::Exposed{<:CuArray,<:Adjoint}, i, j)
+ return (expose(parent(E))[j, i])'
end

function Base.copy(E::Exposed{<:CuArray,<:Base.ReshapedArray})
- Ap = parent(E)
- return copy(expose(Ap))
+ return reshape(copy(expose(parent(E))), size(unexpose(E)))
end

Base.any(f, E::Exposed{<:CuArray,<:NDTensors.Tensor}) = any(f, data(unexpose(E)))
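A short usage sketch of the two rewritten methods, assuming the `Exposed` machinery dispatches as in the hunk above: the adjoint `getindex` slices the parent with `[j, i]` and re-adjoints, and the `ReshapedArray` `copy` copies the parent before re-applying the reshape.

```julia
using CUDA
using NDTensors.Unwrap: expose, unexpose

CUDA.allowscalar(false)

A = CUDA.rand(Float64, 4, 3)

# getindex on an exposed Adjoint: the slice runs on the parent CuArray.
sub = expose(A')[1:2, 1:4]    # equivalent to (A[1:4, 1:2])'

# copy on an exposed ReshapedArray: copy the parent, then reshape to
# size(unexpose(E)). Reshaping a wrapped CuArray (here an Adjoint)
# produces a Base.ReshapedArray, which exercises this method.
R = reshape(A', 12)
Rc = copy(expose(R))
@assert size(Rc) == (12,)
```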
3 changes: 3 additions & 0 deletions NDTensors/ext/examples/NDTensorCUDA.jl
@@ -58,6 +58,9 @@ function main()
f(A, B, C, D) = (A * B * C * D)[]

#Use Zygote to take the gradient of the four tensors on GPU
+ # Currently this code fails with CUDA.allowscalar(false)
+ # because `outer` calls the `_gemm!` function, which falls
+ # back to a generic implementation
grad = gradient(f, cA, cB, cC, cD)
@test NDTensors.cpu(cB * cC * cD) ≈ NDTensors.cpu(grad[1])
@test (cB * cC * cD) ≈ grad[1]
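The failure mode noted in the added comment is CUDA.jl's scalar-indexing guard: generic (non-BLAS) fallbacks like the one `_gemm!` hits read GPU memory one element at a time. A small illustration of that guard:

```julia
using CUDA

v = CUDA.rand(Float64, 4)
CUDA.allowscalar(false)
# v[1] now throws "Scalar indexing is disallowed";
# generic fallback kernels fail the same way.
x = CUDA.@allowscalar v[1]  # explicit, scoped opt-in
```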
2 changes: 0 additions & 2 deletions NDTensors/src/Unwrap/src/Unwrap.jl
@@ -20,6 +20,4 @@ include("functions/permutedims.jl")
export IsWrappedArray,
is_wrapped_array, parenttype, unwrap_type, expose, Exposed, unexpose, cpu

- ## TODO write exposed based functions in the NDTensors Extensions when necessary
-
end
90 changes: 90 additions & 0 deletions NDTensors/src/Unwrap/test/runtests.jl
@@ -0,0 +1,90 @@
using Test
using NDTensors.Unwrap
using NDTensors
using LinearAlgebra

include("../../../test/device_list.jl")
@testset "Testing Unwrap" for dev in devices_list(ARGS)
v = dev(Vector{Float64}(undef, 10))
vt = transpose(v)
va = v'

E = expose(v)
Et = expose(vt)
Ea = expose(va)
v_type = typeof(v)
e_type = eltype(v)
@test typeof(E) == Exposed{v_type,v_type}
@test typeof(Et) == Exposed{v_type,LinearAlgebra.Transpose{e_type,v_type}}
@test typeof(Ea) == Exposed{v_type,LinearAlgebra.Adjoint{e_type,v_type}}

@test parent(E) == v
@test parent(Et) == v
@test parent(Ea) == v
@test transpose(E) == vt
@test cpu(E) == v
@test cpu(Et) == vt

m = reshape(v, (5, 2))
mt = transpose(m)
ma = m'
E = expose(m)
Et = expose(mt)
Ea = expose(ma)

m_type = typeof(m)
@test typeof(E) == Exposed{m_type,m_type}
@test typeof(Et) == Exposed{m_type,LinearAlgebra.Transpose{e_type,m_type}}
@test typeof(Ea) == Exposed{m_type,LinearAlgebra.Adjoint{e_type,m_type}}

o = dev(Vector{Float32})(undef, 1)
expose(o)[] = 2
@test expose(o)[] == 2

fill!(m, 0)
@test any(!Base.isinf, expose(m))

mp = copy(Ea)
@test mp == ma
fill!(ma, 2.0)
copyto!(expose(mp), expose(ma))
@test mp == ma

q, r = qr(expose(mp))
@test q * r ≈ mp

q, r = Unwrap.qr_positive(expose(mp))
@test q * r ≈ mp

square = dev(rand(Float64, (10, 10)))
square = (square + transpose(square)) ./ 2.0
## CUDA only supports Hermitian or Symmetric eigendecompositions,
## so I symmetrize square and wrap it in Symmetric here
l, U = eigen(expose(Symmetric(square)))
@test square * U ≈ U * Diagonal(l)

U, S, V, = svd(expose(mp))
@test U * Diagonal(S) * V' ≈ mp

cm = dev(fill!(Matrix{Float64}(undef, (2, 2)), 0.0))
mul!(expose(cm), expose(mp), expose(mp'), 1.0, 0.0)
@test cm ≈ mp * mp'

@test permutedims(expose(mp), (2, 1)) == transpose(mp)
fill!(mt, 3.0)
permutedims!(expose(m), expose(mt), (2, 1))
@test norm(m) == sqrt(3^2 * 10)
@test size(m) == (5, 2)
permutedims!(expose(m), expose(mt), (2, 1), +)
@test size(m) == (5, 2)
@test norm(m) == sqrt(6^2 * 10)

m = reshape(m, (5, 2, 1))
mt = fill!(similar(m), 3.0)
m = permutedims(expose(m), (2, 1, 3))
@test size(m) == (2, 5, 1)
permutedims!(expose(m), expose(mt), (2, 1, 3))
@test norm(m) == sqrt(3^2 * 10)
permutedims!(expose(m), expose(mt), (2, 1, 3), -)
@test norm(m) == 0
end
3 changes: 3 additions & 0 deletions NDTensors/src/dense/tensoralgebra/outer.jl
@@ -21,9 +21,12 @@ function outer!(
v1 = data(T1)
v2 = data(T2)
RM = reshape(R, length(v1), length(v2))
+ ## A potential fix is to call reshape on the array
+ #RM = reshape(array(R), length(v1), length(v2))
#RM .= v1 .* transpose(v2)
#mul!(RM, v1, transpose(v2))
_gemm!('N', 'T', one(ElR), v1, v2, zero(ElR), RM)
+ #mul!!(RM, v1, transpose(v2), one(ElR), zero(ElR))
return R
end

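A hedged sketch (not the committed code) of the `mul!`-based alternative suggested in the comments above: the outer product written as a 5-argument `mul!`, so BLAS or GPU kernels apply instead of the generic `_gemm!` path. `outer_via_mul!` is an illustrative name, not an NDTensors function.

```julia
using LinearAlgebra

# Compute R = v1 * transpose(v2) in place, as a rank-1 update.
function outer_via_mul!(R::AbstractMatrix, v1::AbstractVector, v2::AbstractVector)
  mul!(R, v1, transpose(v2), one(eltype(R)), zero(eltype(R)))
  return R
end

v1, v2 = rand(3), rand(4)
R = Matrix{Float64}(undef, 3, 4)
@assert outer_via_mul!(R, v1, v2) ≈ v1 * transpose(v2)
```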
4 changes: 3 additions & 1 deletion NDTensors/src/linearalgebra/linearalgebra.jl
@@ -157,7 +157,7 @@ function svd(
spec = Spectrum(P, truncerr)
dS = length(P)
if dS < length(MS)
- MU = MU[:, 1:dS]
+ MU = expose(MU)[:, 1:dS]
# Fails on some GPU backends like Metal.
# resize!(MS, dS)
MS = MS[1:dS]
@@ -187,6 +187,7 @@ function eigen(
matrixT = matrix(T)
## TODO Here I am calling parent to ensure that the correct `any` function
## is invoked for non-cpu matrices
+ ## TODO use expose here
if any(!isfinite, parent(matrixT))
throw(
ArgumentError(
@@ -195,6 +196,7 @@
)
end

+ ### What do we do if DM is full of NaN or Inf?
DM, VM = eigen(expose(matrixT))

# Sort by largest to smallest eigenvalues
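The `MU = expose(MU)[:, 1:dS]` change routes the SVD truncation slice through the wrapper-aware `getindex`, so it also works when `MU` is a wrapped GPU matrix. A sketch of the pattern (plain CPU arrays shown; `expose` is assumed to fall back to ordinary indexing for them):

```julia
using NDTensors.Unwrap: expose

MU = rand(6, 6)   # stand-in for the U factor of an SVD
dS = 4            # number of singular values kept after truncation

# Keep only the first dS columns; going through expose lets wrapper
# and GPU types dispatch to their specialized getindex methods.
MU = expose(MU)[:, 1:dS]
@assert size(MU) == (6, 4)
```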
16 changes: 16 additions & 0 deletions NDTensors/test/ITensors/TestITensorDMRG/TestITensorDMRG.jl
@@ -0,0 +1,16 @@
## This is getting closer; still working on it. No need to review.
## Failing for CUDA, mostly in eigen (I believe there is some noise in the
## eigen decomposition with CUBLAS, giving slightly different answers than BLAS)
module TestITensorDMRG
using Test
using ITensors
using NDTensors
using Random

reference_energies = Dict([
(4, -1.6160254037844384), (8, -3.374932598687889), (10, -4.258035207282885)
])

default_rtol(elt::Type) = 10^(0.75 * log10(eps(real(elt))))
include("dmrg.jl")
end
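The `default_rtol` helper above scales the test tolerance with the element type's precision; approximate values for the types used below (a quick sanity check, not part of the commit):

```julia
default_rtol(elt::Type) = 10^(0.75 * log10(eps(real(elt))))

default_rtol(Float64)     # ≈ 1.8e-12
default_rtol(Float32)     # ≈ 6.4e-6
default_rtol(ComplexF32)  # same as Float32, since real(ComplexF32) == Float32
```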
24 changes: 24 additions & 0 deletions NDTensors/test/ITensors/TestITensorDMRG/dmrg.jl
@@ -0,0 +1,24 @@
function test_dmrg(elt, N::Integer, dev::Function)
sites = siteinds("S=1/2", N)

os = OpSum()
for j in 1:(N - 1)
os += "Sz", j, "Sz", j + 1
os += 0.5, "S+", j, "S-", j + 1
os += 0.5, "S-", j, "S+", j + 1
end

Random.seed!(1234)
psi0 = dev(randomMPS(Float64, sites; linkdims=4))
H = dev(MPO(elt, os, sites))

nsweeps = 3
cutoff = [1e-3, 1e-13]
noise = [1e-12, 0]
## When running these with nsweeps = 100 and no maxdim,
## none of the problems exceed a maxlinkdim of 32
maxdim = 32

energy, psi = dmrg(H, psi0; nsweeps, cutoff, maxdim, noise, outputlevel=0)
@test energy ≈ reference_energies[N] rtol = default_rtol(elt)
end
14 changes: 14 additions & 0 deletions NDTensors/test/ITensors/TestITensorDMRG/runtests.jl
@@ -0,0 +1,14 @@
using Test
using NDTensors
include("TestITensorDMRG.jl")

include("../../device_list.jl")

@testset "Testing DMRG different backends" begin
for dev in devices_list(ARGS),
N in [4, 10],
elt in (Float32, ComplexF32, Float64, ComplexF64)

TestITensorDMRG.test_dmrg(elt, N, dev)
end
end
1 change: 1 addition & 0 deletions NDTensors/test/ITensors/runtests.jl
@@ -0,0 +1 @@
include("TestITensorDMRG/runtests.jl")
1 change: 1 addition & 0 deletions NDTensors/test/Project.toml
@@ -6,6 +6,7 @@ ITensors = "9136182c-28ba-11e9-034c-db9fb085ebd5"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Octavian = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
TBLIS = "48530278-0828-4a49-9772-0f3830dfa1e9"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
4 changes: 4 additions & 0 deletions NDTensors/test/Unwrap.jl
@@ -0,0 +1,4 @@
using Test
using NDTensors

include(joinpath(pkgdir(NDTensors), "src", "Unwrap", "test", "runtests.jl"))
6 changes: 0 additions & 6 deletions NDTensors/test/blocksparse.jl
@@ -1,12 +1,6 @@
using NDTensors
using LinearAlgebra
using Test
if "cuda" in ARGS || "all" in ARGS
using CUDA
end
if "metal" in ARGS || "all" in ARGS
using Metal
end

@testset "BlockSparseTensor basic functionality" begin
C = nothing
7 changes: 0 additions & 7 deletions NDTensors/test/combiner.jl
@@ -5,13 +5,6 @@ using Test
# Testing generic block indices
using ITensors: QN, Index

if "cuda" in ARGS || "all" in ARGS
using CUDA
end
if "metal" in ARGS || "all" in ARGS
using Metal
end

@testset "CombinerTensor basic functionality" begin
include("device_list.jl")
devs = devices_list(copy(ARGS))
6 changes: 0 additions & 6 deletions NDTensors/test/dense.jl
@@ -1,11 +1,5 @@
using NDTensors
using Test
if "cuda" in ARGS || "all" in ARGS
using CUDA
end
if "metal" in ARGS || "all" in ARGS
using Metal
end

@testset "Dense Tensors" begin
include("device_list.jl")
7 changes: 0 additions & 7 deletions NDTensors/test/linearalgebra.jl
@@ -2,13 +2,6 @@ using NDTensors
using LinearAlgebra
using Test

if "cuda" in ARGS || "all" in ARGS
using CUDA
end
if "metal" in ARGS || "all" in ARGS
using Metal
end

@testset "random_orthog" begin
n, m = 10, 4
O1 = random_orthog(n, m)
2 changes: 2 additions & 0 deletions NDTensors/test/runtests.jl
@@ -25,6 +25,7 @@ end
"SmallVectors.jl",
"SortedSets.jl",
"TagSets.jl",
"Unwrap.jl",
"linearalgebra.jl",
"dense.jl",
"blocksparse.jl",
@@ -34,6 +35,7 @@
"emptystorage.jl",
"combiner.jl",
"arraytensor/arraytensor.jl",
"ITensors/runtests.jl",
]
println("Running $filename")
include(filename)
