diff --git a/BatchNormalization.lua b/BatchNormalization.lua
index c353dc3..bcabdae 100644
--- a/BatchNormalization.lua
+++ b/BatchNormalization.lua
@@ -16,6 +16,7 @@ function BatchNormalization:__init(nFeature, eps, momentum, affine)
    self.eps = eps or 1e-5
    self.train = true
    self.momentum = momentum or 0.1
+   self.iSize = torch.LongStorage(self.nDim):fill(0)
 
    self.running_mean = torch.zeros(nFeature)
    self.running_std = torch.ones(nFeature)
@@ -45,7 +46,7 @@ function BatchNormalization:createIODescriptors(input)
           'Only CUDA tensors are supported for cudnn.BatchNormalization!')
    if not self.iDesc or not self.oDesc or not input:isSize(self.iSize) then
       local nFeature = self.running_mean:numel()
-      self.iSize = input:size()
+      self.iSize:copy(input:size())
       self.output:resizeAs(input)
       self.gradInput:resizeAs(input)
       self.iDesc = cudnn.toDescriptor(input)
diff --git a/Pooling.lua b/Pooling.lua
index c2061e3..ac9143c 100644
--- a/Pooling.lua
+++ b/Pooling.lua
@@ -52,7 +52,7 @@ function Pooling:createIODescriptors(input)
    if not self.iDesc or not self.oDesc or
       input:size(1) ~= self.iSize[1] or input:size(2) ~= self.iSize[2]
       or input:size(3) ~= self.iSize[3] or input:size(4) ~= self.iSize[4] then
-      self.iSize = input:size()
+      self.iSize:copy(input:size())
       -- resize gradInput
       self.gradInput:resizeAs(input)
       -- resize output
diff --git a/Pooling3D.lua b/Pooling3D.lua
index 072f2c3..c620e77 100644
--- a/Pooling3D.lua
+++ b/Pooling3D.lua
@@ -57,7 +57,7 @@ function Pooling:createIODescriptors(input)
       input:size(1) ~= self.iSize[1] or input:size(2) ~= self.iSize[2]
       or input:size(3) ~= self.iSize[3] or input:size(4) ~= self.iSize[4]
       or input:size(5) ~= self.iSize[5] then
-      self.iSize = input:size()
+      self.iSize:copy(input:size())
       -- resize gradInput
       self.gradInput:resizeAs(input)
       -- resize output
diff --git a/SpatialConvolution.lua b/SpatialConvolution.lua
index 71eaa69..e00b648 100644
--- a/SpatialConvolution.lua
+++ b/SpatialConvolution.lua
@@ -23,6 +23,7 @@ function SpatialConvolution:__init(nInputPlane, nOutputPlane,
           'nOutputPlane should be divisible by nGroups')
    self.weight = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)
    self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)
+   self.iSize = torch.LongStorage(4):fill(0)
    self:reset()
    -- should nil for serialization, the reset will still work
    self.reset = nil
@@ -99,11 +100,10 @@ function SpatialConvolution:createIODescriptors(input)
       batch = false
    end
    assert(input:dim() == 4 and input:isContiguous());
-   self.iSize = self.iSize or torch.LongStorage(4):fill(0)
    if not self.iDesc or not self.oDesc or
       input:size(1) ~= self.iSize[1] or input:size(2) ~= self.iSize[2]
       or input:size(3) ~= self.iSize[3] or input:size(4) ~= self.iSize[4] then
-      self.iSize = input:size()
+      self.iSize:copy(input:size())
       assert(self.nInputPlane == input:size(2), 'input has to contain: '
                .. self.nInputPlane
diff --git a/SpatialCrossMapLRN.lua b/SpatialCrossMapLRN.lua
index aefe618..06954e8 100644
--- a/SpatialCrossMapLRN.lua
+++ b/SpatialCrossMapLRN.lua
@@ -8,6 +8,7 @@ function LRN:__init(size, alpha, beta, k)
    self.alpha = alpha or 1e-4
    self.beta = beta or 0.75
    self.k = k or 1.0
+   self.iSize = torch.LongStorage(4):fill(0)
    assert(self.size >= 1 and self.size <= 16, "size has to be between 1 and 16")
    assert(self.k >= 1e-5, "k has to be greater than 1e-5")
    assert(self.beta >= 0.01, "Beta has to be > 0.01")
@@ -35,7 +36,7 @@ function LRN:createIODescriptors(input)
    if not self.iDesc or
      input:size(1) ~= self.iSize[1] or input:size(2) ~= self.iSize[2]
      or input:size(3) ~= self.iSize[3] or input:size(4) ~= self.iSize[4] then
-      self.iSize = input:size()
+      self.iSize:copy(input:size())
      self.gradInput:resizeAs(input)
      self.output:resizeAs(input)
diff --git a/SpatialDivisiveNormalization.lua b/SpatialDivisiveNormalization.lua
index 3462b61..12a5bd3 100644
--- a/SpatialDivisiveNormalization.lua
+++ b/SpatialDivisiveNormalization.lua
@@ -8,6 +8,7 @@ function DivisiveNorm:__init(size, alpha, beta, K)
    self.alpha = alpha or 1e-4
    self.beta = beta or 0.75
    self.K = K or 2.0
+   self.iSize = torch.LongStorage(4):fill(0)
    assert(self.size >= 1 and self.size <= 16, "size has to be between 1 and 16")
    assert(self.K >= 1e-5, "K has to be greater than 1e-5")
    assert(self.beta >= 0.01, "Beta has to be > 0.01")
@@ -35,7 +36,7 @@ function DivisiveNorm:createIODescriptors(input)
    if not self.iDesc or
      input:size(1) ~= self.iSize[1] or input:size(2) ~= self.iSize[2]
      or input:size(3) ~= self.iSize[3] or input:size(4) ~= self.iSize[4] then
-      self.iSize = input:size()
+      self.iSize:copy(input:size())
      self.gradInput:resizeAs(input)
      self.output:resizeAs(input)
diff --git a/SpatialFullConvolution.lua b/SpatialFullConvolution.lua
index d00a8a2..dbd5795 100644
--- a/SpatialFullConvolution.lua
+++ b/SpatialFullConvolution.lua
@@ -8,6 +8,11 @@ autotunerCache[1] = {} -- forward
 autotunerCache[2] = {} -- backwardFilter
 autotunerCache[3] = {} -- backwardData
 
+function SpatialFullConvolution:__init(...)
+   parent.__init(self, ...)
+   self.iSize = torch.LongStorage(4):fill(0)
+end
+
 -- if you change the configuration of the module manually, call this
 function SpatialFullConvolution:resetWeightDescriptors()
    assert(torch.typename(self.weight) == 'torch.CudaTensor',
@@ -69,11 +74,10 @@ function SpatialFullConvolution:createIODescriptors(input)
       batch = false
    end
    assert(input:dim() == 4 and input:isContiguous());
-   self.iSize = self.iSize or torch.LongStorage(4):fill(0)
    if not self.iDesc or not self.oDesc or
       input:size(1) ~= self.iSize[1] or input:size(2) ~= self.iSize[2]
       or input:size(3) ~= self.iSize[3] or input:size(4) ~= self.iSize[4] then
-      self.iSize = input:size()
+      self.iSize:copy(input:size())
       -- resize gradInput
       if self.gradInput then self.gradInput:resizeAs(input); end
@@ -309,7 +313,7 @@ function SpatialFullConvolution:updateOutput(input)
    self:createIODescriptors(input)
 
    -- Because SpatialFullConvolution is performing the adjoint of the forward
-   -- convolution operator, we need to swap the forward and backward passes. 
+   -- convolution operator, we need to swap the forward and backward passes.
    errcheck('cudnnConvolutionBackwardData', cudnn.getHandle(),
             one:data(),
             self.weightDesc[0], self.weight:data(),
diff --git a/SpatialSoftMax.lua b/SpatialSoftMax.lua
index 493477f..48ef9ac 100644
--- a/SpatialSoftMax.lua
+++ b/SpatialSoftMax.lua
@@ -8,13 +8,13 @@ function SpatialSoftMax:__init(fast)
    else
       self.algorithm = 'CUDNN_SOFTMAX_ACCURATE'
    end
+   self.iSize = torch.LongStorage(4):fill(0)
 end
 
 function SpatialSoftMax:createIODescriptors(input)
    self.mode = self.mode or 'CUDNN_SOFTMAX_MODE_CHANNEL'
    -- after converting from nn use accurate
    self.algorithm = self.algorithm or 'CUDNN_SOFTMAX_ACCURATE'
-   self.iSize = self.iSize or torch.LongStorage(4):fill(0)
    local batch = true
    local singleDim = false
@@ -34,7 +34,7 @@ function SpatialSoftMax:createIODescriptors(input)
    if not self.iDesc or not self.oDesc or
       input:size(1) ~= self.iSize[1] or input:size(2) ~= self.iSize[2]
       or input:size(3) ~= self.iSize[3] or input:size(4) ~= self.iSize[4] then
-      self.iSize = input:size()
+      self.iSize:copy(input:size())
       self.gradInput:resizeAs(input)
       self.output:resizeAs(input)
       self.iDesc = cudnn.toDescriptor(input)
diff --git a/TemporalConvolution.lua b/TemporalConvolution.lua
index a9e6470..5a26da3 100644
--- a/TemporalConvolution.lua
+++ b/TemporalConvolution.lua
@@ -19,6 +19,7 @@ function TemporalConvolution:__init(inputFrameSize, outputFrameSize,
    self.gradWeight = self.gradWeight:view(outputFrameSize, inputFrameSize*kH)
    --self.dW and self.kW now have different meaning than in nn.TemporalConvolution, because
    --W and H are switched in temporal and spatial
+   self.iSize = torch.LongStorage(4):fill(0)
 end
 
 function TemporalConvolution:createIODescriptors(input)
@@ -27,6 +28,7 @@ function TemporalConvolution:createIODescriptors(input)
       input:size(1) ~= self.iSize[1] or input:size(2) ~= self.iSize[2]
       or input:size(3) ~= self.iSize[3] or input:size(4) ~= self.iSize[4] then
       sizeChanged = true
+      self.iSize:copy(input:size())
    end
    cudnn.SpatialConvolution.createIODescriptors(self,input)
    if sizeChanged then
diff --git a/VolumetricConvolution.lua b/VolumetricConvolution.lua
index 1f8025c..b7c2629 100644
--- a/VolumetricConvolution.lua
+++ b/VolumetricConvolution.lua
@@ -3,6 +3,11 @@ local VolumetricConvolution, parent
 local ffi = require 'ffi'
 local errcheck = cudnn.errcheck
 
+function VolumetricConvolution:__init(...)
+   parent.__init(self, ...)
+   self.iSize = torch.LongStorage(5):fill(0)
+end
+
 -- if you change the configuration of the module manually, call this
 function VolumetricConvolution:resetWeightDescriptors()
    assert(torch.typename(self.weight) == 'torch.CudaTensor',
@@ -30,7 +35,7 @@ end
 function VolumetricConvolution:fastest(mode)
    if mode == nil then mode = true end
    self.fastest_mode = mode
-   self.iSize = self.iSize or torch.LongStorage(4)
+   self.iSize = self.iSize or torch.LongStorage(5)
    self.iSize:fill(0)
    return self
 end
@@ -45,7 +50,7 @@ function VolumetricConvolution:setMode(fmode, bdmode, bwmode)
    if bwmode ~= nil then
       self.bwmode = bwmode
    end
-   self.iSize = self.iSize or torch.LongStorage(4)
+   self.iSize = self.iSize or torch.LongStorage(5)
    self.iSize:fill(0)
    return self
 end
@@ -65,12 +70,11 @@ function VolumetricConvolution:createIODescriptors(input)
       batch = false
    end
    assert(input:dim() == 5 and input:isContiguous());
-   self.iSize = self.iSize or torch.LongStorage(4):fill(0)
    if not self.iDesc or not self.oDesc or
      input:size(1) ~= self.iSize[1] or input:size(2) ~= self.iSize[2]
      or input:size(3) ~= self.iSize[3] or input:size(4) ~= self.iSize[4]
      or input:size(5) ~= self.iSize[5] then
-      self.iSize = input:size()
+      self.iSize:copy(input:size())
      -- resize gradInput
      if self.gradInput then self.gradInput:resizeAs(input); end
      -- create input descriptor
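
Note (not part of the patch): every hunk above replaces the rebinding
self.iSize = input:size() with an in-place self.iSize:copy(input:size()),
against a LongStorage that is now pre-allocated once in each module's __init.
Rebinding pointed iSize at a new storage on every shape change and could
leave it nil on a module that had not yet seen an input, which the scattered
self.iSize = self.iSize or ... guards deleted here worked around; pre-allocating
also lets VolumetricConvolution use 5 dimensions instead of the previous 4.
Below is a minimal standalone sketch of the pattern, assuming plain Torch7;
the sizeChanged helper is hypothetical, mirroring the element-wise checks in
the createIODescriptors functions above.

    local torch = require 'torch'

    -- Allocated once, as the patch now does in __init:
    local iSize = torch.LongStorage(4):fill(0)

    -- Hypothetical helper: compare the input's shape against the cached one.
    local function sizeChanged(input)
       for i = 1, input:dim() do
          if input:size(i) ~= iSize[i] then return true end
       end
       return false
    end

    local input = torch.Tensor(16, 3, 32, 32)
    if sizeChanged(input) then
       -- Update in place: iSize stays the same LongStorage object instead of
       -- being rebound to a new one, so it is never nil and later comparisons
       -- read stable values.
       iSize:copy(input:size())
       -- ... (re)create the cudnn descriptors here ...
    end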