diff --git a/CMakeLists.txt b/CMakeLists.txt index c4e6e2d92..6b02b28d2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -118,7 +118,7 @@ if(ENABLE_PYTHON_SUPPORT) GIT_REPOSITORY git://github.com/pvieito/PythonKit GIT_TAG - master + 6a05a15 CMAKE_ARGS -D BUILD_SHARED_LIBS=YES -D CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} @@ -189,7 +189,7 @@ if(NOT X10_FOUND AND NOT USE_BUNDLED_X10) COMMAND rm -rf /bazel-bin # ${CMAKE_COMMAND} -E rm -Rrf /bazel-bin COMMAND - bazel build ${VISIBILITY_FLAGS} -c opt --define framework_shared_object=false //tensorflow/compiler/tf2xla/xla_tensor:x10 --nocheck_visibility + bazel build ${VISIBILITY_FLAGS} -c opt --define framework_shared_object=false //tensorflow:tensorflow //tensorflow/compiler/tf2xla/xla_tensor:x10 --nocheck_visibility COMMAND bazel shutdown INSTALL_COMMAND diff --git a/Documentation/X10/SUMMARY.md b/Documentation/X10/SUMMARY.md index 1beaa6aa0..2d00e42fc 100644 --- a/Documentation/X10/SUMMARY.md +++ b/Documentation/X10/SUMMARY.md @@ -22,7 +22,7 @@ public struct MyModel: Layer { public var dense3 = Dense(inputSize: 4, outputSize: 4) public var flatten = Flatten() - @differentiable + @differentiable(reverse) public func callAsFunction(_ input: Tensor) -> Tensor { let layer1 = dense1(input) let layer2 = layer1.reshaped(to: [1, 4]) diff --git a/README.md b/README.md index 8c795d224..1f9ec86fe 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ struct Model: Layer { var layer2 = Dense(inputSize: hiddenSize, outputSize: hiddenSize, activation: relu) var layer3 = Dense(inputSize: hiddenSize, outputSize: 3, activation: identity) - @differentiable + @differentiable(reverse) func callAsFunction(_ input: Tensor) -> Tensor { return input.sequenced(through: layer1, layer2, layer3) } diff --git a/Sources/TensorFlow/BackwardsCompatibility.swift b/Sources/TensorFlow/BackwardsCompatibility.swift index 3752a22b3..3aaf26adb 100644 --- a/Sources/TensorFlow/BackwardsCompatibility.swift +++ b/Sources/TensorFlow/BackwardsCompatibility.swift @@ -23,8 +23,8 @@ import _Differentiation /// - Parameters: /// - predicted: Predicted outputs from a neural network. /// - expected: Expected values, i.e. targets, that correspond to the correct output. -@differentiable(wrt: predicted) -@differentiable(wrt: (predicted, expected)) +@differentiable(reverse, wrt: predicted) +@differentiable(reverse, wrt: (predicted, expected)) public func l1Loss( predicted: Tensor, expected: Tensor @@ -37,8 +37,8 @@ public func l1Loss( /// - Parameters: /// - predicted: Predicted outputs from a neural network. /// - expected: Expected values, i.e. targets, that correspond to the correct output. -@differentiable(wrt: predicted) -@differentiable(wrt: (predicted, expected)) +@differentiable(reverse, wrt: predicted) +@differentiable(reverse, wrt: (predicted, expected)) public func l2Loss( predicted: Tensor, expected: Tensor @@ -51,8 +51,8 @@ public func l2Loss( /// - Parameters: /// - predicted: Predicted outputs from a neural network. /// - expected: Expected values, i.e. targets, that correspond to the correct output. -@differentiable(wrt: predicted) -@differentiable(wrt: (predicted, expected)) +@differentiable(reverse, wrt: predicted) +@differentiable(reverse, wrt: (predicted, expected)) public func hingeLoss( predicted: Tensor, expected: Tensor @@ -65,8 +65,8 @@ public func hingeLoss( /// - Parameters: /// - predicted: Predicted outputs from a neural network. /// - expected: Expected values, i.e. targets, that correspond to the correct output. 
-@differentiable(wrt: predicted) -@differentiable(wrt: (predicted, expected)) +@differentiable(reverse, wrt: predicted) +@differentiable(reverse, wrt: (predicted, expected)) public func squaredHingeLoss( predicted: Tensor, expected: Tensor @@ -79,8 +79,8 @@ public func squaredHingeLoss( /// - Parameters: /// - predicted: Predicted outputs from a neural network. /// - expected: Expected values, i.e. targets, that correspond to the correct output. -@differentiable(wrt: predicted) -@differentiable(wrt: (predicted, expected)) +@differentiable(reverse, wrt: predicted) +@differentiable(reverse, wrt: (predicted, expected)) public func categoricalHingeLoss( predicted: Tensor, expected: Tensor @@ -94,8 +94,8 @@ public func categoricalHingeLoss( /// - Parameters: /// - predicted: Predicted outputs from a neural network. /// - expected: Expected values, i.e. targets, that correspond to the correct output. -@differentiable(wrt: predicted) -@differentiable(wrt: (predicted, expected)) +@differentiable(reverse, wrt: predicted) +@differentiable(reverse, wrt: (predicted, expected)) public func logCoshLoss( predicted: Tensor, expected: Tensor @@ -108,8 +108,8 @@ public func logCoshLoss( /// - Parameters: /// - predicted: Predicted outputs from a neural network. /// - expected: Expected values, i.e. targets, that correspond to the correct output. -@differentiable(wrt: predicted) -@differentiable(wrt: (predicted, expected)) +@differentiable(reverse, wrt: predicted) +@differentiable(reverse, wrt: (predicted, expected)) public func poissonLoss( predicted: Tensor, expected: Tensor @@ -123,8 +123,8 @@ public func poissonLoss( /// - Parameters: /// - predicted: Predicted outputs from a neural network. /// - expected: Expected values, i.e. targets, that correspond to the correct output. -@differentiable(wrt: predicted) -@differentiable(wrt: (predicted, expected)) +@differentiable(reverse, wrt: predicted) +@differentiable(reverse, wrt: (predicted, expected)) public func kullbackLeiblerDivergence( predicted: Tensor, expected: Tensor @@ -137,7 +137,7 @@ public func kullbackLeiblerDivergence( /// - Parameters: /// - logits: One-hot encoded outputs from a neural network. /// - labels: Indices (zero-indexed) of the correct outputs. -@differentiable(wrt: logits) +@differentiable(reverse, wrt: logits) public func softmaxCrossEntropy( logits: Tensor, probabilities: Tensor @@ -149,8 +149,8 @@ public func softmaxCrossEntropy( /// - Parameters: /// - logits: The unscaled output of a neural network. /// - labels: Integer values that correspond to the correct output. 
-@differentiable(wrt: logits) -@differentiable(wrt: (logits, labels)) +@differentiable(reverse, wrt: logits) +@differentiable(reverse, wrt: (logits, labels)) public func sigmoidCrossEntropy( logits: Tensor, labels: Tensor diff --git a/Sources/TensorFlow/Core/DifferentialOperators.swift b/Sources/TensorFlow/Core/DifferentialOperators.swift index d3030892c..cfd6ce810 100644 --- a/Sources/TensorFlow/Core/DifferentialOperators.swift +++ b/Sources/TensorFlow/Core/DifferentialOperators.swift @@ -23,10 +23,10 @@ import _Differentiation @inlinable public func valueWithGradient( at x: T, - in f: @differentiable (T) -> Tensor + in f: @differentiable(reverse) (T) -> Tensor ) -> (value: Tensor, gradient: T.TangentVector) where T: Differentiable, R: TensorFlowFloatingPoint { - let (y, pullback) = valueWithPullback(at: x, in: f) + let (y, pullback) = valueWithPullback(at: x, of: f) precondition( y.rank == 0, """ @@ -40,10 +40,10 @@ where T: Differentiable, R: TensorFlowFloatingPoint { public func valueWithGradient( at x: T, _ y: U, - in f: @differentiable (T, U) -> Tensor + in f: @differentiable(reverse) (T, U) -> Tensor ) -> (value: Tensor, gradient: (T.TangentVector, U.TangentVector)) where T: Differentiable, U: Differentiable, R: TensorFlowFloatingPoint { - let (y, pullback) = valueWithPullback(at: x, y, in: f) + let (y, pullback) = valueWithPullback(at: x, y, of: f) precondition( y.rank == 0, """ @@ -58,10 +58,10 @@ public func valueWithGradient( at x: T, _ y: U, _ z: V, - in f: @differentiable (T, U, V) -> Tensor + in f: @differentiable(reverse) (T, U, V) -> Tensor ) -> (value: Tensor, gradient: (T.TangentVector, U.TangentVector, V.TangentVector)) where T: Differentiable, U: Differentiable, V: Differentiable, R: TensorFlowFloatingPoint { - let (y, pullback) = valueWithPullback(at: x, y, z, in: f) + let (y, pullback) = valueWithPullback(at: x, y, z, of: f) precondition(y.rank == 0) return (y, pullbackOfOneLikeY(y: y, pullback: pullback)) } @@ -70,7 +70,7 @@ where T: Differentiable, U: Differentiable, V: Differentiable, R: TensorFlowFloa @inlinable public func valueWithGradient( - of f: @escaping @differentiable (T) -> Tensor + of f: @escaping @differentiable(reverse) (T) -> Tensor ) -> (T) -> (value: Tensor, gradient: T.TangentVector) where T: Differentiable, R: TensorFlowFloatingPoint { return { x in valueWithGradient(at: x, in: f) } @@ -78,7 +78,7 @@ where T: Differentiable, R: TensorFlowFloatingPoint { @inlinable public func valueWithGradient( - of f: @escaping @differentiable (T, U) -> Tensor + of f: @escaping @differentiable(reverse) (T, U) -> Tensor ) -> (T, U) -> (value: Tensor, gradient: (T.TangentVector, U.TangentVector)) where T: Differentiable, U: Differentiable, R: TensorFlowFloatingPoint { return { x, y in valueWithGradient(at: x, y, in: f) } @@ -86,7 +86,7 @@ where T: Differentiable, U: Differentiable, R: TensorFlowFloatingPoint { @inlinable public func valueWithGradient( - of f: @escaping @differentiable (T, U, V) -> Tensor + of f: @escaping @differentiable(reverse) (T, U, V) -> Tensor ) -> (T, U, V) -> ( value: Tensor, gradient: (T.TangentVector, U.TangentVector, V.TangentVector) @@ -100,7 +100,7 @@ where T: Differentiable, U: Differentiable, V: Differentiable, R: TensorFlowFloa @inlinable public func gradient( at x: T, - in f: @differentiable (T) -> Tensor + in f: @differentiable(reverse) (T) -> Tensor ) -> T.TangentVector where T: Differentiable, R: TensorFlowFloatingPoint { return valueWithGradient(at: x, in: f).1 } @@ -109,7 +109,7 @@ public func gradient( public func gradient( 
at x: T, _ y: U, - in f: @differentiable (T, U) -> Tensor + in f: @differentiable(reverse) (T, U) -> Tensor ) -> (T.TangentVector, U.TangentVector) where T: Differentiable, U: Differentiable, R: TensorFlowFloatingPoint { return valueWithGradient(at: x, y, in: f).1 @@ -120,7 +120,7 @@ public func gradient( at x: T, _ y: U, _ z: V, - in f: @differentiable (T, U, V) -> Tensor + in f: @differentiable(reverse) (T, U, V) -> Tensor ) -> (T.TangentVector, U.TangentVector, V.TangentVector) where T: Differentiable, U: Differentiable, V: Differentiable, R: TensorFlowFloatingPoint { return valueWithGradient(at: x, y, z, in: f).1 @@ -130,14 +130,14 @@ where T: Differentiable, U: Differentiable, V: Differentiable, R: TensorFlowFloa @inlinable public func gradient( - of f: @escaping @differentiable (T) -> Tensor + of f: @escaping @differentiable(reverse) (T) -> Tensor ) -> (T) -> T.TangentVector where T: Differentiable, R: TensorFlowFloatingPoint { return { x in gradient(at: x, in: f) } } @inlinable public func gradient( - of f: @escaping @differentiable (T, U) -> Tensor + of f: @escaping @differentiable(reverse) (T, U) -> Tensor ) -> (T, U) -> (T.TangentVector, U.TangentVector) where T: Differentiable, U: Differentiable, R: TensorFlowFloatingPoint { return { x, y in gradient(at: x, y, in: f) } @@ -145,7 +145,7 @@ where T: Differentiable, U: Differentiable, R: TensorFlowFloatingPoint { @inlinable public func gradient( - of f: @escaping @differentiable (T, U, V) -> Tensor + of f: @escaping @differentiable(reverse) (T, U, V) -> Tensor ) -> (T, U, V) -> (T.TangentVector, U.TangentVector, V.TangentVector) where T: Differentiable, U: Differentiable, V: Differentiable, R: TensorFlowFloatingPoint { return { x, y, z in gradient(at: x, y, z, in: f) } diff --git a/Sources/TensorFlow/Core/MixedPrecision.swift b/Sources/TensorFlow/Core/MixedPrecision.swift index 97d5d98ff..ea58503f8 100644 --- a/Sources/TensorFlow/Core/MixedPrecision.swift +++ b/Sources/TensorFlow/Core/MixedPrecision.swift @@ -153,7 +153,7 @@ extension Tensor { /// Promotes a scalar to a tensor with the same device and precision as the given tensor. // TODO (SR-12968): Mark `tensor` with `@noDerivative` and remove custom vjp below. - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public init(_ value: Scalar, deviceAndPrecisionLike tensor: Tensor) { let device = tensor.device let tmp = Tensor(value, on: device) diff --git a/Sources/TensorFlow/Core/Tensor.swift b/Sources/TensorFlow/Core/Tensor.swift index b376aa6de..497f02afa 100644 --- a/Sources/TensorFlow/Core/Tensor.swift +++ b/Sources/TensorFlow/Core/Tensor.swift @@ -40,12 +40,12 @@ public struct Tensor { @usableFromInline internal var _isScalarZero = false - /// An internal workaround for SR-13263: debug info generation crash. - @usableFromInline - class SR13263Workaround {} + // /// An internal workaround for SR-13263: debug info generation crash. + // @usableFromInline + // class SR13263Workaround {} - /// An internal workaround for SR-13263: debug info generation crash. - internal var _sr13263Workaround: SR13263Workaround? + // /// An internal workaround for SR-13263: debug info generation crash. + // internal var _sr13263Workaround: SR13263Workaround? @inlinable public init(handle: TensorHandle) { @@ -132,7 +132,7 @@ extension Tensor { /// Reshape to scalar. /// - Precondition: The tensor has exactly one scalar. 
@inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public func scalarized() -> Scalar { precondition( shape.contiguousSize == 1, @@ -174,7 +174,7 @@ extension Tensor { return handle.makeHostCopy() } - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public var scalars: [Scalar] { if handle.backend == .XLA { let (storage, _) = xlaTensor.fetchTensorValues(Scalar.self) @@ -203,7 +203,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { extension Tensor { /// Creates a 0-D tensor from a scalar value. - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public init(_ value: Scalar, on device: Device = .default) { switch device.backend { case .XLA: @@ -227,7 +227,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { extension Tensor { /// Creates a 1D tensor from scalars. @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public init(_ scalars: [Scalar], on device: Device = .default) { self.init(shape: [scalars.count], scalars: scalars, on: device) } @@ -247,7 +247,7 @@ extension Tensor { /// - scalars: The scalar contents of the tensor. /// - Precondition: The product of the dimensions of the shape must equal the number of scalars. @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public init(shape: TensorShape, scalars: [Scalar], on device: Device = .default) { precondition( shape.contiguousSize == scalars.count, @@ -628,7 +628,7 @@ extension Tensor: AdditiveArithmetic where Scalar: Numeric { /// Adds two tensors and produces their sum. /// - Note: `+` supports broadcasting. @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public static func + (lhs: Tensor, rhs: Tensor) -> Tensor { if lhs._isScalarZero { return rhs @@ -641,7 +641,7 @@ extension Tensor: AdditiveArithmetic where Scalar: Numeric { /// Subtracts one tensor from another and produces their difference. /// - Note: `-` supports broadcasting. @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public static func - (lhs: Tensor, rhs: Tensor) -> Tensor { if rhs._isScalarZero { return lhs @@ -745,7 +745,7 @@ public protocol TensorProtocol { public protocol DifferentiableTensorProtocol: TensorProtocol & Differentiable & EuclideanDifferentiable where Scalar: TensorFlowFloatingPoint { - @differentiable(wrt: self) + @differentiable(reverse, wrt: self) func annotate(_ annotation: String) -> Self } @@ -773,7 +773,7 @@ where Scalar: TensorFlowFloatingPoint { /// /// - Parameter annotation: The annotation to be added. /// - Returns: The annotated tensor. - @differentiable(wrt: self) + @differentiable(reverse, wrt: self) public func annotate(_ annotation: String) -> Tensor { switch handle.backend { case .XLA: diff --git a/Sources/TensorFlow/Initializers.swift b/Sources/TensorFlow/Initializers.swift index 33a703d0d..e4ffea51c 100644 --- a/Sources/TensorFlow/Initializers.swift +++ b/Sources/TensorFlow/Initializers.swift @@ -36,7 +36,7 @@ extension Tensor { /// - repeatedValue: The scalar value to repeat. /// - shape: The dimensions of the tensor. 
@inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public init( repeating repeatedValue: Scalar, shape: TensorShape, on device: Device = .default @@ -49,7 +49,7 @@ extension Tensor { /// Creates a tensor by broadcasting the given scalar to a given rank with /// all dimensions being 1. @inlinable - // @differentiable(where Scalar: TensorFlowFloatingPoint) + // @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public init(broadcasting scalar: Scalar, rank: Int, on device: Device = .default) { self = Tensor(scalar, on: device).reshaped(to: TensorShape(repeating: 1, count: rank)) } @@ -93,7 +93,7 @@ extension Tensor where Scalar: Numeric { /// Perform an element-wise conversion from another `Tensor`. @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint, OtherScalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint, OtherScalar: TensorFlowFloatingPoint) public init(_ other: Tensor) { self = _Raw.cast(other) } @@ -116,7 +116,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { extension Tensor { /// Creates a tensor from an array of tensors (which may themselves be scalars). @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public init(_ elements: [Tensor]) { self = _Raw.pack(elements) } @@ -150,7 +150,7 @@ extension Tensor { /// /// - Returns: The stacked tensor. @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public init(stacking tensors: [Tensor], alongAxis axis: Int = 0) { self = _Raw.pack(tensors, axis: Int64(axis)) } @@ -188,7 +188,7 @@ extension Tensor { /// /// - Returns: The concatenated tensor. @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public init(concatenating tensors: [Tensor], alongAxis axis: Int = 0) { precondition(tensors.count > 0) self = _Raw.concatV2(tensors, axis: Tensor(Int32(axis), on: tensors.first!.device)) diff --git a/Sources/TensorFlow/Layer.swift b/Sources/TensorFlow/Layer.swift index 44aa6fdca..884696709 100644 --- a/Sources/TensorFlow/Layer.swift +++ b/Sources/TensorFlow/Layer.swift @@ -32,14 +32,14 @@ where /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable(wrt: self) + @differentiable(reverse, wrt: self) func callAsFunction(_ input: Input) -> Output /// Returns the output obtained from applying the layer to the given input. /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable(wrt: self) + @differentiable(reverse, wrt: self) func forward(_ input: Input) -> Output } @@ -48,7 +48,7 @@ extension Module { /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable(wrt: self) + @differentiable(reverse, wrt: self) public func forward(_ input: Input) -> Output { return callAsFunction(input) } @@ -60,7 +60,7 @@ extension Module where Input: TensorProtocol, Output: DifferentiableTensorProtoc /// /// - Parameter input: The input to the layer. /// - Returns: The annotated output. 
- @differentiable(wrt: self) + @differentiable(reverse, wrt: self) public func callAsFunction(_ input: Input) -> Output { let activation = forward(input) return annotated(activation) @@ -72,7 +72,7 @@ extension Module where Input: TensorProtocol, Output: DifferentiableTensorProtoc /// /// - Parameter output: The output to the layer. /// - Returns: The annotated output. - @differentiable + @differentiable(reverse) public func annotated(_ output: Output) -> Output { let annotated = output.annotate("type=\(Self.self)") return annotated @@ -153,31 +153,19 @@ public protocol Layer: Module where Input: Differentiable { /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable + @differentiable(reverse) func callAsFunction(_ input: Input) -> Output - - @differentiable - func forward(_ input: Input) -> Output } -extension Layer { - // Workaround for SR-13455: autodiff undefined symbol linker error. - @differentiable(wrt: self) - @differentiable - public func forward(_ input: Input) -> Output { - return callAsFunction(input) - } -} -extension Layer where Input: DifferentiableTensorProtocol, Output: DifferentiableTensorProtocol { - // Workaround for SR-13455: autodiff undefined symbol linker error. - @differentiable(wrt: self) - @differentiable - public func callAsFunction(_ input: Input) -> Output { - let activation = forward(input) - return annotated(activation) - } -} +// extension Layer where Input: DifferentiableTensorProtocol, Output: DifferentiableTensorProtocol { +// // Workaround for SR-13455: autodiff undefined symbol linker error. +// @differentiable(reverse, wrt: self) +// public func callAsFunction(_ input: Input) -> Output { +// let activation = callAsFunction(input) +// return annotated(activation) +// } +// } /// An empty struct representing empty `TangentVector`s for parameterless layers. public struct EmptyTangentVector: EuclideanDifferentiable, VectorProtocol, ElementaryFunctions, @@ -200,12 +188,11 @@ public struct EmptyTangentVector: EuclideanDifferentiable, VectorProtocol, Eleme /// /// The `TangentVector` of parameterless layers is always `EmptyTangentVector`. public protocol ParameterlessLayer: Layer where TangentVector == EmptyTangentVector { - @differentiable - func callAsFunction(_ input: Input) -> Output + @differentiable(reverse) func callAsFunction(_ input: Input) -> Output } extension ParameterlessLayer { - public mutating func move(along direction: EmptyTangentVector) {} + public mutating func move(by direction: EmptyTangentVector) {} public var differentiableVectorView: EmptyTangentVector { EmptyTangentVector() } } @@ -269,7 +256,7 @@ extension Differentiable { /// - l1: The first layer. /// - l2: The second layer. /// - Returns: The final layer's output after sequential application. - @differentiable + @differentiable(reverse) public func sequenced(through l1: L1, _ l2: L2) -> L2.Output where L1.Input == Self, L1.Output == L2.Input { let o1 = l1(self) @@ -284,7 +271,7 @@ extension Differentiable { /// - l2: The second layer. /// - l3: The third layer. /// - Returns: The final layer's output after sequential application. - @differentiable + @differentiable(reverse) public func sequenced(through l1: L1, _ l2: L2, _ l3: L3) -> L3.Output where L1.Input == Self, L1.Output == L2.Input, L2.Output == L3.Input { @@ -302,7 +289,7 @@ extension Differentiable { /// - l3: The third layer. /// - l4: The fourth layer. /// - Returns: The final layer's output after sequential application. 
- @differentiable + @differentiable(reverse) public func sequenced( through l1: L1, _ l2: L2, _ l3: L3, _ l4: L4 ) -> L4.Output @@ -326,7 +313,7 @@ extension Differentiable { /// - l4: The third layer. /// - l5: The fifth layer. /// - Returns: The final layer's output after sequential application. - @differentiable + @differentiable(reverse) public func sequenced( through l1: L1, _ l2: L2, _ l3: L3, _ l4: L4, _ l5: L5 ) -> L5.Output @@ -352,7 +339,7 @@ extension Differentiable { /// - l5: The fifth layer. /// - l6: The sixth layer. /// - Returns: The final layer's output after sequential application. - @differentiable + @differentiable(reverse) public func sequenced( through l1: L1, _ l2: L2, _ l3: L3, _ l4: L4, _ l5: L5, _ l6: L6 ) -> L6.Output diff --git a/Sources/TensorFlow/Layers/Convolutional.swift b/Sources/TensorFlow/Layers/Convolutional.swift index 024533572..3a5cc1331 100644 --- a/Sources/TensorFlow/Layers/Convolutional.swift +++ b/Sources/TensorFlow/Layers/Convolutional.swift @@ -36,7 +36,7 @@ public struct Conv1D: Layer { @noDerivative private let useBias: Bool /// The element-wise activation function type. - public typealias Activation = @differentiable (Tensor) -> Tensor + public typealias Activation = @differentiable(reverse) (Tensor) -> Tensor /// Creates a `Conv1D` layer with the specified filter, bias, activation function, stride, /// dilation and padding. @@ -79,8 +79,8 @@ public struct Conv1D: Layer { /// - Returns: The output of shape [batch size, output width, output channel count]. /// /// - Note: Padding size equals zero when using `.valid`. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { let conv = conv1D( input, filter: filter, @@ -149,7 +149,7 @@ public struct Conv2D: Layer { @noDerivative private let useBias: Bool /// The element-wise activation function type. - public typealias Activation = @differentiable (Tensor) -> Tensor + public typealias Activation = @differentiable(reverse) (Tensor) -> Tensor /// Creates a `Conv2D` layer with the specified filter, bias, activation function, strides, /// dilations and padding. @@ -201,8 +201,8 @@ public struct Conv2D: Layer { /// [batch count, output height, output width, output channel count]. /// /// - Note: Padding size equals zero when using `.valid`. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { let conv = conv2D( input, filter: filter, @@ -273,7 +273,7 @@ public struct Conv3D: Layer { @noDerivative private let useBias: Bool /// The element-wise activation function type. - public typealias Activation = @differentiable (Tensor) -> Tensor + public typealias Activation = @differentiable(reverse) (Tensor) -> Tensor /// Creates a `Conv3D` layer with the specified filter, bias, activation function, strides, and /// padding. @@ -332,8 +332,8 @@ public struct Conv3D: Layer { /// [batch count, output depth, output height, output width, output channel count]. /// /// - Note: Padding size equals zero when using `.valid`. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { let conv = conv3D( input, filter: filter, @@ -405,7 +405,7 @@ public struct TransposedConv1D: Layer { @noDerivative private let useBias: Bool /// The element-wise activation function type. 
- public typealias Activation = @differentiable (Tensor) -> Tensor + public typealias Activation = @differentiable(reverse) (Tensor) -> Tensor /// Creates a `TransposedConv1D` layer with the specified filter, bias, /// activation function, strides, and padding. @@ -436,8 +436,8 @@ public struct TransposedConv1D: Layer { /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { let batchSize = input.shape[0] let w = (input.shape[1] - (1 * paddingIndex)) * stride + (filter.shape[0] * paddingIndex) let c = filter.shape[2] @@ -506,7 +506,7 @@ public struct TransposedConv2D: Layer { @noDerivative private let useBias: Bool /// The element-wise activation function type. - public typealias Activation = @differentiable (Tensor) -> Tensor + public typealias Activation = @differentiable(reverse) (Tensor) -> Tensor /// Creates a `TransposedConv2D` layer with the specified filter, bias, /// activation function, strides, and padding. @@ -538,8 +538,8 @@ public struct TransposedConv2D: Layer { /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { let batchSize = input.shape[0] let h = (input.shape[1] - (1 * paddingIndex)) * strides.0 + (filter.shape[0] * paddingIndex) let w = (input.shape[2] - (1 * paddingIndex)) * strides.1 + (filter.shape[1] * paddingIndex) @@ -610,7 +610,7 @@ public struct TransposedConv3D: Layer { @noDerivative private let useBias: Bool /// The element-wise activation function type. - public typealias Activation = @differentiable (Tensor) -> Tensor + public typealias Activation = @differentiable(reverse) (Tensor) -> Tensor /// Creates a `TransposedConv3D` layer with the specified filter, bias, /// activation function, strides, and padding. @@ -641,8 +641,8 @@ public struct TransposedConv3D: Layer { /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { let batchSize = input.shape[0] let w = (input.shape[1] - (1 * paddingIndex)) * strides.0 + (filter.shape[0] * paddingIndex) let h = (input.shape[2] - (1 * paddingIndex)) * strides.1 + (filter.shape[1] * paddingIndex) @@ -714,7 +714,7 @@ public struct DepthwiseConv2D: Layer { @noDerivative private let useBias: Bool /// The element-wise activation function type. - public typealias Activation = @differentiable (Tensor) -> Tensor + public typealias Activation = @differentiable(reverse) (Tensor) -> Tensor /// Creates a `DepthwiseConv2D` layer with the specified filter, bias, activation function, /// strides, and padding. @@ -749,8 +749,8 @@ public struct DepthwiseConv2D: Layer { /// [batch count, input height, input width, input channel count] /// - Returns: The output of shape, /// [batch count, output height, output width, input channel count * channel multiplier] - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { let conv = depthwiseConv2D( input, filter: filter, @@ -823,8 +823,8 @@ public struct ZeroPadding1D: ParameterlessLayer /// /// - Parameter input: The input to the layer. /// - Returns: The output. 
- @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { input.padded(forSizes: [(0, 0), padding, (0, 0)]) } } @@ -857,8 +857,8 @@ public struct ZeroPadding2D: ParameterlessLayer /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { input.padded(forSizes: [(0, 0), padding.0, padding.1, (0, 0)]) } } @@ -891,8 +891,8 @@ public struct ZeroPadding3D: ParameterlessLayer /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { input.padded(forSizes: [(0, 0), padding.0, padding.1, padding.2, (0, 0)]) } } @@ -922,7 +922,7 @@ public struct SeparableConv1D: Layer { @noDerivative private let useBias: Bool /// The element-wise activation function type. - public typealias Activation = @differentiable (Tensor) -> Tensor + public typealias Activation = @differentiable(reverse) (Tensor) -> Tensor /// Creates a `SeparableConv1D` layer with the specified depthwise and pointwise filter, /// bias, activation function, strides, and padding. @@ -960,8 +960,8 @@ public struct SeparableConv1D: Layer { /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { let depthwise = depthwiseConv2D( input.expandingShape(at: 1), filter: depthwiseFilter.expandingShape(at: 1), @@ -1044,7 +1044,7 @@ public struct SeparableConv2D: Layer { @noDerivative private let useBias: Bool /// The element-wise activation function type. - public typealias Activation = @differentiable (Tensor) -> Tensor + public typealias Activation = @differentiable(reverse) (Tensor) -> Tensor /// Creates a `SeparableConv2D` layer with the specified depthwise and pointwise filter, /// bias, activation function, strides, and padding. @@ -1082,8 +1082,8 @@ public struct SeparableConv2D: Layer { /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { let depthwise = depthwiseConv2D( input, filter: depthwiseFilter, diff --git a/Sources/TensorFlow/Layers/Core.swift b/Sources/TensorFlow/Layers/Core.swift index 6bedff653..43552ffed 100644 --- a/Sources/TensorFlow/Layers/Core.swift +++ b/Sources/TensorFlow/Layers/Core.swift @@ -28,8 +28,8 @@ public struct Flatten: ParameterlessLayer { /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { let batchSize = input.shape[0] let remaining = input.shape[1..: ParameterlessLayer { /// /// - Parameter input: The input to the layer. /// - Returns: The output. 
- @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { return input.reshaped(toShape: shape) } } @@ -75,7 +75,7 @@ public struct Reshape: ParameterlessLayer { /// A layer that encloses a custom differentiable function. public struct Function: ParameterlessLayer { public typealias TangentVector = EmptyTangentVector - public typealias Body = @differentiable (Input) -> Output + public typealias Body = @differentiable(reverse) (Input) -> Output @noDerivative public let body: Body @@ -83,7 +83,7 @@ public struct Function: Parameter self.body = body } - @differentiable + @differentiable(reverse) public func callAsFunction(_ input: Input) -> Output { body(input) } diff --git a/Sources/TensorFlow/Layers/Dense.swift b/Sources/TensorFlow/Layers/Dense.swift index c91bbde86..0d50f28c7 100644 --- a/Sources/TensorFlow/Layers/Dense.swift +++ b/Sources/TensorFlow/Layers/Dense.swift @@ -38,14 +38,14 @@ public struct Dense: Layer { @noDerivative private let useBias: Bool /// The element-wise activation function type. - public typealias Activation = @differentiable (Tensor) -> Tensor + public typealias Activation = @differentiable(reverse) (Tensor) -> Tensor /// Creates an instance from the given weight, optional bias, and activation function. /// /// - Note: currently, `weight` is the only differentiability parameter. `bias` can be made a /// differentiability parameter after `Optional` conditionally conforms to `Differentiable`: /// TF-499. - @differentiable(wrt: weight) + @differentiable(reverse, wrt: weight) public init( weight: Tensor, bias: Tensor? = nil, @@ -77,8 +77,8 @@ public struct Dense: Layer { /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { if batched { let hidden = matmul(input.expandingShape(at: 1), weight).squeezingShape(at: 1) return activation(useBias ? hidden + bias : hidden) diff --git a/Sources/TensorFlow/Layers/Dropout.swift b/Sources/TensorFlow/Layers/Dropout.swift index a4d5f488f..b6047e17d 100644 --- a/Sources/TensorFlow/Layers/Dropout.swift +++ b/Sources/TensorFlow/Layers/Dropout.swift @@ -20,7 +20,7 @@ import _Differentiation extension Tensor where Scalar: TensorFlowFloatingPoint { /// Computes dropout given a probability. - @differentiable(wrt: self where Scalar: Differentiable) + @differentiable(reverse, wrt: self where Scalar: Differentiable) fileprivate func droppingOut(probability: Double) -> Tensor { let noise = Tensor(randomUniform: shape, on: device) let keepMask = noise .>= Scalar(probability) @@ -54,8 +54,8 @@ public struct Dropout: ParameterlessLayer { /// /// - Parameter input: The input to the layer. /// - Returns: The output. 
- @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { switch Context.local.learningPhase { case .training: return input.droppingOut(probability: probability) @@ -81,8 +81,8 @@ public struct GaussianNoise: ParameterlessLayer } /// Returns a tensor obtained by adding noise to `input` - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { switch Context.local.learningPhase { case .training: let noise = Tensor( @@ -118,8 +118,8 @@ public struct GaussianDropout: ParameterlessLay } /// Applies multiplicative 1-centered Gaussian noise to the input during training only. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { switch Context.local.learningPhase { case .training: let noise = Tensor( @@ -158,8 +158,8 @@ public struct AlphaDropout: ParameterlessLayer } /// Adds noise to `input` during training, and is a no-op during inference. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { switch Context.local.learningPhase { case .training: let alpha = 1.6732632423543772848170429916717 diff --git a/Sources/TensorFlow/Layers/Embedding.swift b/Sources/TensorFlow/Layers/Embedding.swift index 5c68f7c5c..95e6fdac3 100644 --- a/Sources/TensorFlow/Layers/Embedding.swift +++ b/Sources/TensorFlow/Layers/Embedding.swift @@ -54,8 +54,8 @@ public struct Embedding: Module { /// - Parameter /// - input: The indices that will be mapped to their vector representations. /// - Returns: The tensor created by replacing input indices with their vector representations. - @differentiable(wrt: self) - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse, wrt: self) + public func callAsFunction(_ input: Tensor) -> Tensor { embeddings.gathering(atIndices: input) } } diff --git a/Sources/TensorFlow/Layers/Morphological.swift b/Sources/TensorFlow/Layers/Morphological.swift index e0e06cce7..106781cab 100644 --- a/Sources/TensorFlow/Layers/Morphological.swift +++ b/Sources/TensorFlow/Layers/Morphological.swift @@ -69,8 +69,8 @@ public struct `Dilation2D`: Layer { /// [batch count, output height, output width, output channel count]. /// /// - Note: Padding size equals zero when using `.valid`. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { let dilated = dilation2D( input, filter: filter, @@ -139,8 +139,8 @@ public struct `Erosion2D`: Layer { /// [batch count, output height, output width, output channel count]. /// /// - Note: Padding size equals zero when using `.valid`. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { let eroded = erosion2D( input, filter: filter, diff --git a/Sources/TensorFlow/Layers/Normalization.swift b/Sources/TensorFlow/Layers/Normalization.swift index 79af7667f..e46e0ed22 100644 --- a/Sources/TensorFlow/Layers/Normalization.swift +++ b/Sources/TensorFlow/Layers/Normalization.swift @@ -23,7 +23,7 @@ import _Differentiation /// - offset: The tensor to be added to normalized tensor. /// - scale: The tensor to be applied to normalized tensor. 
/// - varianceEpsilon: The small number to avoid dividing by 0. -@differentiable(wrt: (input, mean, variance, offset, scale)) +@differentiable(reverse, wrt: (input, mean, variance, offset, scale)) private func normalize( _ input: Tensor, mean: Tensor, @@ -98,20 +98,26 @@ public struct BatchNorm: Layer { /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { - let positiveAxis = (input.rank + axis) % input.rank + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { + let inputRank = input.rank + let positiveAxis = (inputRank + axis) % inputRank precondition( input.shape[positiveAxis] == offset.shape[0], "The number of features of the input and the offset doesn't match.") - var offset = self.offset - var scale = self.scale - if positiveAxis != input.rank - 1 { - var broadcastShape = TensorShape([Int](repeating: 1, count: input.rank)) - broadcastShape[positiveAxis] = input.shape[positiveAxis] - offset = offset.reshaped(to: broadcastShape) - scale = scale.reshaped(to: broadcastShape) - } +// var (offset, scale) = {x in (x.offset, x.scale) }(self) +// if positiveAxis != input.rank - 1 { +// var broadcastShape = TensorShape([Int](repeating: 1, count: input.rank)) +// broadcastShape[positiveAxis] = input.shape[positiveAxis] +// offset = offset.reshaped(to: broadcastShape) +// scale = scale.reshaped(to: broadcastShape) +// } + let offsetOriginal = self.offset + let scaleOriginal = self.scale + let (offset, scale) = Self._sr13263workaround(offset: offsetOriginal, + scale: scaleOriginal, + input: input, + positiveAxis: positiveAxis) switch Context.local.learningPhase { case .training: return doTraining(input, offset: offset, scale: scale, axis: positiveAxis) @@ -119,6 +125,23 @@ public struct BatchNorm: Layer { return doInference(input, offset: offset, scale: scale) } } + + @inline(never) + @differentiable(reverse) // if the function is `public` or `internal`, the compiler crashes + private static func _sr13263workaround( + offset: Tensor, + scale: Tensor, + input: Tensor, + positiveAxis: Int + ) -> (Tensor, Tensor) { + if positiveAxis != input.rank - 1 { + var broadcastShape = TensorShape([Int](repeating: 1, count: input.rank)) + broadcastShape[positiveAxis] = input.shape[positiveAxis] + return (offset.reshaped(to: broadcastShape), scale.reshaped(to: broadcastShape)) + } else { + return (offset, scale) + } + } private func doTraining( _ input: Tensor, offset: Tensor, scale: Tensor, axis: Int @@ -240,8 +263,8 @@ public struct LayerNorm: Layer { /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { // Note: `withoutDerivative(at:)` is currently needed in the following to prevent the resulting // tensor for `epsilon` from being scalarized on the backwards pass, breaking X10 traces. let epsilon = withoutDerivative(at: input) { Tensor(self.epsilon, deviceAndPrecisionLike: $0) } @@ -341,8 +364,8 @@ public struct GroupNorm: Layer { /// - Returns: The output. /// - Precondition: The axis cannot be batch axis. /// - Precondition: The numbers of features of the input and the offset must be same. 
- @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { let positiveAxis = (input.rank + axis) % input.rank precondition(positiveAxis != 0, "The axis cannot be batch axis.") precondition( @@ -447,8 +470,8 @@ public struct InstanceNorm: Layer { /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { delegate(input) } } diff --git a/Sources/TensorFlow/Layers/Pooling.swift b/Sources/TensorFlow/Layers/Pooling.swift index 6a5c66c06..563534c10 100644 --- a/Sources/TensorFlow/Layers/Pooling.swift +++ b/Sources/TensorFlow/Layers/Pooling.swift @@ -44,8 +44,8 @@ public struct MaxPool1D: ParameterlessLayer { /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { maxPool2D( input.expandingShape(at: 1), filterSize: (1, 1, poolSize, 1), @@ -85,8 +85,8 @@ public struct MaxPool2D: ParameterlessLayer { /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { maxPool2D(input, filterSize: poolSize, strides: strides, padding: padding) } } @@ -142,8 +142,8 @@ public struct MaxPool3D: ParameterlessLayer { /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { maxPool3D(input, filterSize: poolSize, strides: strides, padding: padding) } } @@ -204,8 +204,8 @@ public struct AvgPool1D: ParameterlessLayer { /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { avgPool2D( input.expandingShape(at: 1), filterSize: (1, 1, poolSize, 1), @@ -245,8 +245,8 @@ public struct AvgPool2D: ParameterlessLayer { /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { avgPool2D(input, filterSize: poolSize, strides: strides, padding: padding) } } @@ -302,8 +302,8 @@ public struct AvgPool3D: ParameterlessLayer { /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { avgPool3D(input, filterSize: poolSize, strides: strides, padding: padding) } } @@ -346,8 +346,8 @@ public struct GlobalAvgPool1D: ParameterlessLay /// /// - Parameter input: The input to the layer. /// - Returns: The output. 
- @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { precondition(input.rank == 3, "The rank of the input must be 3.") return input.mean(squeezingAxes: 1) } @@ -365,8 +365,8 @@ public struct GlobalAvgPool2D: ParameterlessLay /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { precondition(input.rank == 4, "The rank of the input must be 4.") return input.mean(squeezingAxes: [1, 2]) } @@ -384,8 +384,8 @@ public struct GlobalAvgPool3D: ParameterlessLay /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { precondition(input.rank == 5, "The rank of the input must be 5.") return input.mean(squeezingAxes: [1, 2, 3]) } @@ -406,8 +406,8 @@ public struct GlobalMaxPool1D: ParameterlessLay /// - context: The contextual information for the layer application, e.g. the current learning /// phase. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { precondition(input.rank == 3, "The rank of the input must be 3.") return input.max(squeezingAxes: 1) } @@ -425,8 +425,8 @@ public struct GlobalMaxPool2D: ParameterlessLay /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { precondition(input.rank == 4, "The rank of the input must be 4.") return input.max(squeezingAxes: [1, 2]) } @@ -444,8 +444,8 @@ public struct GlobalMaxPool3D: ParameterlessLay /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { precondition(input.rank == 5, "The rank of the input must be 5.") return input.max(squeezingAxes: [1, 2, 3]) } @@ -494,8 +494,8 @@ public struct FractionalMaxPool2D: Parameterles /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { fractionalMaxPool2D( input, poolingRatio: poolingRatio, diff --git a/Sources/TensorFlow/Layers/Recurrent.swift b/Sources/TensorFlow/Layers/Recurrent.swift index 1fd0c30bf..9a07c4120 100644 --- a/Sources/TensorFlow/Layers/Recurrent.swift +++ b/Sources/TensorFlow/Layers/Recurrent.swift @@ -24,7 +24,7 @@ public struct RNNCellInput: Differ /// The previous state. public var state: State - @differentiable + @differentiable(reverse) public init(input: Input, state: State) { self.input = input self.state = state @@ -41,7 +41,7 @@ public struct RNNCellOutput: Diff /// The current state. public var state: State - @differentiable + @differentiable(reverse) public init(output: Output, state: State) { self.output = output self.state = state @@ -76,7 +76,7 @@ extension RecurrentLayerCell { /// - timeStepInput: The input at the current time step. 
/// - previousState: The previous state of the recurrent layer cell. /// - Returns: The output. - @differentiable + @differentiable(reverse) public func callAsFunction( input: TimeStepInput, state: State @@ -84,7 +84,7 @@ extension RecurrentLayerCell { self(RNNCellInput(input: input, state: state)) } - @differentiable + @differentiable(reverse) public func call(input: TimeStepInput, state: State) -> RNNCellOutput { self(RNNCellInput(input: input, state: state)) } @@ -122,7 +122,7 @@ public struct BasicRNNCell: RecurrentLayerCell /// /// - Parameter input: The input to the layer. /// - Returns: The hidden state. - @differentiable + @differentiable(reverse) public func callAsFunction(_ input: Input) -> Output { let concatenatedInput = input.input.concatenated(with: input.state, alongAxis: 1) let newState = tanh(matmul(concatenatedInput, weight) + bias) @@ -202,14 +202,14 @@ public struct LSTMCell: RecurrentLayerCell { public var cell: Tensor public var hidden: Tensor - @differentiable + @differentiable(reverse) public init(cell: Tensor, hidden: Tensor) { self.cell = cell self.hidden = hidden } /// Concatenates two values. - @differentiable + @differentiable(reverse) public static func concatenate(_ lhs: Self, _ rhs: Self) -> Self { // TODO(TF-1005): Remove workaround for differenting concatenated. let concatCell = lhs.cell.concatenated(with: rhs.cell, alongAxis: -1) @@ -224,25 +224,25 @@ public struct LSTMCell: RecurrentLayerCell { } /// Adds two values and produces their sum. - @differentiable + @differentiable(reverse) public static func sum(_ lhs: Self, _ rhs: Self) -> Self { Self(cell: lhs.cell + rhs.cell, hidden: lhs.hidden + rhs.hidden) } /// Averages two values. - @differentiable + @differentiable(reverse) public static func average(_ lhs: Self, _ rhs: Self) -> Self { Self(cell: (lhs.cell + rhs.cell) / 2, hidden: (lhs.hidden + rhs.hidden) / 2) } /// Multiplies two values. - @differentiable + @differentiable(reverse) public static func multiply(_ lhs: Self, _ rhs: Self) -> Self { Self(cell: lhs.cell * rhs.cell, hidden: lhs.hidden * rhs.hidden) } /// Stack two values. - @differentiable + @differentiable(reverse) public static func stack(_ lhs: Self, _ rhs: Self) -> Self { // TODO(TF-1005): Remove workaround for differenting stacking. let stackCell = Tensor(stacking: [lhs.cell, rhs.cell]) @@ -269,7 +269,7 @@ public struct LSTMCell: RecurrentLayerCell { /// /// - Parameter input: The input to the layer. /// - Returns: The hidden state. - @differentiable + @differentiable(reverse) public func callAsFunction(_ input: Input) -> Output { let gateInput = input.input.concatenated(with: input.state.hidden, alongAxis: 1) @@ -344,7 +344,7 @@ public struct GRUCell: RecurrentLayerCell { /// /// - Parameter input: The input to the layer. /// - Returns: The hidden state. 
- @differentiable + @differentiable(reverse) public func callAsFunction(_ input: Input) -> Output { let updateGate = sigmoid( (matmul(input.input, updateKernel) + updateBias) @@ -377,7 +377,7 @@ public struct RecurrentLayer: Layer { self.cell = cell() } - @differentiable(wrt: (self, inputs, initialState)) + @differentiable(reverse, wrt: (self, inputs, initialState)) public func callAsFunction( _ inputs: [Cell.TimeStepInput], initialState: Cell.State @@ -393,7 +393,7 @@ public struct RecurrentLayer: Layer { return timeStepOutputs } - @differentiable(wrt: (self, inputs, initialState)) + @differentiable(reverse, wrt: (self, inputs, initialState)) public func call( _ inputs: [Cell.TimeStepInput], initialState: Cell.State @@ -445,13 +445,13 @@ public struct RecurrentLayer: Layer { ) } - @differentiable + @differentiable(reverse) public func callAsFunction(_ inputs: [Cell.TimeStepInput]) -> [Cell.TimeStepOutput] { let initialState = withoutDerivative(at: cell.zeroState(for: inputs[0])) return self(inputs, initialState: initialState) } - @differentiable(wrt: (self, inputs, initialState)) + @differentiable(reverse, wrt: (self, inputs, initialState)) public func lastOutput( from inputs: [Cell.TimeStepInput], initialState: Cell.State @@ -460,7 +460,7 @@ public struct RecurrentLayer: Layer { return self(inputs, initialState: initialState)[withoutDerivative(at: inputs.count - 1)] } - @differentiable(wrt: (self, inputs)) + @differentiable(reverse, wrt: (self, inputs)) public func lastOutput(from inputs: [Cell.TimeStepInput]) -> Cell.TimeStepOutput { precondition(!inputs.isEmpty, "'inputs' must be non-empty.") let initialState = withoutDerivative(at: cell.zeroState(for: inputs[0])) @@ -473,32 +473,32 @@ public struct RecurrentLayer: Layer { /// Used by `BidirectionalRecurrentLayer` as a generic requirement for merge functions. public protocol Mergeable: Differentiable, AdditiveArithmetic { /// Concatenates two values. - @differentiable + @differentiable(reverse) static func concatenate(_ lhs: Self, _ rhs: Self) -> Self /// Adds two values and produces their sum. /// /// - Note: renaming `sum` to `+` results in a compiler crash when conforming `Tensor` to /// `Mergeable` (SR-13229). - @differentiable + @differentiable(reverse) static func sum(_ lhs: Self, _ rhs: Self) -> Self /// Averages two values. - @differentiable + @differentiable(reverse) static func average(_ lhs: Self, _ rhs: Self) -> Self /// Multiplies two values. - @differentiable + @differentiable(reverse) static func multiply(_ lhs: Self, _ rhs: Self) -> Self /// Stack two values. - @differentiable + @differentiable(reverse) static func stack(_ lhs: Self, _ rhs: Self) -> Self } extension Tensor: Mergeable where Scalar: TensorFlowFloatingPoint { /// Concatenates two tensors along last axis. - @differentiable + @differentiable(reverse) public static func concatenate(_ lhs: Tensor, _ rhs: Tensor) -> Tensor { // TODO(TF-1005): Remove workaround for differenting concatenated. let concat = lhs.concatenated(with: rhs, alongAxis: -1) @@ -508,25 +508,25 @@ extension Tensor: Mergeable where Scalar: TensorFlowFloatingPoint { } /// Adds two values and produces their sum. - @differentiable + @differentiable(reverse) public static func sum(_ lhs: Tensor, _ rhs: Tensor) -> Tensor { lhs + rhs } /// Averages two values. - @differentiable + @differentiable(reverse) public static func average(_ lhs: Tensor, _ rhs: Tensor) -> Tensor { (lhs + rhs) / 2 } /// Multiplies two values. 
- @differentiable + @differentiable(reverse) public static func multiply(_ lhs: Tensor, _ rhs: Tensor) -> Tensor { lhs * rhs } /// Stack two values. - @differentiable + @differentiable(reverse) public static func stack(_ lhs: Tensor, _ rhs: Tensor) -> Tensor { // TODO(TF-1005): Remove workaround for differenting stacking. let stack = Tensor(stacking: [lhs, rhs]) @@ -537,7 +537,7 @@ extension Tensor: Mergeable where Scalar: TensorFlowFloatingPoint { } /// Concatenates two values. -@differentiable +@differentiable(reverse) public func concatenate( _ first: T, _ second: T @@ -546,7 +546,7 @@ public func concatenate( } /// Adds two values and produces their sum. -@differentiable +@differentiable(reverse) public func sum( _ first: T, _ second: T @@ -555,7 +555,7 @@ public func sum( } /// Averages two values. -@differentiable +@differentiable(reverse) public func average( _ first: T, _ second: T @@ -564,7 +564,7 @@ public func average( } /// Multiplies two values. -@differentiable +@differentiable(reverse) public func multiply( _ first: T, _ second: T @@ -573,7 +573,7 @@ public func multiply( } /// Stack two values. -@differentiable +@differentiable(reverse) public func stack( _ first: T, _ second: T @@ -585,7 +585,7 @@ public struct BidirectionalRecurrentLayer: Layer where Cell.TimeStepOutput: Mergeable { public typealias Input = [Cell.TimeStepInput] public typealias Output = [Cell.TimeStepOutput] - public typealias MergeFunction = @differentiable (Cell.TimeStepOutput, Cell.TimeStepOutput) -> Cell.TimeStepOutput + public typealias MergeFunction = @differentiable(reverse) (Cell.TimeStepOutput, Cell.TimeStepOutput) -> Cell.TimeStepOutput /// A wrapper around a `@differentiable` merge function. /// @@ -615,7 +615,7 @@ where Cell.TimeStepOutput: Mergeable { _mergeFunction = .init(mergeFunction) } - @differentiable + @differentiable(reverse) public func callAsFunction( _ inputs: Input, initialForwardLayerState: Cell.State, @@ -629,7 +629,7 @@ where Cell.TimeStepOutput: Mergeable { backwardOutputs.differentiableReversed(), mergeFunction: mergeFunction) } - @differentiable + @differentiable(reverse) public func callAsFunction(_ inputs: Input) -> Output { precondition(!inputs.isEmpty, "'inputs' must be non-empty.") let initialForwardLayerState = withoutDerivative( @@ -643,7 +643,7 @@ where Cell.TimeStepOutput: Mergeable { ) } - @differentiable + @differentiable(reverse) public func lastOutput( from inputs: Input, initialForwardLayerState: Cell.State, @@ -657,7 +657,7 @@ where Cell.TimeStepOutput: Mergeable { )[withoutDerivative(at: inputs.count - 1)] } - @differentiable + @differentiable(reverse) public func lastOutput(from inputs: Input) -> Cell.TimeStepOutput { precondition(!inputs.isEmpty, "'inputs' must be non-empty.") return self(inputs)[withoutDerivative(at: inputs.count - 1)] @@ -695,7 +695,7 @@ fileprivate extension Array where Element: Differentiable { /// /// This has a custom derivative, which works around the SR-13945 segfault that you would /// encounter if you tried to implement this at the callsite using a for loop. - @differentiable + @differentiable(reverse) func differentiableReversed() -> Self { .init(self.reversed()) } @@ -711,19 +711,19 @@ fileprivate extension Array where Element: Differentiable { /// /// This has a custom derivative, which works around the SR-13945 segfault that you would /// encounter if you tried to implement this at the callsite using a for loop. 
- @differentiable + @differentiable(reverse) func differentiableMerging( - _ other: Self, mergeFunction: @differentiable (Element, Element) -> Element + _ other: Self, mergeFunction: @differentiable(reverse) (Element, Element) -> Element ) -> Self { zip(self, other).map { mergeFunction($0.0, $0.1) } } @derivative(of: differentiableMerging) func vjpDifferentiableMerging( - _ other: Self, mergeFunction: @differentiable (Element, Element) -> Element + _ other: Self, mergeFunction: @differentiable(reverse) (Element, Element) -> Element ) -> (value: Self, pullback: (TangentVector) -> (TangentVector, TangentVector)) { let valuesWithPullbacks = zip(self, other).map { - valueWithPullback(at: $0.0, $0.1, in: mergeFunction) + valueWithPullback(at: $0.0, $0.1, of: mergeFunction) } let pullbacks = valuesWithPullbacks.map { $0.pullback } return ( diff --git a/Sources/TensorFlow/Layers/Sequential.swift b/Sources/TensorFlow/Layers/Sequential.swift index 3631ec405..951c2c088 100644 --- a/Sources/TensorFlow/Layers/Sequential.swift +++ b/Sources/TensorFlow/Layers/Sequential.swift @@ -47,8 +47,7 @@ import _Differentiation /// ```` public struct Sequential: Module where - Layer1.Output == Layer2.Input, - Layer1.TangentVector.VectorSpaceScalar == Layer2.TangentVector.VectorSpaceScalar + Layer1.Output == Layer2.Input { public var layer1: Layer1 public var layer2: Layer2 @@ -58,7 +57,7 @@ where self.layer2 = layer2 } - @differentiable(wrt: self) + @differentiable(reverse, wrt: self) public func callAsFunction(_ input: Layer1.Input) -> Layer2.Output { layer2(layer1(input)) } @@ -69,7 +68,7 @@ where } extension Sequential: Layer where Layer1: Layer { - @differentiable + @differentiable(reverse) public func callAsFunction(_ input: Layer1.Input) -> Layer2.Output { layer2(layer1(input)) } @@ -78,42 +77,28 @@ extension Sequential: Layer where Layer1: Layer { /// A layer that sequentially composes 3 layers. public typealias Sequential3 = Sequential> where - L1.Output == L2.Input, L2.Output == L3.Input, - L1.TangentVector.VectorSpaceScalar == L2.TangentVector.VectorSpaceScalar, - L2.TangentVector.VectorSpaceScalar == L3.TangentVector.VectorSpaceScalar + L1.Output == L2.Input, L2.Output == L3.Input /// A layer that sequentially composes 4 layers. public typealias Sequential4 = Sequential< L1, Sequential> > where - L1.Output == L2.Input, L2.Output == L3.Input, L3.Output == L4.Input, - L1.TangentVector.VectorSpaceScalar == L2.TangentVector.VectorSpaceScalar, - L2.TangentVector.VectorSpaceScalar == L3.TangentVector.VectorSpaceScalar, - L3.TangentVector.VectorSpaceScalar == L4.TangentVector.VectorSpaceScalar + L1.Output == L2.Input, L2.Output == L3.Input, L3.Output == L4.Input /// A layer that sequentially composes 5 layers. public typealias Sequential5 = Sequential< L1, Sequential>> > where - L1.Output == L2.Input, L2.Output == L3.Input, L3.Output == L4.Input, L4.Output == L5.Input, - L1.TangentVector.VectorSpaceScalar == L2.TangentVector.VectorSpaceScalar, - L2.TangentVector.VectorSpaceScalar == L3.TangentVector.VectorSpaceScalar, - L3.TangentVector.VectorSpaceScalar == L4.TangentVector.VectorSpaceScalar, - L4.TangentVector.VectorSpaceScalar == L5.TangentVector.VectorSpaceScalar + L1.Output == L2.Input, L2.Output == L3.Input, L3.Output == L4.Input, L4.Output == L5.Input /// A layer that sequentially composes 6 layers. 
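// A minimal sketch of the custom-derivative pattern used by differentiableMerging
// above: a @differentiable(reverse) function paired with a VJP registered via
// @derivative(of:). The function below is illustrative, not part of the patch.
import _Differentiation

@differentiable(reverse)
func triple(_ x: Float) -> Float { 3 * x }

@derivative(of: triple)
func vjpTriple(_ x: Float) -> (value: Float, pullback: (Float) -> Float) {
  (value: 3 * x, pullback: { v in 3 * v })
}

let slope = gradient(at: Float(2), of: triple)  // 3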
public typealias Sequential6 = Sequential>>>> where L1.Output == L2.Input, L2.Output == L3.Input, L3.Output == L4.Input, L4.Output == L5.Input, - L5.Output == L6.Input, - L1.TangentVector.VectorSpaceScalar == L2.TangentVector.VectorSpaceScalar, - L2.TangentVector.VectorSpaceScalar == L3.TangentVector.VectorSpaceScalar, - L3.TangentVector.VectorSpaceScalar == L4.TangentVector.VectorSpaceScalar, - L4.TangentVector.VectorSpaceScalar == L5.TangentVector.VectorSpaceScalar, - L5.TangentVector.VectorSpaceScalar == L6.TangentVector.VectorSpaceScalar + L5.Output == L6.Input /// A layer that sequentially composes 7 layers. public typealias Sequential7< @@ -123,13 +108,7 @@ public typealias Sequential7< > where L1.Output == L2.Input, L2.Output == L3.Input, L3.Output == L4.Input, L4.Output == L5.Input, - L5.Output == L6.Input, L6.Output == L7.Input, - L1.TangentVector.VectorSpaceScalar == L2.TangentVector.VectorSpaceScalar, - L2.TangentVector.VectorSpaceScalar == L3.TangentVector.VectorSpaceScalar, - L3.TangentVector.VectorSpaceScalar == L4.TangentVector.VectorSpaceScalar, - L4.TangentVector.VectorSpaceScalar == L5.TangentVector.VectorSpaceScalar, - L5.TangentVector.VectorSpaceScalar == L6.TangentVector.VectorSpaceScalar, - L6.TangentVector.VectorSpaceScalar == L7.TangentVector.VectorSpaceScalar + L5.Output == L6.Input, L6.Output == L7.Input /// A layer that sequentially composes 8 layers. public typealias Sequential8< @@ -140,14 +119,7 @@ public typealias Sequential8< > where L1.Output == L2.Input, L2.Output == L3.Input, L3.Output == L4.Input, L4.Output == L5.Input, - L5.Output == L6.Input, L6.Output == L7.Input, L7.Output == L8.Input, - L1.TangentVector.VectorSpaceScalar == L2.TangentVector.VectorSpaceScalar, - L2.TangentVector.VectorSpaceScalar == L3.TangentVector.VectorSpaceScalar, - L3.TangentVector.VectorSpaceScalar == L4.TangentVector.VectorSpaceScalar, - L4.TangentVector.VectorSpaceScalar == L5.TangentVector.VectorSpaceScalar, - L5.TangentVector.VectorSpaceScalar == L6.TangentVector.VectorSpaceScalar, - L6.TangentVector.VectorSpaceScalar == L7.TangentVector.VectorSpaceScalar, - L7.TangentVector.VectorSpaceScalar == L8.TangentVector.VectorSpaceScalar + L5.Output == L6.Input, L6.Output == L7.Input, L7.Output == L8.Input /// A layer that sequentially composes 9 layers. public typealias Sequential9< @@ -163,15 +135,7 @@ public typealias Sequential9< > where L1.Output == L2.Input, L2.Output == L3.Input, L3.Output == L4.Input, L4.Output == L5.Input, - L5.Output == L6.Input, L6.Output == L7.Input, L7.Output == L8.Input, L8.Output == L9.Input, - L1.TangentVector.VectorSpaceScalar == L2.TangentVector.VectorSpaceScalar, - L2.TangentVector.VectorSpaceScalar == L3.TangentVector.VectorSpaceScalar, - L3.TangentVector.VectorSpaceScalar == L4.TangentVector.VectorSpaceScalar, - L4.TangentVector.VectorSpaceScalar == L5.TangentVector.VectorSpaceScalar, - L5.TangentVector.VectorSpaceScalar == L6.TangentVector.VectorSpaceScalar, - L6.TangentVector.VectorSpaceScalar == L7.TangentVector.VectorSpaceScalar, - L7.TangentVector.VectorSpaceScalar == L8.TangentVector.VectorSpaceScalar, - L8.TangentVector.VectorSpaceScalar == L9.TangentVector.VectorSpaceScalar + L5.Output == L6.Input, L6.Output == L7.Input, L7.Output == L8.Input, L8.Output == L9.Input /// A layer that sequentially composes 10 layers. 
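// A minimal sketch, assuming this branch plus Sequential's result-builder
// initializer and a two-layer LayerBuilder overload alongside the 3...10-layer
// overloads below: with the VectorSpaceScalar constraints removed, composition
// only requires each Output to match the next Input.
import _Differentiation
import TensorFlow

let model = Sequential {
  Dense<Float>(inputSize: 4, outputSize: 8, activation: relu)
  Dense<Float>(inputSize: 8, outputSize: 2)
}

let input = Tensor<Float>(randomNormal: [1, 4])
let (output, modelPullback) = valueWithPullback(at: model) { m in m(input) }
// `modelPullback` maps an output cotangent to a TangentVector over both Dense layers.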
public typealias Sequential10< @@ -192,16 +156,7 @@ public typealias Sequential10< where L1.Output == L2.Input, L2.Output == L3.Input, L3.Output == L4.Input, L4.Output == L5.Input, L5.Output == L6.Input, L6.Output == L7.Input, L7.Output == L8.Input, L8.Output == L9.Input, - L9.Output == L10.Input, - L1.TangentVector.VectorSpaceScalar == L2.TangentVector.VectorSpaceScalar, - L2.TangentVector.VectorSpaceScalar == L3.TangentVector.VectorSpaceScalar, - L3.TangentVector.VectorSpaceScalar == L4.TangentVector.VectorSpaceScalar, - L4.TangentVector.VectorSpaceScalar == L5.TangentVector.VectorSpaceScalar, - L5.TangentVector.VectorSpaceScalar == L6.TangentVector.VectorSpaceScalar, - L6.TangentVector.VectorSpaceScalar == L7.TangentVector.VectorSpaceScalar, - L7.TangentVector.VectorSpaceScalar == L8.TangentVector.VectorSpaceScalar, - L8.TangentVector.VectorSpaceScalar == L9.TangentVector.VectorSpaceScalar, - L9.TangentVector.VectorSpaceScalar == L10.TangentVector.VectorSpaceScalar + L9.Output == L10.Input @resultBuilder public struct LayerBuilder { @@ -218,9 +173,7 @@ public struct LayerBuilder { -> Sequential> where L1.Output == L2.Input, - L2.Output == L3.Input, - L1.TangentVector.VectorSpaceScalar == L2.TangentVector.VectorSpaceScalar, - L2.TangentVector.VectorSpaceScalar == L3.TangentVector.VectorSpaceScalar + L2.Output == L3.Input { Sequential(l1, Sequential(l2, l3)) } @@ -235,10 +188,7 @@ public struct LayerBuilder { where L1.Output == L2.Input, L2.Output == L3.Input, - L3.Output == L4.Input, - L1.TangentVector.VectorSpaceScalar == L2.TangentVector.VectorSpaceScalar, - L2.TangentVector.VectorSpaceScalar == L3.TangentVector.VectorSpaceScalar, - L3.TangentVector.VectorSpaceScalar == L4.TangentVector.VectorSpaceScalar + L3.Output == L4.Input { Sequential(l1, Sequential(l2, Sequential(l3, l4))) } @@ -255,11 +205,7 @@ public struct LayerBuilder { L1.Output == L2.Input, L2.Output == L3.Input, L3.Output == L4.Input, - L4.Output == L5.Input, - L1.TangentVector.VectorSpaceScalar == L2.TangentVector.VectorSpaceScalar, - L2.TangentVector.VectorSpaceScalar == L3.TangentVector.VectorSpaceScalar, - L3.TangentVector.VectorSpaceScalar == L4.TangentVector.VectorSpaceScalar, - L4.TangentVector.VectorSpaceScalar == L5.TangentVector.VectorSpaceScalar + L4.Output == L5.Input { Sequential(l1, Sequential(l2, Sequential(l3, Sequential(l4, l5)))) } @@ -278,12 +224,7 @@ public struct LayerBuilder { L2.Output == L3.Input, L3.Output == L4.Input, L4.Output == L5.Input, - L5.Output == L6.Input, - L1.TangentVector.VectorSpaceScalar == L2.TangentVector.VectorSpaceScalar, - L2.TangentVector.VectorSpaceScalar == L3.TangentVector.VectorSpaceScalar, - L3.TangentVector.VectorSpaceScalar == L4.TangentVector.VectorSpaceScalar, - L4.TangentVector.VectorSpaceScalar == L5.TangentVector.VectorSpaceScalar, - L5.TangentVector.VectorSpaceScalar == L6.TangentVector.VectorSpaceScalar + L5.Output == L6.Input { Sequential(l1, Sequential(l2, Sequential(l3, Sequential(l4, Sequential(l5, l6))))) } @@ -306,13 +247,7 @@ public struct LayerBuilder { L3.Output == L4.Input, L4.Output == L5.Input, L5.Output == L6.Input, - L6.Output == L7.Input, - L1.TangentVector.VectorSpaceScalar == L2.TangentVector.VectorSpaceScalar, - L2.TangentVector.VectorSpaceScalar == L3.TangentVector.VectorSpaceScalar, - L3.TangentVector.VectorSpaceScalar == L4.TangentVector.VectorSpaceScalar, - L4.TangentVector.VectorSpaceScalar == L5.TangentVector.VectorSpaceScalar, - L5.TangentVector.VectorSpaceScalar == L6.TangentVector.VectorSpaceScalar, - L6.TangentVector.VectorSpaceScalar 
== L7.TangentVector.VectorSpaceScalar + L6.Output == L7.Input { Sequential( l1, Sequential(l2, Sequential(l3, Sequential(l4, Sequential(l5, Sequential(l6, l7)))))) @@ -341,14 +276,7 @@ public struct LayerBuilder { L4.Output == L5.Input, L5.Output == L6.Input, L6.Output == L7.Input, - L7.Output == L8.Input, - L1.TangentVector.VectorSpaceScalar == L2.TangentVector.VectorSpaceScalar, - L2.TangentVector.VectorSpaceScalar == L3.TangentVector.VectorSpaceScalar, - L3.TangentVector.VectorSpaceScalar == L4.TangentVector.VectorSpaceScalar, - L4.TangentVector.VectorSpaceScalar == L5.TangentVector.VectorSpaceScalar, - L5.TangentVector.VectorSpaceScalar == L6.TangentVector.VectorSpaceScalar, - L6.TangentVector.VectorSpaceScalar == L7.TangentVector.VectorSpaceScalar, - L7.TangentVector.VectorSpaceScalar == L8.TangentVector.VectorSpaceScalar + L7.Output == L8.Input { Sequential( l1, @@ -384,15 +312,7 @@ public struct LayerBuilder { L5.Output == L6.Input, L6.Output == L7.Input, L7.Output == L8.Input, - L8.Output == L9.Input, - L1.TangentVector.VectorSpaceScalar == L2.TangentVector.VectorSpaceScalar, - L2.TangentVector.VectorSpaceScalar == L3.TangentVector.VectorSpaceScalar, - L3.TangentVector.VectorSpaceScalar == L4.TangentVector.VectorSpaceScalar, - L4.TangentVector.VectorSpaceScalar == L5.TangentVector.VectorSpaceScalar, - L5.TangentVector.VectorSpaceScalar == L6.TangentVector.VectorSpaceScalar, - L6.TangentVector.VectorSpaceScalar == L7.TangentVector.VectorSpaceScalar, - L7.TangentVector.VectorSpaceScalar == L8.TangentVector.VectorSpaceScalar, - L8.TangentVector.VectorSpaceScalar == L9.TangentVector.VectorSpaceScalar + L8.Output == L9.Input { Sequential( l1, @@ -438,16 +358,7 @@ public struct LayerBuilder { L6.Output == L7.Input, L7.Output == L8.Input, L8.Output == L9.Input, - L9.Output == L10.Input, - L1.TangentVector.VectorSpaceScalar == L2.TangentVector.VectorSpaceScalar, - L2.TangentVector.VectorSpaceScalar == L3.TangentVector.VectorSpaceScalar, - L3.TangentVector.VectorSpaceScalar == L4.TangentVector.VectorSpaceScalar, - L4.TangentVector.VectorSpaceScalar == L5.TangentVector.VectorSpaceScalar, - L5.TangentVector.VectorSpaceScalar == L6.TangentVector.VectorSpaceScalar, - L6.TangentVector.VectorSpaceScalar == L7.TangentVector.VectorSpaceScalar, - L7.TangentVector.VectorSpaceScalar == L8.TangentVector.VectorSpaceScalar, - L8.TangentVector.VectorSpaceScalar == L9.TangentVector.VectorSpaceScalar, - L9.TangentVector.VectorSpaceScalar == L10.TangentVector.VectorSpaceScalar + L9.Output == L10.Input { Sequential( l1, diff --git a/Sources/TensorFlow/Layers/Sequential.swift.gyb b/Sources/TensorFlow/Layers/Sequential.swift.gyb index b1bf45afe..cf38485a3 100644 --- a/Sources/TensorFlow/Layers/Sequential.swift.gyb +++ b/Sources/TensorFlow/Layers/Sequential.swift.gyb @@ -56,7 +56,7 @@ public struct Sequential: Module self.layer2 = layer2 } - @differentiable(wrt: self) + @differentiable(reverse, wrt: self) public func callAsFunction(_ input: Layer1.Input) -> Layer2.Output { layer2(layer1(input)) } @@ -67,7 +67,7 @@ public struct Sequential: Module } extension Sequential: Layer where Layer1: Layer { - @differentiable + @differentiable(reverse) public func callAsFunction(_ input: Layer1.Input) -> Layer2.Output { layer2(layer1(input)) } diff --git a/Sources/TensorFlow/Layers/Upsampling.swift b/Sources/TensorFlow/Layers/Upsampling.swift index 0fb4a2fed..8e4eb0d2f 100644 --- a/Sources/TensorFlow/Layers/Upsampling.swift +++ b/Sources/TensorFlow/Layers/Upsampling.swift @@ -32,8 +32,8 @@ public struct UpSampling1D: 
ParameterlessLayer /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { let shape = input.shape let (batchSize, timesteps, channels) = (shape[0], shape[1], shape[2]) let scaleOnes = Tensor(ones: [1, 1, size, 1], on: input.device) @@ -60,8 +60,8 @@ public struct UpSampling2D: ParameterlessLayer /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { let device = input.device let shape = input.shape let (batchSize, height, width, channels) = (shape[0], shape[1], shape[2], shape[3]) @@ -88,7 +88,7 @@ public struct UpSampling3D: ParameterlessLayer /// Repeats the elements of a tensor along an axis, like `np.repeat`. /// Function adapted from `def repeat_elements`: /// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/backend.py - @differentiable + @differentiable(reverse) private func repeatingElements( _ input: Tensor, alongAxis axis: Int, count: Int ) -> Tensor { @@ -123,8 +123,8 @@ public struct UpSampling3D: ParameterlessLayer /// /// - Parameter input: The input to the layer. /// - Returns: The output. - @differentiable - public func forward(_ input: Tensor) -> Tensor { + @differentiable(reverse) + public func callAsFunction(_ input: Tensor) -> Tensor { var result = repeatingElements(input, alongAxis: 1, count: size) result = repeatingElements(result, alongAxis: 2, count: size) result = repeatingElements(result, alongAxis: 3, count: size) diff --git a/Sources/TensorFlow/Loss.swift b/Sources/TensorFlow/Loss.swift index 4477cff93..d5e7ed01c 100644 --- a/Sources/TensorFlow/Loss.swift +++ b/Sources/TensorFlow/Loss.swift @@ -21,12 +21,12 @@ import _Differentiation /// - predicted: Predicted outputs from a neural network. /// - expected: Expected values, i.e. targets, that correspond to the correct output. /// - reduction: Reduction to apply on the computed element-wise loss values. -@differentiable(wrt: predicted) -@differentiable(wrt: (predicted, expected)) +@differentiable(reverse, wrt: predicted) +@differentiable(reverse, wrt: (predicted, expected)) public func l1Loss( predicted: Tensor, expected: Tensor, - reduction: @differentiable (Tensor) -> Tensor = _sum + reduction: @differentiable(reverse) (Tensor) -> Tensor = _sum ) -> Tensor { reduction(abs(expected - predicted)) } @@ -38,12 +38,12 @@ public func l1Loss( /// - predicted: Predicted outputs from a neural network. /// - expected: Expected values, i.e. targets, that correspond to the correct output. /// - reduction: Reduction to apply on the computed element-wise loss values. -@differentiable(wrt: predicted) -@differentiable(wrt: (predicted, expected)) +@differentiable(reverse, wrt: predicted) +@differentiable(reverse, wrt: (predicted, expected)) public func l2Loss( predicted: Tensor, expected: Tensor, - reduction: @differentiable (Tensor) -> Tensor = _sum + reduction: @differentiable(reverse) (Tensor) -> Tensor = _sum ) -> Tensor { reduction((expected - predicted).squared()) } @@ -54,8 +54,8 @@ public func l2Loss( /// - Parameters: /// - predicted: Predicted outputs from a neural network. /// - expected: Expected values, i.e. targets, that correspond to the correct output. 
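// A minimal usage sketch for the Upsampling.swift hunks above, assuming this
// branch: with forward(_:) renamed back to callAsFunction(_:), the upsampling
// layers are applied by calling the layer value directly. Shapes are illustrative.
import TensorFlow

let upsample = UpSampling2D<Float>(size: 2)
let images = Tensor<Float>(ones: [1, 4, 4, 3])  // NHWC
let enlarged = upsample(images)                 // shape [1, 8, 8, 3]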
-@differentiable(wrt: predicted) -@differentiable(wrt: (predicted, expected)) +@differentiable(reverse, wrt: predicted) +@differentiable(reverse, wrt: (predicted, expected)) public func meanAbsoluteError( predicted: Tensor, expected: Tensor @@ -69,8 +69,8 @@ public func meanAbsoluteError( /// - Parameters: /// - predicted: Predicted outputs from a neural network. /// - expected: Expected values, i.e. targets, that correspond to the correct output. -@differentiable(wrt: predicted) -@differentiable(wrt: (predicted, expected)) +@differentiable(reverse, wrt: predicted) +@differentiable(reverse, wrt: (predicted, expected)) public func meanSquaredError( predicted: Tensor, expected: Tensor @@ -87,8 +87,8 @@ public func meanSquaredError( /// - Parameters: /// - predicted: Predicted outputs from a neural network. /// - expected: Expected values, i.e. targets, that correspond to the correct output. -@differentiable(wrt: predicted) -@differentiable(wrt: (predicted, expected)) +@differentiable(reverse, wrt: predicted) +@differentiable(reverse, wrt: (predicted, expected)) public func meanSquaredLogarithmicError( predicted: Tensor, expected: Tensor @@ -104,8 +104,8 @@ public func meanSquaredLogarithmicError( /// - Parameters: /// - predicted: Predicted outputs from a neural network. /// - expected: Expected values, i.e. targets, that correspond to the correct output. -@differentiable(wrt: predicted) -@differentiable(wrt: (predicted, expected)) +@differentiable(reverse, wrt: predicted) +@differentiable(reverse, wrt: (predicted, expected)) public func meanAbsolutePercentageError( predicted: Tensor, expected: Tensor @@ -121,12 +121,12 @@ public func meanAbsolutePercentageError( /// - predicted: Predicted outputs from a neural network. /// - expected: Expected values, i.e. targets, that correspond to the correct output. /// - reduction: Reduction to apply on the computed element-wise loss values. -@differentiable(wrt: predicted) -@differentiable(wrt: (predicted, expected)) +@differentiable(reverse, wrt: predicted) +@differentiable(reverse, wrt: (predicted, expected)) public func hingeLoss( predicted: Tensor, expected: Tensor, - reduction: @differentiable (Tensor) -> Tensor = _mean + reduction: @differentiable(reverse) (Tensor) -> Tensor = _mean ) -> Tensor { let device = predicted.device return reduction(max(Tensor(0, on: device), Tensor(1, on: device) - expected * predicted)) @@ -140,12 +140,12 @@ public func hingeLoss( /// - predicted: Predicted outputs from a neural network. /// - expected: Expected values, i.e. targets, that correspond to the correct output. /// - reduction: Reduction to apply on the computed element-wise loss values. -@differentiable(wrt: predicted) -@differentiable(wrt: (predicted, expected)) +@differentiable(reverse, wrt: predicted) +@differentiable(reverse, wrt: (predicted, expected)) public func squaredHingeLoss( predicted: Tensor, expected: Tensor, - reduction: @differentiable (Tensor) -> Tensor = _mean + reduction: @differentiable(reverse) (Tensor) -> Tensor = _mean ) -> Tensor { reduction(hingeLoss(predicted: predicted, expected: expected).squared()) } @@ -159,12 +159,12 @@ public func squaredHingeLoss( /// - predicted: Predicted outputs from a neural network. /// - expected: Expected values, i.e. targets, that correspond to the correct output. /// - reduction: Reduction to apply on the computed element-wise loss values. 
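// A minimal sketch, assuming this branch: the reduction parameter is now a
// @differentiable(reverse) closure, so a custom reduction can be passed inline and
// the loss remains differentiable with respect to `predicted`. Values illustrative.
import _Differentiation
import TensorFlow

let predicted = Tensor<Float>([0.8, 0.2, 0.4])
let expected = Tensor<Float>([1.0, 0.0, 1.0])

let (loss, lossPullback) = valueWithPullback(at: predicted) { p in
  l2Loss(predicted: p, expected: expected, reduction: { $0.mean() })
}
let dPredicted = lossPullback(Tensor<Float>(1))  // d(loss)/d(predicted)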
-@differentiable(wrt: predicted) -@differentiable(wrt: (predicted, expected)) +@differentiable(reverse, wrt: predicted) +@differentiable(reverse, wrt: (predicted, expected)) public func categoricalHingeLoss( predicted: Tensor, expected: Tensor, - reduction: @differentiable (Tensor) -> Tensor = _mean + reduction: @differentiable(reverse) (Tensor) -> Tensor = _mean ) -> Tensor { let device = predicted.device let positive = (expected * predicted).sum(alongAxes: -1) @@ -180,12 +180,12 @@ public func categoricalHingeLoss( /// - predicted: Predicted outputs from a neural network. /// - expected: Expected values, i.e. targets, that correspond to the correct output. /// - reduction: Reduction to apply on the computed element-wise loss values. -@differentiable(wrt: predicted) -@differentiable(wrt: (predicted, expected)) +@differentiable(reverse, wrt: predicted) +@differentiable(reverse, wrt: (predicted, expected)) public func logCoshLoss( predicted: Tensor, expected: Tensor, - reduction: @differentiable (Tensor) -> Tensor = _mean + reduction: @differentiable(reverse) (Tensor) -> Tensor = _mean ) -> Tensor { let device = predicted.device let x = predicted - expected @@ -200,12 +200,12 @@ public func logCoshLoss( /// - predicted: Predicted outputs from a neural network. /// - expected: Expected values, i.e. targets, that correspond to the correct output. /// - reduction: Reduction to apply on the computed element-wise loss values. -@differentiable(wrt: predicted) -@differentiable(wrt: (predicted, expected)) +@differentiable(reverse, wrt: predicted) +@differentiable(reverse, wrt: (predicted, expected)) public func poissonLoss( predicted: Tensor, expected: Tensor, - reduction: @differentiable (Tensor) -> Tensor = _mean + reduction: @differentiable(reverse) (Tensor) -> Tensor = _mean ) -> Tensor { reduction(predicted - expected * log(predicted)) } @@ -217,12 +217,12 @@ public func poissonLoss( /// - predicted: Predicted outputs from a neural network. /// - expected: Expected values, i.e. targets, that correspond to the correct output. /// - reduction: Reduction to apply on the computed element-wise loss values. -@differentiable(wrt: predicted) -@differentiable(wrt: (predicted, expected)) +@differentiable(reverse, wrt: predicted) +@differentiable(reverse, wrt: (predicted, expected)) public func kullbackLeiblerDivergence( predicted: Tensor, expected: Tensor, - reduction: @differentiable (Tensor) -> Tensor = _sum + reduction: @differentiable(reverse) (Tensor) -> Tensor = _sum ) -> Tensor { reduction(expected * log(expected / predicted)) } @@ -236,17 +236,17 @@ public func kullbackLeiblerDivergence( /// - logits: One-hot encoded outputs from a neural network. /// - labels: Indices (zero-indexed) of the correct outputs. /// - reduction: Reduction to apply on the computed element-wise loss values. -@differentiable(wrt: logits) +@differentiable(reverse, wrt: logits) public func softmaxCrossEntropy( logits: Tensor, labels: Tensor, - reduction: @differentiable (Tensor) -> Tensor = _mean + reduction: @differentiable(reverse) (Tensor) -> Tensor = _mean ) -> Tensor { reduction(softmaxCrossEntropyHelper(logits: logits, labels: labels)) } @inlinable -@differentiable(wrt: logits) +@differentiable(reverse, wrt: logits) func softmaxCrossEntropyHelper( logits: Tensor, labels: Tensor @@ -274,17 +274,17 @@ func _vjpSoftmaxCrossEntropyHelper( /// - probabilities: Probability values that correspond to the correct output. Each row must be a /// valid probability distribution. 
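// A minimal sketch, assuming this branch: the integer-label softmaxCrossEntropy
// overload stays differentiable only with respect to `logits` (the labels have no
// tangent), so the pullback maps a scalar loss cotangent to a logits-shaped gradient.
import _Differentiation
import TensorFlow

let logits = Tensor<Float>([[2.0, 1.0, 0.1]])
let labels = Tensor<Int32>([0])

let (ceLoss, cePullback) = valueWithPullback(at: logits) { l in
  softmaxCrossEntropy(logits: l, labels: labels)
}
let dLogits = cePullback(Tensor<Float>(1))  // same shape as `logits`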
/// - reduction: Reduction to apply on the computed element-wise loss values. -@differentiable(wrt: logits) +@differentiable(reverse, wrt: logits) public func softmaxCrossEntropy( logits: Tensor, probabilities: Tensor, - reduction: @differentiable (Tensor) -> Tensor = _mean + reduction: @differentiable(reverse) (Tensor) -> Tensor = _mean ) -> Tensor { reduction(softmaxCrossEntropyHelper(logits: logits, probabilities: probabilities)) } @inlinable -@differentiable(wrt: logits) +@differentiable(reverse, wrt: logits) func softmaxCrossEntropyHelper( logits: Tensor, probabilities: Tensor @@ -311,12 +311,12 @@ func _vjpSoftmaxCrossEntropyHelper( /// - logits: The unscaled output of a neural network. /// - labels: Integer values that correspond to the correct output. /// - reduction: Reduction to apply on the computed element-wise loss values. -@differentiable(wrt: logits) -@differentiable(wrt: (logits, labels)) +@differentiable(reverse, wrt: logits) +@differentiable(reverse, wrt: (logits, labels)) public func sigmoidCrossEntropy( logits: Tensor, labels: Tensor, - reduction: @differentiable (Tensor) -> Tensor = _mean + reduction: @differentiable(reverse) (Tensor) -> Tensor = _mean ) -> Tensor { let device = logits.device // This numerically stable implementation is based on the TensorFlow Python API. @@ -339,13 +339,13 @@ public func sigmoidCrossEntropy( /// - delta: A floating point scalar representing the point where the Huber loss function changes /// from quadratic to linear. /// - reduction: Reduction to apply on the computed element-wise loss values. -@differentiable(wrt: predicted) -@differentiable(wrt: (predicted, expected)) +@differentiable(reverse, wrt: predicted) +@differentiable(reverse, wrt: (predicted, expected)) public func huberLoss( predicted: Tensor, expected: Tensor, delta: Scalar, - reduction: @differentiable (Tensor) -> Tensor = _sum + reduction: @differentiable(reverse) (Tensor) -> Tensor = _sum ) -> Tensor { let error = expected - predicted let absError = abs(error) @@ -357,7 +357,7 @@ public func huberLoss( /// Workaround for TF-1030 so that we can use sum as a default argument for reductions. /// `Tensor.sum()` is the preferred way to do this. // TODO(TF-1030): Remove this and replace with `{ $0.sum() }`. -@differentiable +@differentiable(reverse) public func _sum( _ value: Tensor ) -> Tensor { @@ -367,7 +367,7 @@ public func _sum( /// Workaround for TF-1030 so that we can use mean as a default argument for reductions. /// `Tensor.mean()` is the preferred way to do this. // TODO(TF-1030): Remove this and replace with `{ $0.mean() }`. -@differentiable +@differentiable(reverse) public func _mean( _ value: Tensor ) -> Tensor { diff --git a/Sources/TensorFlow/Operators/Basic.swift b/Sources/TensorFlow/Operators/Basic.swift index 1a131c2aa..d2ea259ac 100644 --- a/Sources/TensorFlow/Operators/Basic.swift +++ b/Sources/TensorFlow/Operators/Basic.swift @@ -18,7 +18,7 @@ infix operator .!=: ComparisonPrecedence /// Returns a tensor with the same shape and scalars as the specified tensor. @inlinable -@differentiable(where Scalar: TensorFlowFloatingPoint) +@differentiable(reverse where Scalar: TensorFlowFloatingPoint) public func identity(_ x: Tensor) -> Tensor { x } @@ -59,7 +59,7 @@ extension Tensor { /// /// - Returns: Array containing the unstacked tensors. 
@inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public func unstacked(alongAxis axis: Int = 0) -> [Tensor] { ensureValid(axis: axis) let posAxis = axis < 0 ? axis + rank : axis @@ -89,7 +89,7 @@ extension Tensor { /// /// - Returns: An array containing the tensors part. @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public func split(count: Int, alongAxis axis: Int = 0) -> [Tensor] { ensureValid(axis: axis) let canonicalAxis = axis < 0 ? axis + rank : axis @@ -123,7 +123,7 @@ extension Tensor { /// /// - Returns: Array containing the tensors parts. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func split(sizes: Tensor, alongAxis axis: Int = 0) -> [Tensor] { ensureValid(axis: axis) precondition( @@ -137,7 +137,7 @@ extension Tensor { } @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func split(sizes: [Int], alongAxis axis: Int = 0) -> [Tensor] { ensureValid(axis: axis) let canonicalAxis = axis < 0 ? axis + rank : axis @@ -161,7 +161,7 @@ extension Tensor { /// - Precondition: The shape of `multiples` must be `[tensor.rank]`. /// - Precondition: All scalars in `multiples` must be non-negative. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func tiled(multiples: [Int]) -> Tensor { precondition( multiples.allSatisfy { $0 >= 0 }, @@ -179,7 +179,7 @@ extension Tensor { /// - Precondition: The expected `rank` of multiples must be `1`. /// - Precondition: The shape of `multiples` must be `[tensor.rank]`. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func tiled(multiples: Tensor) -> Tensor { precondition(multiples.rank == 1, "The expected rank of multiples must be 1.") precondition( @@ -191,7 +191,7 @@ extension Tensor { /// Reshape to the shape of the specified `Tensor`. /// - Precondition: The number of scalars matches the new shape. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func reshaped(like other: Tensor) -> Tensor { reshaped(toShape: other.shapeTensor) } @@ -199,7 +199,7 @@ extension Tensor { /// Reshape to the specified shape. /// - Precondition: The number of scalars matches the new shape. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func reshaped(to newShape: TensorShape) -> Tensor { _Raw.reshape(self, shape: newShape.dimensions.map(Int64.init)) } @@ -207,14 +207,14 @@ extension Tensor { /// Reshape to the specified `Tensor` representing a shape. /// - Precondition: The number of scalars matches the new shape. 
@inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func reshaped(toShape newShape: Tensor) -> Tensor { return _Raw.reshape(self, shape: newShape) } /// Return a copy of the tensor collapsed into a 1-D `Tensor`, in row-major order. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func flattened() -> Tensor { reshaped(to: [-1]) } @@ -222,7 +222,7 @@ extension Tensor { /// Returns a shape-expanded `Tensor`, with a dimension of 1 inserted at the specified shape /// indices. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func expandingShape(at axes: Int...) -> Tensor { expandingShape(at: axes) } @@ -230,7 +230,7 @@ extension Tensor { /// Returns a shape-expanded `Tensor`, with a dimension of 1 inserted at the /// specified shape indices. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func expandingShape(at axes: [Int]) -> Tensor { var resultShape = self.shape.dimensions.map { Int64($0) } for i in axes { @@ -243,7 +243,7 @@ extension Tensor { /// Returns a rank-lifted `Tensor` with a leading dimension of 1. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func rankLifted() -> Tensor { expandingShape(at: 0) } @@ -251,7 +251,7 @@ extension Tensor { /// Removes the specified dimensions of size 1 from the shape of a tensor. If no dimensions are /// specified, then all dimensions of size 1 will be removed. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func squeezingShape(at axes: Int...) -> Tensor { squeezingShape(at: axes) } @@ -259,7 +259,7 @@ extension Tensor { /// Removes the specified dimensions of size 1 from the shape of a tensor. If no dimensions are /// specified, then all dimensions of size 1 will be removed. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func squeezingShape(at axes: [Int]) -> Tensor { _Raw.squeeze(self, squeezeDims: axes.map(Int32.init)) } @@ -375,7 +375,7 @@ infix operator ++: AdditionPrecedence extension Tensor { /// Returns a transposed tensor, with dimensions permuted in the specified order. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func transposed(permutation: Tensor) -> Tensor { _Raw.transpose(self, perm: permutation) } @@ -383,14 +383,14 @@ extension Tensor { /// Returns a transposed tensor, with dimensions permuted in the specified order. @available(*, deprecated, renamed: "transposed(permutation:)") @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func transposed(withPermutations permutations: Tensor) -> Tensor { transposed(permutation: permutations) } /// Returns a transposed tensor, with dimensions permuted in the specified order. 
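// A minimal sketch, assuming this branch: shape transformations such as
// reshaped(to:) and transposed() are differentiable with respect to `self`, so the
// pullback simply routes the cotangent back through the inverse transformation.
import _Differentiation
import TensorFlow

let matrix = Tensor<Float>([[1, 2], [3, 4]])
let (flatT, shapePullback) = valueWithPullback(at: matrix) { m in
  m.transposed().reshaped(to: [4])
}
let dMatrix = shapePullback(Tensor<Float>([1, 10, 100, 1000]))  // shape [2, 2] again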
@inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func transposed(permutation: [Int]) -> Tensor { _Raw.transpose(self, perm: permutation) } @@ -398,14 +398,14 @@ extension Tensor { /// Returns a transposed tensor, with dimensions permuted in the specified order. @available(*, deprecated, renamed: "transposed(permutation:)") @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func transposed(withPermutations permutations: [Int]) -> Tensor { transposed(permutation: permutations) } /// Returns a transposed tensor, with dimensions permuted in the specified order. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func transposed(permutation: Int...) -> Tensor { transposed(permutation: permutation) } @@ -413,14 +413,14 @@ extension Tensor { /// Returns a transposed tensor, with dimensions permuted in the specified order. @available(*, deprecated, renamed: "transposed(permutation:)") @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func transposed(withPermutations permutations: Int...) -> Tensor { transposed(permutation: permutations) } /// Returns a transposed tensor, with dimensions permuted in reverse order. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func transposed() -> Tensor { return transposed(permutation: Array(stride(from: Int(rank - 1), to: -1, by: -1))) } @@ -429,7 +429,7 @@ extension Tensor { /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { ensureValid(axes: axes) return _Raw.reverseV2(self, axis: axes) @@ -439,7 +439,7 @@ extension Tensor { /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { precondition( axes.count == Set(axes.map { $0 < 0 ? $0 + rank : $0 }).count, @@ -452,7 +452,7 @@ extension Tensor { /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { reversed(inAxes: axes) } @@ -462,7 +462,7 @@ extension Tensor { /// specified axis. /// - Precondition: The axis must be in the range `-rank.. Tensor { return Tensor(concatenating: [self, other], alongAxis: axis) } @@ -473,7 +473,7 @@ extension Tensor { /// and may be controversial. The existence/naming of `++` will be discussed /// during a later API design phase. @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public static func ++ (lhs: Tensor, rhs: Tensor) -> Tensor { return lhs.concatenated(with: rhs) } @@ -524,7 +524,7 @@ extension Tensor { /// /// - Returns: The gathered tensor. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func gathering( atIndices indices: Tensor, alongAxis axis: Int = 0 @@ -552,7 +552,7 @@ extension Tensor { /// /// - Returns: The gathered tensor. 
@inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func batchGathering( atIndices indices: Tensor, alongAxis axis: Int = 1, @@ -674,7 +674,7 @@ extension Tensor { /// - Returns: `(self.rank - K + 1)`-dimensional tensor populated by entries in this tensor /// corresponding to `true` values in `mask`. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func gathering(where mask: Tensor, alongAxis axis: Int = 0) -> Tensor { precondition(mask.rank != 0, "The boolean mask cannot be a scalar.") let posAxis = withoutDerivative(at: self.rank) { r in axis < 0 ? axis + r : axis } @@ -913,13 +913,13 @@ infix operator .= extension Tensor { @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func broadcasted(toShape shape: Tensor) -> Tensor { return _Raw.broadcastTo(self, shape: shape) } @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func broadcasted(to shape: TensorShape) -> Tensor { return broadcasted(toShape: Tensor({ shape.dimensions.map(Int32.init) }(), on: device)) } @@ -927,7 +927,7 @@ extension Tensor { /// Broadcast to the same shape as the specified `Tensor`. /// - Precondition: The specified shape must be compatible for broadcasting. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func broadcasted(like other: Tensor) -> Tensor { return broadcasted(toShape: other.shapeTensor) } @@ -940,7 +940,7 @@ extension Tensor { extension Tensor where Scalar: Numeric { @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func unbroadcasted(toShape otherShape: Tensor) -> Tensor { // TODO: Simplify this once differentiating control flow is supported. return unbroadcasted( @@ -951,13 +951,13 @@ extension Tensor where Scalar: Numeric { } @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func unbroadcasted(like other: Tensor) -> Tensor { return unbroadcasted(toShape: other.shapeTensor) } @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func unbroadcasted(to shape: TensorShape) -> Tensor { let dimensions = self.shape.dimensions var otherDimensions = shape.dimensions @@ -1031,7 +1031,7 @@ extension Tensor where Scalar: Numeric { /// Returns a tensor padded with constant according to the specified padding sizes. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func padded(forSizes sizes: [(before: Int, after: Int)], with value: Scalar = 0) -> Tensor { @@ -1040,7 +1040,7 @@ extension Tensor where Scalar: Numeric { /// Returns a padded tensor according to the specified padding sizes and mode. 
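// A minimal sketch, assuming this branch: padded(forSizes:with:) is differentiable
// with respect to `self`; its pullback slices the padded positions back off.
import _Differentiation
import TensorFlow

let v = Tensor<Float>([1, 2, 3])
let (paddedV, padPullback) = valueWithPullback(at: v) { t in
  t.padded(forSizes: [(before: 1, after: 2)], with: 0)
}
// paddedV == [0, 1, 2, 3, 0, 0]
let dV = padPullback(Tensor<Float>([9, 1, 2, 3, 9, 9]))  // dV == [1, 2, 3]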
@inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func padded(forSizes sizes: [(before: Int, after: Int)], mode: PaddingMode) -> Tensor { let paddings = Tensor( shape: [sizes.count, 2], @@ -1102,7 +1102,7 @@ extension Tensor { /// - Parameter lowerBounds: The lower bounds at each dimension. /// - Parameter upperBounds: The upper bounds at each dimension. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func slice(lowerBounds: [Int], upperBounds: [Int]) -> Tensor { // TODO: Precondition `lowerBounds.count == upperBounds.count`, // preferably in graph. @@ -1113,13 +1113,13 @@ extension Tensor { } @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func slice(lowerBounds: Tensor, sizes: Tensor) -> Tensor { return _Raw.slice(self, begin: lowerBounds, size: sizes) } @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func slice(lowerBounds: [Int], sizes: [Int]) -> Tensor { return _Raw.slice(self, begin: lowerBounds, size: sizes) } @@ -1297,7 +1297,7 @@ extension Tensor { } @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + // @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) internal subscript(_ indexPath: IndexPath) -> Tensor { get { let device = self.device @@ -1323,7 +1323,7 @@ extension Tensor { } @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + // @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public subscript(_ ranges: TensorRangeExpression...) 
-> Tensor { get { return self[{ IndexPath({ ranges.map { $0.tensorRange } }()) }()] @@ -1334,27 +1334,27 @@ extension Tensor { } } -extension Tensor where Scalar: TensorFlowFloatingPoint { - @usableFromInline - @derivative(of: subscript) - internal func _vjpSubscript( - _ indexPath: IndexPath - ) -> (value: Tensor, pullback: (Tensor) -> Tensor) { - return ( - self[indexPath], - { [shape = shapeTensor] v in - _Raw.stridedSliceGrad( - shape: shape, begin: Tensor(indexPath.begin, on: device), - end: Tensor(indexPath.end, on: device), - strides: Tensor(indexPath.strides, on: device), dy: v, - beginMask: indexPath.beginMask, - endMask: indexPath.endMask, ellipsisMask: indexPath.ellipsisMask, - newAxisMask: indexPath.newAxisMask, - shrinkAxisMask: indexPath.squeezeAxisMask) - } - ) - } -} +// extension Tensor { +// @usableFromInline +// @derivative(of: subscript) +// internal func _vjpSubscript( +// _ indexPath: IndexPath +// ) -> (value: Tensor, pullback: (Tensor) -> Tensor) { +// return ( +// self[indexPath], +// { [shape = shapeTensor] v in +// _Raw.stridedSliceGrad( +// shape: shape, begin: Tensor(indexPath.begin, on: device), +// end: Tensor(indexPath.end, on: device), +// strides: Tensor(indexPath.strides, on: device), dy: v, +// beginMask: indexPath.beginMask, +// endMask: indexPath.endMask, ellipsisMask: indexPath.ellipsisMask, +// newAxisMask: indexPath.newAxisMask, +// shrinkAxisMask: indexPath.squeezeAxisMask) +// } +// ) +// } +// } extension Tensor.IndexPath { @inlinable diff --git a/Sources/TensorFlow/Operators/Image.swift b/Sources/TensorFlow/Operators/Image.swift index d3cfdf4cf..5562acf9f 100644 --- a/Sources/TensorFlow/Operators/Image.swift +++ b/Sources/TensorFlow/Operators/Image.swift @@ -41,7 +41,7 @@ public enum ResizeMethod { /// - antialias: Iff `true`, use an anti-aliasing filter when downsampling an image. /// - Precondition: The images must have rank `3` or `4`. /// - Precondition: The size must be positive. -@differentiable(wrt: images) +@differentiable(reverse, wrt: images) public func resize( images: Tensor, size: (newHeight: Int, newWidth: Int), @@ -168,7 +168,7 @@ public func resizeArea( } @usableFromInline -@differentiable(wrt: images) +@differentiable(reverse, wrt: images) func scaleAndTranslate( images: Tensor, size: Tensor, @@ -218,7 +218,7 @@ func _vjpScaleAndTranslate( } @usableFromInline -@differentiable(wrt: images where Scalar: TensorFlowFloatingPoint) +@differentiable(reverse, wrt: images where Scalar: TensorFlowFloatingPoint) func resizeNearestNeighbor( images: Tensor, size: Tensor, @@ -261,7 +261,7 @@ func _vjpResizeNearestNeighbor( } @usableFromInline -@differentiable(wrt: images where Scalar: TensorFlowFloatingPoint) +@differentiable(reverse, wrt: images where Scalar: TensorFlowFloatingPoint) func resizeBilinear( images: Tensor, size: Tensor, @@ -304,7 +304,7 @@ func _vjpResizeBilinear( } @usableFromInline -@differentiable(wrt: images where Scalar: TensorFlowFloatingPoint) +@differentiable(reverse, wrt: images where Scalar: TensorFlowFloatingPoint) func resizeBicubic( images: Tensor, size: Tensor, @@ -356,7 +356,7 @@ func _vjpResizeBicubic( /// - rates: The dilation rates for each dimension of the input. /// - Precondition: `input` must have rank `4`. /// - Precondition: `filter` must have rank `3`. -@differentiable(wrt: (input, filter)) +@differentiable(reverse, wrt: (input, filter)) public func dilation2D( _ input: Tensor, filter: Tensor, @@ -401,7 +401,7 @@ func _vjpDilation2D( } /// TensorFlow builtin dilation2d gradient helper for the input. 
-@differentiable(wrt: (x, filter)) +@differentiable(reverse, wrt: (x, filter)) @usableFromInline func dilation2DBackpropInput( _ x: Tensor, @@ -451,7 +451,7 @@ func _vjpDilation2DBackpropInput( } /// TensorFlow builtin dilation2d gradient helper for the input. -@differentiable(wrt: (x, input)) +@differentiable(reverse, wrt: (x, input)) @usableFromInline func dilation2DBackpropFilter( _ x: Tensor, @@ -510,7 +510,7 @@ func _vjpDilation2DBackpropFilter( /// - rates: The dilation rates for each dimension of the input. /// - Precondition: `input` must have rank `4`. /// - Precondition: `filter` must have rank 3. -@differentiable(wrt: (input, filter)) +@differentiable(reverse, wrt: (input, filter)) public func erosion2D( _ input: Tensor, filter: Tensor, diff --git a/Sources/TensorFlow/Operators/LinearAlgebra.swift b/Sources/TensorFlow/Operators/LinearAlgebra.swift index 1ad3c4f6d..7417fb2d9 100644 --- a/Sources/TensorFlow/Operators/LinearAlgebra.swift +++ b/Sources/TensorFlow/Operators/LinearAlgebra.swift @@ -32,7 +32,7 @@ extension Tensor where Scalar: TensorFlowNumeric { /// // [1, 2, 3, 4] /// ``` @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public func diagonalPart() -> Tensor { precondition(rank >= 2, "The tensor must have at least rank 2.") return _Raw.matrixDiagPart(self) @@ -53,7 +53,7 @@ extension Tensor where Scalar: TensorFlowNumeric { /// // [0, 0, 0, 4]] /// ``` @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public func diagonal() -> Tensor { _Raw.matrixDiag(diagonal: self) } @@ -70,7 +70,7 @@ extension Tensor where Scalar: TensorFlowNumeric { } @available(*, deprecated, renamed: "bandPart(subdiagonalCount:superdiagonalCount:)") - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func bandPart(_ subdiagonalCount: Int, _ superdiagonalCount: Int) -> Tensor { return bandPart(subdiagonalCount: subdiagonalCount, superdiagonalCount: superdiagonalCount) } @@ -105,7 +105,7 @@ extension Tensor where Scalar: TensorFlowNumeric { /// - superdiagonalCount: The number of superdiagonals to keep. If negative, keep entire upper /// triangle. @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public func bandPart(subdiagonalCount: Int, superdiagonalCount: Int) -> Tensor { precondition(rank >= 2, "The tensor must have at least rank 2.") let lower = Tensor(Int32(subdiagonalCount), on: self.device) @@ -178,7 +178,7 @@ public func eye( /// - Parameter matrix: A tensor of shape `[..., M, N]`. /// - Precondition: `matrix` must be a tensor with shape `[..., M, N]`. @inlinable -@differentiable(wrt: matrix where T: TensorFlowFloatingPoint) +@differentiable(reverse, wrt: matrix where T: TensorFlowFloatingPoint) public func trace(_ matrix: Tensor) -> Tensor { precondition(matrix.rank >= 2, "The tensor must have at least rank 2.") return matrix.diagonalPart().sum(squeezingAxes: -1) @@ -214,7 +214,7 @@ func slogdet(_ matrix: Tensor) -> ( /// - Parameter matrix: A tensor of shape `[..., M, N]`. /// - Returns: The natural logarithm of the determinant of `matrix`. 
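// A minimal sketch, assuming this branch: trace is differentiable with respect to
// `matrix`, and since trace(A) sums the diagonal, the gradient of a single matrix's
// trace is the identity matrix.
import _Differentiation
import TensorFlow

let squareMatrix = Tensor<Float>([[1, 2], [3, 4]])
let (tr, tracePullback) = valueWithPullback(at: squareMatrix) { m in trace(m) }
let dSquare = tracePullback(Tensor<Float>(1))  // [[1, 0], [0, 1]]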
@inlinable -@differentiable(wrt: matrix where T: TensorFlowFloatingPoint) +@differentiable(reverse, wrt: matrix where T: TensorFlowFloatingPoint) func logdet(_ matrix: Tensor) -> Tensor { return 2.0 * log(cholesky(matrix).diagonalPart()).sum(squeezingAxes: -1) } @@ -235,7 +235,7 @@ func logdet(_ matrix: Tensor) -> Tensor { /// /// - Parameter input: A tensor of shape `[..., M, M]`. @inlinable -@differentiable +@differentiable(reverse) public func cholesky(_ x: Tensor) -> Tensor { _Raw.cholesky(x) } @@ -326,7 +326,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// - Precondition: `matrix` must be a tensor with shape `[..., M, M]`. /// - Precondition: `rhs` must be a tensor with shape `[..., M, K]`. @inlinable -@differentiable +@differentiable(reverse) public func triangularSolve( matrix: Tensor, rhs: Tensor, diff --git a/Sources/TensorFlow/Operators/Math.swift b/Sources/TensorFlow/Operators/Math.swift index 438b368bd..e2f72bd48 100644 --- a/Sources/TensorFlow/Operators/Math.swift +++ b/Sources/TensorFlow/Operators/Math.swift @@ -49,7 +49,7 @@ extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint { /// /// For real types, if `x` is negative the result is `.nan`. For complex /// types there is a branch cut on the negative real axis. - @differentiable + @differentiable(reverse) public static func sqrt(_ x: Self) -> Self { _Raw.sqrt(x) } @@ -64,7 +64,7 @@ extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint { } /// The cosine of `x`, interpreted as an angle in radians. - @differentiable + @differentiable(reverse) public static func cos(_ x: Self) -> Self { _Raw.cos(x) } @@ -78,7 +78,7 @@ extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint { } /// The sine of `x`, interpreted as an angle in radians. - @differentiable + @differentiable(reverse) public static func sin(_ x: Self) -> Self { _Raw.sin(x) } @@ -92,7 +92,7 @@ extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint { } /// The tangent of `x`, interpreted as an angle in radians. - @differentiable + @differentiable(reverse) public static func tan(_ x: Self) -> Self { _Raw.tan(x) } @@ -107,7 +107,7 @@ extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint { } /// The inverse cosine of `x` in radians. - @differentiable + @differentiable(reverse) public static func acos(_ x: Self) -> Self { _Raw.acos(x) } @@ -121,7 +121,7 @@ extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint { } /// The inverse sine of `x` in radians. - @differentiable + @differentiable(reverse) public static func asin(_ x: Self) -> Self { _Raw.asin(x) } @@ -135,7 +135,7 @@ extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint { } /// The inverse tangent of `x` in radians. - @differentiable + @differentiable(reverse) public static func atan(_ x: Self) -> Self { _Raw.atan(x) } @@ -149,7 +149,7 @@ extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint { } /// The hyperbolic cosine of `x`. - @differentiable + @differentiable(reverse) public static func cosh(_ x: Self) -> Self { _Raw.cosh(x) } @@ -163,7 +163,7 @@ extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint { } /// The hyperbolic sine of `x`. - @differentiable + @differentiable(reverse) public static func sinh(_ x: Self) -> Self { _Raw.sinh(x) } @@ -177,7 +177,7 @@ extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint { } /// The hyperbolic tangent of `x`. 
- @differentiable + @differentiable(reverse) public static func tanh(_ x: Self) -> Self { _Raw.tanh(x) } @@ -192,7 +192,7 @@ extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint { } /// The inverse hyperbolic cosine of `x`. - @differentiable + @differentiable(reverse) public static func acosh(_ x: Self) -> Self { _Raw.acosh(x) } @@ -206,7 +206,7 @@ extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint { } /// The inverse hyperbolic sine of `x`. - @differentiable + @differentiable(reverse) public static func asinh(_ x: Self) -> Self { _Raw.asinh(x) } @@ -220,7 +220,7 @@ extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint { } /// The inverse hyperbolic tangent of `x`. - @differentiable + @differentiable(reverse) public static func atanh(_ x: Self) -> Self { _Raw.atanh(x) } @@ -234,7 +234,7 @@ extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint { } /// The exponential function applied to `x`, or `e**x`. - @differentiable + @differentiable(reverse) public static func exp(_ x: Self) -> Self { _Raw.exp(x) } @@ -249,25 +249,25 @@ extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint { } /// Two raised to to power `x`. - @differentiable + @differentiable(reverse) public static func exp2(_ x: Self) -> Self { pow(Tensor(2, on: x.device), x) } /// Ten raised to to power `x`. - @differentiable + @differentiable(reverse) public static func exp10(_ x: Self) -> Self { pow(Tensor(10, on: x.device), x) } /// `exp(x) - 1` evaluated so as to preserve accuracy close to zero. - @differentiable + @differentiable(reverse) public static func expm1(_ x: Self) -> Self { _Raw.expm1(x) } #if TENSORFLOW_USE_STANDARD_TOOLCHAIN - @differentiable + @differentiable(reverse) public static func expMinusOne(_ x: Self) -> Self { return expm1(x) } @@ -283,7 +283,7 @@ extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint { } /// The natural logarithm of `x`. - @differentiable + @differentiable(reverse) public static func log(_ x: Self) -> Self { _Raw.log(x) } @@ -297,25 +297,25 @@ extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint { } /// The base-two logarithm of `x`. - @differentiable + @differentiable(reverse) public static func log2(_ x: Self) -> Self { log(x) / Scalar.log(2) } /// The base-ten logarithm of `x`. - @differentiable + @differentiable(reverse) public static func log10(_ x: Self) -> Self { log(x) / Scalar.log(10) } /// `log(1 + x)` evaluated so as to preserve accuracy close to zero. - @differentiable + @differentiable(reverse) public static func log1p(_ x: Self) -> Self { _Raw.log1p(x) } #if TENSORFLOW_USE_STANDARD_TOOLCHAIN - @differentiable + @differentiable(reverse) public static func log(onePlus x: Self) -> Self { return log1p(x) } @@ -334,7 +334,7 @@ extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint { /// For real types, if `x` is negative the result is NaN, even if `y` has /// an integral value. For complex types, there is a branch cut on the /// negative real axis. - @differentiable + @differentiable(reverse) public static func pow(_ x: Self, _ y: Self) -> Self { _Raw.pow(x, y) } @@ -364,7 +364,7 @@ extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint { /// `x` raised to the `n`th power. /// /// The product of `n` copies of `x`. 
- @differentiable + @differentiable(reverse) public static func pow(_ x: Self, _ n: Int) -> Self { pow(x, Tensor(Scalar(n), on: x.device)) } @@ -373,7 +373,7 @@ extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint { /// /// For real types, if `x` is negative and `n` is even, the result is NaN. /// For complex types, there is a branch cut along the negative real axis. - @differentiable + @differentiable(reverse) public static func root(_ x: Self, _ n: Int) -> Self { sign(x) * pow(abs(x), Tensor(Scalar(1) / Scalar(n), on: x.device)) } @@ -386,17 +386,17 @@ extension Tensor: ElementaryFunctions where Scalar: TensorFlowFloatingPoint { extension Tensor: VectorProtocol where Scalar: TensorFlowFloatingPoint { public typealias VectorSpaceScalar = Float - // @differentiable(where Scalar: TensorFlowFloatingPoint) + // @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public func scaled(by scale: Float) -> Self { Scalar(scale) * self } - // @differentiable(where Scalar: TensorFlowFloatingPoint) + // @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public func adding(_ scalar: Float) -> Self { self + Scalar(scalar) } - // @differentiable(where Scalar: TensorFlowFloatingPoint) + // @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public func subtracting(_ scalar: Float) -> Self { self - Scalar(scalar) } @@ -452,28 +452,28 @@ public extension VectorProtocol where VectorSpaceScalar: SignedNumeric { extension Tensor where Scalar: Numeric { /// Adds the scalar to every scalar of the tensor and produces the sum. @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public static func + (lhs: Scalar, rhs: Tensor) -> Tensor { return Tensor(lhs, deviceAndPrecisionLike: rhs) + rhs } /// Adds the scalar to every scalar of the tensor and produces the sum. @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public static func + (lhs: Tensor, rhs: Scalar) -> Tensor { return lhs + Tensor(rhs, deviceAndPrecisionLike: lhs) } /// Subtracts the scalar from every scalar of the tensor and produces the difference. @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public static func - (lhs: Scalar, rhs: Tensor) -> Tensor { return Tensor(lhs, deviceAndPrecisionLike: rhs) - rhs } /// Subtracts the scalar from every scalar of the tensor and produces the difference @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public static func - (lhs: Tensor, rhs: Scalar) -> Tensor { return lhs - Tensor(rhs, deviceAndPrecisionLike: lhs) } @@ -510,21 +510,21 @@ extension Tensor where Scalar: Numeric { /// Returns the tensor produced by multiplying the two tensors. /// - Note: `*` supports broadcasting. @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public static func * (lhs: Tensor, rhs: Tensor) -> Tensor { return _Raw.mul(lhs, rhs) } /// Returns the tensor by multiplying it with every scalar of the tensor. 
@inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public static func * (lhs: Scalar, rhs: Tensor) -> Tensor { return Tensor(lhs, deviceAndPrecisionLike: rhs) * rhs } /// Multiplies the scalar with every scalar of the tensor and produces the product. @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public static func * (lhs: Tensor, rhs: Scalar) -> Tensor { return lhs * Tensor(rhs, deviceAndPrecisionLike: lhs) } @@ -546,21 +546,21 @@ extension Tensor where Scalar: Numeric { /// Returns the quotient of dividing the first tensor by the second. /// - Note: `/` supports broadcasting. @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public static func / (lhs: Tensor, rhs: Tensor) -> Tensor { return _Raw.div(lhs, rhs) } /// Returns the quotient of dividing the scalar by the tensor, broadcasting the scalar. @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public static func / (lhs: Scalar, rhs: Tensor) -> Tensor { return Tensor(lhs, deviceAndPrecisionLike: rhs) / rhs } /// Returns the quotient of dividing the tensor by the scalar, broadcasting the scalar. @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public static func / (lhs: Tensor, rhs: Scalar) -> Tensor { return lhs / Tensor(rhs, deviceAndPrecisionLike: lhs) } @@ -822,28 +822,28 @@ extension Tensor where Scalar == Bool { extension Tensor where Scalar: TensorFlowNumeric { /// Returns `max(min(self, max), min)`. @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public func clipped(min: Tensor, max: Tensor) -> Tensor { _Raw.clipByValue(t: self, clipValueMin: min, clipValueMax: max) } /// Returns `max(min(self, max), min)`. @inlinable - @differentiable(wrt: (self, min) where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: (self, min) where Scalar: TensorFlowFloatingPoint) public func clipped(min: Tensor, max: Scalar) -> Tensor { clipped(min: min, max: Tensor(max, deviceAndPrecisionLike: self)) } /// Returns `max(min(self, max), min)`. @inlinable - @differentiable(wrt: (self, max) where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: (self, max) where Scalar: TensorFlowFloatingPoint) public func clipped(min: Scalar, max: Tensor) -> Tensor { clipped(min: Tensor(min, deviceAndPrecisionLike: self), max: max) } /// Returns `max(min(self, max), min)`. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func clipped(min: Scalar, max: Scalar) -> Tensor { clipped( min: Tensor(min, deviceAndPrecisionLike: self), @@ -965,7 +965,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { extension Tensor where Scalar: SignedNumeric { /// Returns the negation of the specified tensor element-wise. 
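The scalar–tensor operator overloads above stay differentiable under the `reverse` constraint; a small sketch (an assumed example, not from this patch) showing the gradient flowing through a broadcast scalar:

import TensorFlow

// Gradient of (2 * w + 1).sum() with respect to w is 2 everywhere.
let w = Tensor<Float>([1.0, 2.0, 3.0])
let grad = gradient(at: w) { w in (2.0 * w + 1.0).sum() }
print(grad)  // [2.0, 2.0, 2.0]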
@inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public static prefix func - (rhs: Tensor) -> Tensor { return _Raw.neg(rhs) } @@ -981,7 +981,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// Returns the absolute value of the specified tensor element-wise. @inlinable -@differentiable(where T: TensorFlowFloatingPoint) +@differentiable(reverse where T: TensorFlowFloatingPoint) public func abs(_ x: Tensor) -> Tensor { _Raw.abs(x) } @@ -997,28 +997,28 @@ internal func _vjpAbs( /// Returns the natural logarithm of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func log(_ x: Tensor) -> Tensor { Tensor.log(x) } /// Returns the base-two logarithm of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func log2(_ x: Tensor) -> Tensor { log(x) / T.log(2) } /// Returns the base-ten logarithm of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func log10(_ x: Tensor) -> Tensor { log(x) / T.log(10) } /// Returns the logarithm of `1 + x` element-wise. @inlinable -@differentiable +@differentiable(reverse) public func log1p(_ x: Tensor) -> Tensor { Tensor.log1p(x) } @@ -1028,7 +1028,7 @@ public func log1p(_ x: Tensor) -> Tensor { /// - Note: The approach is shown in Equation 7 of: /// https://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf. @inlinable -@differentiable +@differentiable(reverse) public func log1mexp(_ x: Tensor) -> Tensor { let isTooSmall = withoutDerivative(at: x) { x in -x .< T(log(2.0)) } // This `replacing` will ultimately be a no-op because we will not select this code-path @@ -1040,84 +1040,84 @@ public func log1mexp(_ x: Tensor) -> Tensor { /// Returns the sine of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func sin(_ x: Tensor) -> Tensor { Tensor.sin(x) } /// Returns the cosine of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func cos(_ x: Tensor) -> Tensor { Tensor.cos(x) } /// Returns the tangent of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func tan(_ x: Tensor) -> Tensor { Tensor.tan(x) } /// Returns the hyperbolic sine of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func sinh(_ x: Tensor) -> Tensor { Tensor.sinh(x) } /// Returns the hyperbolic cosine of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func cosh(_ x: Tensor) -> Tensor { Tensor.cosh(x) } /// Returns the hyperbolic tangent of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func tanh(_ x: Tensor) -> Tensor { Tensor.tanh(x) } /// Returns the inverse cosine of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func acos(_ x: Tensor) -> Tensor { Tensor.acos(x) } /// Returns the inverse sine of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func asin(_ x: Tensor) -> Tensor { Tensor.asin(x) } /// Returns the inverse tangent of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func atan(_ x: Tensor) -> Tensor { Tensor.atan(x) } /// Returns the inverse hyperbolic cosine of the specified tensor element-wise. 
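Like the other unary functions in this block, `abs` keeps its derivative registration; a minimal sketch (not from the patch) of the resulting gradient, which is the element-wise sign:

import TensorFlow

// The gradient of abs(x).sum() is sign(x).
let x = Tensor<Float>([-3.0, 2.0])
let g = gradient(at: x) { x in abs(x).sum() }
print(g)  // [-1.0, 1.0]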
@inlinable -@differentiable +@differentiable(reverse) public func acosh(_ x: Tensor) -> Tensor { Tensor.acosh(x) } /// Returns the inverse hyperbolic sine of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func asinh(_ x: Tensor) -> Tensor { Tensor.asinh(x) } /// Returns the inverse hyperbolic tangent of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func atanh(_ x: Tensor) -> Tensor { Tensor.atanh(x) } @@ -1125,7 +1125,7 @@ public func atanh(_ x: Tensor) -> Tensor { /// Returns the square of the tensor. extension Tensor where Scalar: Numeric { @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func squared() -> Tensor { _Raw.square(self) } @@ -1141,14 +1141,14 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// Returns the square root of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func sqrt(_ x: Tensor) -> Tensor { Tensor.sqrt(x) } /// Returns the inverse square root of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func rsqrt(_ x: Tensor) -> Tensor { _Raw.rsqrt(x) } @@ -1164,35 +1164,35 @@ internal func _vjpRsqrt( /// Returns the exponential of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func exp(_ x: Tensor) -> Tensor { Tensor.exp(x) } /// Returns two raised to the power of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func exp2(_ x: Tensor) -> Tensor { Tensor.exp2(x) } /// Returns ten raised to the power of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func exp10(_ x: Tensor) -> Tensor { Tensor.exp10(x) } /// Returns the exponential of `x - 1` element-wise. @inlinable -@differentiable +@differentiable(reverse) public func expm1(_ x: Tensor) -> Tensor { Tensor.expm1(x) } /// Returns the values of the specified tensor rounded to the nearest integer, element-wise. @inlinable -@differentiable +@differentiable(reverse) public func round(_ x: Tensor) -> Tensor { _Raw.round(x) } @@ -1207,7 +1207,7 @@ internal func _vjpRound( /// Returns the ceiling of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func ceil(_ x: Tensor) -> Tensor { _Raw.ceil(x) } @@ -1222,7 +1222,7 @@ internal func _vjpCeil( /// Returns the floor of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func floor(_ x: Tensor) -> Tensor { _Raw.floor(x) } @@ -1238,7 +1238,7 @@ internal func _vjpFloor( /// Returns an indication of the sign of the specified tensor element-wise. /// Specifically, computes `y = sign(x) = -1` if `x < 0`; 0 if `x == 0`; 1 if `x > 0`. @inlinable -@differentiable(where T: TensorFlowFloatingPoint) +@differentiable(reverse where T: TensorFlowFloatingPoint) public func sign(_ x: Tensor) -> Tensor { _Raw.sign(x) } @@ -1254,7 +1254,7 @@ internal func _vjpSign( /// Returns the sigmoid of the specified tensor element-wise. /// Specifically, computes `1 / (1 + exp(-x))`. @inlinable -@differentiable +@differentiable(reverse) public func sigmoid(_ x: Tensor) -> Tensor { _Raw.sigmoid(x) } @@ -1271,7 +1271,7 @@ internal func _vjpSigmoid( /// Returns the log-sigmoid of the specified tensor element-wise. Specifically, /// `log(1 / (1 + exp(-x)))`. 
For numerical stability, we use `-softplus(-x)`. @inlinable -@differentiable +@differentiable(reverse) public func logSigmoid(_ x: Tensor) -> Tensor { -softplus(-x) } @@ -1279,7 +1279,7 @@ public func logSigmoid(_ x: Tensor) -> Tensor /// Returns the softplus of the specified tensor element-wise. /// Specifically, computes `log(exp(features) + 1)`. @inlinable -@differentiable +@differentiable(reverse) public func softplus(_ features: Tensor) -> Tensor { _Raw.softplus(features: features) } @@ -1295,7 +1295,7 @@ internal func _vjpSoftplus( /// Returns the softsign of the specified tensor element-wise. /// Specifically, computes `features/ (abs(features) + 1)`. @inlinable -@differentiable +@differentiable(reverse) public func softsign(_ features: Tensor) -> Tensor { _Raw.softsign(features: features) } @@ -1311,7 +1311,7 @@ internal func _vjpSoftsign( /// Returns the softmax of the specified tensor along the last axis. /// Specifically, computes `exp(x) / exp(x).sum(alongAxes: -1)`. @inlinable -@differentiable +@differentiable(reverse) public func softmax(_ x: Tensor) -> Tensor { _Raw.softmax(logits: x) } @@ -1319,7 +1319,7 @@ public func softmax(_ x: Tensor) -> Tensor { /// Returns the softmax of the specified tensor along the specified axis. /// Specifically, computes `exp(x) / exp(x).sum(alongAxes: axis)`. @inlinable -@differentiable +@differentiable(reverse) public func softmax(_ x: Tensor, alongAxis axis: Int) -> Tensor { let xExp = exp(x) return xExp / xExp.sum(alongAxes: Tensor(Int32(axis), on: xExp.device)) @@ -1342,7 +1342,7 @@ func _vjpSoftmax( /// Returns the log-softmax of the specified tensor element-wise. @inlinable -@differentiable +@differentiable(reverse) public func logSoftmax(_ x: Tensor) -> Tensor { _Raw.logSoftmax(logits: x) } @@ -1361,7 +1361,7 @@ func _vjpLogSoftmax( /// See [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs) /// ](http://arxiv.org/abs/1511.07289) @inlinable -@differentiable +@differentiable(reverse) public func elu(_ x: Tensor) -> Tensor { _Raw.elu(features: x) } @@ -1382,7 +1382,7 @@ func _vjpElu( /// /// See [Gaussian Error Linear Units](https://arxiv.org/abs/1606.08415). @inlinable -@differentiable +@differentiable(reverse) public func gelu(_ x: Tensor) -> Tensor { // Use withoutDerivative to prevent device mismatch in pullback. let xWithoutDerivative = withoutDerivative(at: x) @@ -1403,7 +1403,7 @@ public func gelu(_ x: Tensor) -> Tensor { /// Returns a tensor by applying the ReLU activation function to the specified tensor element-wise. /// Specifically, computes `max(0, x)`. @inlinable -@differentiable +@differentiable(reverse) public func relu(_ x: Tensor) -> Tensor { _Raw.relu(features: x) } @@ -1418,7 +1418,7 @@ func _vjpRelu( /// Returns a tensor by applying the ReLU6 activation function, namely `min(max(0, x), 6)`. @inlinable -@differentiable +@differentiable(reverse) public func relu6(_ x: Tensor) -> Tensor { _Raw.relu6(features: x) } @@ -1435,7 +1435,7 @@ func _vjpRelu6( /// to the specified tensor element-wise. /// Specifically, computes `max(x, x * alpha)`. @inlinable -@differentiable(wrt: x) +@differentiable(reverse, wrt: x) public func leakyRelu( _ x: Tensor, alpha: Double = 0.2 @@ -1464,7 +1464,7 @@ func _vjpLeakyRelu( /// Please refer to [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) for more /// information. 
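The activation functions in this file are all plain `@differentiable(reverse)` now; a standalone sketch (illustrative only, with invented values) of pulling a cotangent back through `relu`, which also shows the renamed `of:` label on `pullback`:

import TensorFlow

// relu passes gradient through only where the input is positive.
func f(_ x: Tensor<Float>) -> Tensor<Float> { relu(x) }
let pb = pullback(at: Tensor<Float>([-1.0, 0.5, 2.0]), of: f)
print(pb(Tensor<Float>([1.0, 1.0, 1.0])))  // [0.0, 1.0, 1.0]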
@inlinable -@differentiable +@differentiable(reverse) public func selu(_ x: Tensor) -> Tensor { _Raw.selu(features: x) } @@ -1489,7 +1489,7 @@ func _vjpSelu( /// Source: "Searching for Activation Functions" (Ramachandran et al. 2017) /// https://arxiv.org/abs/1710.05941 @inlinable -@differentiable +@differentiable(reverse) public func swish(_ x: Tensor) -> Tensor { x * sigmoid(x) } @@ -1519,7 +1519,7 @@ func _vjpSwish( /// Source: "Searching for MobileNetV3" (Howard et al. 2019) /// https://arxiv.org/abs/1905.02244 @inlinable -@differentiable +@differentiable(reverse) public func hardSigmoid(_ x: Tensor) -> Tensor { relu6(x + 3) / 6.0 } @@ -1530,7 +1530,7 @@ public func hardSigmoid(_ x: Tensor) -> Tensor /// Source: "Searching for MobileNetV3" (Howard et al. 2019) /// https://arxiv.org/abs/1905.02244 @inlinable -@differentiable +@differentiable(reverse) public func hardSwish(_ x: Tensor) -> Tensor { x * hardSigmoid(x) } @@ -1541,7 +1541,7 @@ public func hardSwish(_ x: Tensor) -> Tensor { /// Source: "Mish: A Self Regularized Non-Monotonic Neural Activation Function" /// https://arxiv.org/abs/1908.08681 @inlinable -@differentiable +@differentiable(reverse) public func mish(_ x: Tensor) -> Tensor { x * tanh(softplus(x)) } @@ -1563,35 +1563,35 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// Returns the power of the first tensor to the second tensor. @inlinable -@differentiable +@differentiable(reverse) public func pow(_ lhs: Tensor, _ rhs: Tensor) -> Tensor { Tensor.pow(lhs, rhs) } /// Returns the power of the scalar to the tensor, broadcasting the scalar. @inlinable -@differentiable(wrt: rhs where T: TensorFlowFloatingPoint) +@differentiable(reverse, wrt: rhs where T: TensorFlowFloatingPoint) public func pow(_ lhs: T, _ rhs: Tensor) -> Tensor { pow(Tensor(lhs, deviceAndPrecisionLike: rhs), rhs) } /// Returns the power of the tensor to the scalar, broadcasting the scalar. @inlinable -@differentiable(wrt: lhs where T: TensorFlowFloatingPoint) +@differentiable(reverse, wrt: lhs where T: TensorFlowFloatingPoint) public func pow(_ lhs: Tensor, _ rhs: T) -> Tensor { pow(lhs, Tensor(rhs, deviceAndPrecisionLike: lhs)) } /// Returns the power of the tensor to the scalar, broadcasting the scalar. @inlinable -@differentiable +@differentiable(reverse) public func pow(_ x: Tensor, _ n: Int) -> Tensor { pow(x, Tensor(T(n), deviceAndPrecisionLike: x)) } /// Returns the element-wise `n`th root of the tensor. @inlinable -@differentiable +@differentiable(reverse) public func root(_ x: Tensor, _ n: Int) -> Tensor { Tensor.root(x, n) } @@ -1599,7 +1599,7 @@ public func root(_ x: Tensor, _ n: Int) -> Tensor /// Returns the squared difference between `x` and `y`. /// - Returns: `(x - y) ^ 2`. @inlinable -@differentiable(where T: TensorFlowFloatingPoint) +@differentiable(reverse where T: TensorFlowFloatingPoint) public func squaredDifference(_ x: Tensor, _ y: Tensor) -> Tensor { _Raw.squaredDifference(x, y) } @@ -1622,7 +1622,7 @@ internal func _vjpSquaredDifference( /// Returns the element-wise maximum of two tensors. /// - Note: `max` supports broadcasting. @inlinable -@differentiable(where T: TensorFlowFloatingPoint) +@differentiable(reverse where T: TensorFlowFloatingPoint) public func max(_ lhs: Tensor, _ rhs: Tensor) -> Tensor where T: Numeric & Comparable { _Raw.maximum(lhs, rhs) } @@ -1644,14 +1644,14 @@ internal func _vjpMax( /// Returns the element-wise maximum of the scalar and the tensor, broadcasting the scalar. 
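Compositions of these activations differentiate end to end; a quick illustrative sketch (not part of the patch) using `valueWithGradient` with `mish`:

import TensorFlow

// Loss value and gradient come back together from one reverse-mode pass.
let x = Tensor<Float>([0.5, -0.5])
let (value, grad) = valueWithGradient(at: x) { x in mish(x).sum() }
print(value, grad)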
@inlinable -@differentiable(wrt: rhs where T: TensorFlowFloatingPoint) +@differentiable(reverse, wrt: rhs where T: TensorFlowFloatingPoint) public func max(_ lhs: T, _ rhs: Tensor) -> Tensor where T: Numeric & Comparable { max(Tensor(lhs, deviceAndPrecisionLike: rhs), rhs) } /// Returns the element-wise maximum of the scalar and the tensor, broadcasting the scalar. @inlinable -@differentiable(wrt: lhs where T: TensorFlowFloatingPoint) +@differentiable(reverse, wrt: lhs where T: TensorFlowFloatingPoint) public func max(_ lhs: Tensor, _ rhs: T) -> Tensor where T: Numeric & Comparable { max(lhs, Tensor(rhs, deviceAndPrecisionLike: lhs)) } @@ -1659,7 +1659,7 @@ public func max(_ lhs: Tensor, _ rhs: T) -> Tensor where T: Numeric & C /// Returns the element-wise minimum of two tensors. /// - Note: `min` supports broadcasting. @inlinable -@differentiable(where T: TensorFlowFloatingPoint) +@differentiable(reverse where T: TensorFlowFloatingPoint) public func min(_ lhs: Tensor, _ rhs: Tensor) -> Tensor where T: Numeric & Comparable { _Raw.minimum(lhs, rhs) } @@ -1681,14 +1681,14 @@ internal func _vjpMin( /// Returns the element-wise minimum of the scalar and the tensor, broadcasting the scalar. @inlinable -@differentiable(wrt: rhs where T: TensorFlowFloatingPoint) +@differentiable(reverse, wrt: rhs where T: TensorFlowFloatingPoint) public func min(_ lhs: T, _ rhs: Tensor) -> Tensor where T: Numeric & Comparable { min(Tensor(lhs, deviceAndPrecisionLike: rhs), rhs) } /// Returns the element-wise minimum of the scalar and the tensor, broadcasting the scalar. @inlinable -@differentiable(wrt: lhs where T: TensorFlowFloatingPoint) +@differentiable(reverse, wrt: lhs where T: TensorFlowFloatingPoint) public func min(_ lhs: Tensor, _ rhs: T) -> Tensor where T: Numeric & Comparable { min(lhs, Tensor(rhs, deviceAndPrecisionLike: lhs)) } @@ -1715,7 +1715,7 @@ internal func _vjpMinMaxHelper( } /// Returns the cosine similarity between `x` and `y`. -@differentiable +@differentiable(reverse) public func cosineSimilarity( _ x: Tensor, _ y: Tensor @@ -1725,7 +1725,7 @@ public func cosineSimilarity( /// Returns the cosine distance between `x` and `y`. Cosine distance is defined as /// `1 - cosineSimilarity(x, y)`. -@differentiable +@differentiable(reverse) public func cosineDistance( _ x: Tensor, _ y: Tensor @@ -1747,7 +1747,7 @@ extension Tensor { /// must be either have the same shape as `self` or be a 1-D `Tensor` such /// that `mask.scalarCount == self.shape[0]`. @inlinable - @differentiable(wrt: (self, other) where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: (self, other) where Scalar: TensorFlowFloatingPoint) public func replacing(with other: Tensor, where mask: Tensor) -> Tensor { precondition(self.shape == other.shape, "`self` and `other` must have the same shape.") return _Raw.select(condition: mask, t: other, e: self) @@ -1843,7 +1843,7 @@ extension Tensor where Scalar: Numeric & Comparable { // NOTE: This overload is necessary, otherwise `min()` would refer to the variadic method // `min(squeezingAxes:)` with zero indices. 
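The scalar overloads of `max`/`min` above differentiate only with respect to the tensor operand; a small sketch (illustrative, not from this patch) where `max(x, 0)` behaves like `relu` under differentiation:

import TensorFlow

// Gradient is 1 where x wins the comparison and 0 where the scalar branch wins.
let x = Tensor<Float>([-2.0, 3.0])
let g = gradient(at: x) { x in max(x, 0).sum() }
print(g)  // [0.0, 1.0]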
@inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public func min() -> Tensor { let axes = Tensor(rangeFrom: 0, to: Int32(rank), stride: 1, on: device) return min(squeezingAxes: axes) @@ -1852,7 +1852,7 @@ extension Tensor where Scalar: Numeric & Comparable { // NOTE: This overload is necessary, otherwise `max()` would refer to the variadic method // `max(squeezingAxes:)` with zero indices. @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public func max() -> Tensor { let axes = Tensor(rangeFrom: 0, to: Int32(rank), stride: 1, on: device) return max(squeezingAxes: axes) @@ -1862,7 +1862,7 @@ extension Tensor where Scalar: Numeric & Comparable { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { ensureValid(axes: axes) return _Raw.max(self, reductionIndices: axes, keepDims: false) @@ -1872,7 +1872,7 @@ extension Tensor where Scalar: Numeric & Comparable { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { let axes = axes.map(Int32.init) return max(squeezingAxes: Tensor(axes, on: device)) @@ -1882,7 +1882,7 @@ extension Tensor where Scalar: Numeric & Comparable { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { max(squeezingAxes: axes) } @@ -1891,7 +1891,7 @@ extension Tensor where Scalar: Numeric & Comparable { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { ensureValid(axes: axes) return _Raw.min(self, reductionIndices: axes, keepDims: false) @@ -1901,7 +1901,7 @@ extension Tensor where Scalar: Numeric & Comparable { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { let axes = axes.map(Int32.init) return min(squeezingAxes: Tensor(axes, on: device)) @@ -1911,7 +1911,7 @@ extension Tensor where Scalar: Numeric & Comparable { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { min(squeezingAxes: axes) } @@ -1941,7 +1941,7 @@ extension Tensor where Scalar: Numeric & Comparable { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { ensureValid(axes: axes) return _Raw.min(self, reductionIndices: axes, keepDims: true) @@ -1952,7 +1952,7 @@ extension Tensor where Scalar: Numeric & Comparable { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { let axes = axes.map(Int32.init) return min(alongAxes: Tensor(axes, on: device)) @@ -1963,7 +1963,7 @@ extension Tensor where Scalar: Numeric & Comparable { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { min(alongAxes: axes) } @@ -1973,7 +1973,7 @@ extension Tensor where Scalar: Numeric & Comparable { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank..) 
-> Tensor { ensureValid(axes: axes) return _Raw.max(self, reductionIndices: axes, keepDims: true) @@ -1984,7 +1984,7 @@ extension Tensor where Scalar: Numeric & Comparable { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { let axes = axes.map(Int32.init) return max(alongAxes: Tensor(axes, on: device)) @@ -1995,7 +1995,7 @@ extension Tensor where Scalar: Numeric & Comparable { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { max(alongAxes: axes) } @@ -2112,7 +2112,7 @@ extension Tensor where Scalar: Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func sum(squeezingAxes axes: Tensor) -> Tensor { ensureValid(axes: axes) return _Raw.sum(self, reductionIndices: axes.scalars.map { Int64($0) }, keepDims: false) @@ -2122,7 +2122,7 @@ extension Tensor where Scalar: Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func sum(squeezingAxes axes: [Int]) -> Tensor { let axes = axes.map(Int64.init) return _Raw.sum(self, reductionIndices: axes, keepDims: false) @@ -2132,13 +2132,13 @@ extension Tensor where Scalar: Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func sum(squeezingAxes axes: Int...) -> Tensor { sum(squeezingAxes: axes) } @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func sum() -> Tensor { flattened().sum(squeezingAxes: 0) } @@ -2147,7 +2147,7 @@ extension Tensor where Scalar: Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { ensureValid(axes: axes) return _Raw.sum(self, reductionIndices: axes, keepDims: true) @@ -2157,7 +2157,7 @@ extension Tensor where Scalar: Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { let axes = axes.map(Int64.init) return _Raw.sum(self, reductionIndices: axes, keepDims: true) @@ -2167,7 +2167,7 @@ extension Tensor where Scalar: Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { sum(alongAxes: axes) } @@ -2179,7 +2179,7 @@ extension Tensor where Scalar: Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. 
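Reduction gradients broadcast back to the shape of the receiver; a minimal standalone sketch (values invented for illustration):

import TensorFlow

// Summing away axis 1 and then reducing to a scalar gives a gradient of ones.
let m: Tensor<Float> = [[1.0, 2.0], [3.0, 4.0]]
let g = gradient(at: m) { m in m.sum(squeezingAxes: 1).sum() }
print(g)  // [[1.0, 1.0], [1.0, 1.0]]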
@inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func product(squeezingAxes axes: Tensor) -> Tensor { ensureValid(axes: axes) return _Raw.prod(self, reductionIndices: axes, keepDims: false) @@ -2190,7 +2190,7 @@ extension Tensor where Scalar: Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func product(squeezingAxes axes: [Int]) -> Tensor { let axes = axes.map(Int32.init) return product(squeezingAxes: Tensor(axes, on: device)) @@ -2201,13 +2201,13 @@ extension Tensor where Scalar: Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func product(squeezingAxes axes: Int...) -> Tensor { product(squeezingAxes: axes) } @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func product() -> Tensor { flattened().product(squeezingAxes: 0) } @@ -2247,7 +2247,7 @@ extension Tensor where Scalar: Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func mean(squeezingAxes axes: Tensor) -> Tensor { ensureValid(axes: axes) return _Raw.mean(self, reductionIndices: axes, keepDims: false) @@ -2257,7 +2257,7 @@ extension Tensor where Scalar: Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func mean(squeezingAxes axes: [Int]) -> Tensor { let axes = axes.map(Int64.init) return _Raw.mean(self, reductionIndices: axes, keepDims: false) @@ -2267,13 +2267,13 @@ extension Tensor where Scalar: Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank...rank`. @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func mean(squeezingAxes axes: Int...) -> Tensor { mean(squeezingAxes: axes) } @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func mean() -> Tensor { flattened().mean(squeezingAxes: [0]) } @@ -2283,7 +2283,7 @@ extension Tensor where Scalar: Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { ensureValid(axes: axes) return _Raw.mean(self, reductionIndices: axes, keepDims: true) @@ -2294,7 +2294,7 @@ extension Tensor where Scalar: Numeric { /// - Parameter axes: The dimensions to reduce. 
/// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { let axes = axes.map(Int64.init) return _Raw.mean(self, reductionIndices: axes, keepDims: true) @@ -2305,7 +2305,7 @@ extension Tensor where Scalar: Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { mean(alongAxes: axes) } @@ -2317,7 +2317,7 @@ extension Tensor where Scalar: Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { ensureValid(axes: axes) let squaredDiff = squaredDifference(self, mean(alongAxes: axes)) @@ -2329,7 +2329,7 @@ extension Tensor where Scalar: Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { let axes = axes.map(Int32.init) return variance(squeezingAxes: Tensor(axes, on: device)) @@ -2340,13 +2340,13 @@ extension Tensor where Scalar: Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { variance(squeezingAxes: axes) } @inlinable - @differentiable(wrt: self where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse, wrt: self where Scalar: TensorFlowFloatingPoint) public func variance() -> Tensor { let mean = self.mean() let squaredDiff = squaredDifference(self, mean) @@ -2358,7 +2358,7 @@ extension Tensor where Scalar: Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { ensureValid(axes: axes) let squaredDiff = squaredDifference(self, mean(alongAxes: axes)) @@ -2370,7 +2370,7 @@ extension Tensor where Scalar: Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { let axes = axes.map(Int32.init) return variance(alongAxes: Tensor(axes, on: device)) @@ -2381,7 +2381,7 @@ extension Tensor where Scalar: Numeric { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { variance(alongAxes: axes) } @@ -2412,7 +2412,7 @@ extension Tensor where Scalar: Numeric { /// - Returns: Result of the cumulative sum operation. /// - Precondition: `axis` must be in the range `-rank.., exclusive: Bool = false, @@ -2487,7 +2487,7 @@ extension Tensor where Scalar: Numeric { /// - Returns: Result of the cumulative product operation. /// - Precondition: `axis` must be in the range `-rank.., exclusive: Bool = false, @@ -2733,7 +2733,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { ensureValid(axes: axes) return Tensor.sqrt(variance(squeezingAxes: axes)) @@ -2745,7 +2745,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { ensureValid(axes: axes) return Tensor.sqrt(variance(squeezingAxes: axes)) @@ -2757,7 +2757,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. 
Tensor { standardDeviation(squeezingAxes: axes) } @@ -2767,7 +2767,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { // Reduce along all dimensions. standardDeviation(squeezingAxes: Array(0..) -> Tensor { ensureValid(axes: axes) return Tensor.sqrt(variance(alongAxes: axes)) @@ -2791,7 +2791,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { let axes = axes.map(Int32.init) return standardDeviation(alongAxes: Tensor(axes, on: device)) @@ -2803,7 +2803,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { ensureValid(axes: axes) return Tensor.sqrt(variance(alongAxes: axes)) @@ -2818,7 +2818,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Tensor { ensureValid(axes: axes) let rawMax = max(alongAxes: axes) @@ -2841,7 +2841,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { let axes = axes.map(Int32.init) return logSumExp(squeezingAxes: Tensor(axes, on: device)) @@ -2856,7 +2856,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { logSumExp(squeezingAxes: axes) } @@ -2867,7 +2867,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// avoids overflows caused by computing the `exp` of large inputs and underflows caused by /// computing the `log` of small inputs. @inlinable - @differentiable(wrt: self) + @differentiable(reverse, wrt: self) public func logSumExp() -> Tensor { logSumExp(squeezingAxes: Array(0..) -> Tensor { ensureValid(axes: axes) let rawMax = max(alongAxes: axes) @@ -2905,7 +2905,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { let axes = axes.map(Int32.init) return logSumExp(alongAxes: Tensor(axes, on: device)) @@ -2921,7 +2921,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Tensor { logSumExp(alongAxes: axes) } @@ -2933,7 +2933,7 @@ public struct Moments: Differentiable { public var mean: Tensor public var variance: Tensor - @differentiable + @differentiable(reverse) public init(mean: Tensor, variance: Tensor) { self.mean = mean self.variance = variance @@ -2948,7 +2948,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// - Precondition: `axes` must have rank `1`. /// - Precondition: Each value in `axes` must be in the range `-rank..) -> Moments { ensureValid(axes: axes) let mean = self.mean(alongAxes: axes) @@ -2966,7 +2966,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. 
Moments { ensureValid(axes: axes) let mean = self.mean(squeezingAxes: axes) @@ -2980,14 +2980,14 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Moments { moments(squeezingAxes: axes) } /// Returns the mean and variance of this tensor's elements. @inlinable - @differentiable(wrt: self) + @differentiable(reverse, wrt: self) public func moments() -> Moments { moments(squeezingAxes: Array(0..) -> Moments { ensureValid(axes: axes) let mean = self.mean(alongAxes: axes) @@ -3013,7 +3013,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Moments { ensureValid(axes: axes) let mean = self.mean(alongAxes: axes) @@ -3027,7 +3027,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// - Parameter axes: The dimensions to reduce. /// - Precondition: Each value in `axes` must be in the range `-rank.. Moments { moments(alongAxes: axes) } @@ -3039,7 +3039,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// Performs matrix multiplication with another tensor and produces the result. @inlinable -@differentiable(where Scalar: TensorFlowFloatingPoint) +@differentiable(reverse where Scalar: TensorFlowFloatingPoint) public func matmul( _ lhs: Tensor, transposed transposeLhs: Bool = false, @@ -3105,7 +3105,7 @@ infix operator •: MultiplicationPrecedence extension Tensor where Scalar: Numeric { /// Performs matrix multiplication between two tensors and produces the result. @inlinable - @differentiable(where Scalar: TensorFlowFloatingPoint) + @differentiable(reverse where Scalar: TensorFlowFloatingPoint) public static func • (lhs: Tensor, rhs: Tensor) -> Tensor { matmul(lhs, rhs) } diff --git a/Sources/TensorFlow/Operators/NN.swift b/Sources/TensorFlow/Operators/NN.swift index caa472a0a..81fb6250b 100644 --- a/Sources/TensorFlow/Operators/NN.swift +++ b/Sources/TensorFlow/Operators/NN.swift @@ -30,7 +30,7 @@ extension Tensor where Scalar: TensorFlowFloatingPoint { /// - scale: The scale, also known as gamma. /// - epsilon: A small value added to the denominator for numerical stability. @inlinable - @differentiable(wrt: (self, offset, scale)) + @differentiable(reverse, wrt: (self, offset, scale)) public func batchNormalized( alongAxis axis: Int, offset: Tensor = Tensor(0), @@ -84,7 +84,7 @@ extension Padding { /// - dilation: The dilation factor. /// - Precondition: `input` must have rank `3`. /// - Precondition: `filter` must have rank 3. -@differentiable(wrt: (input, filter)) +@differentiable(reverse, wrt: (input, filter)) public func conv1D( _ input: Tensor, filter: Tensor, @@ -113,7 +113,7 @@ public func conv1D( /// - dilations: The dilation factor for each dimension of the input. /// - Precondition: `input` must have rank `4`. /// - Precondition: `filter` must have rank 4. -@differentiable(wrt: (input, filter)) +@differentiable(reverse, wrt: (input, filter)) public func conv2D( _ input: Tensor, filter: Tensor, @@ -169,7 +169,7 @@ func _vjpConv2D( /// - dilations: The dilation factor for each dimension of the input. /// - Precondition: `input` must have rank `4`. /// - Precondition: `filter` must have rank 4. 
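`Moments` is now built through a `(reverse)`-differentiable initializer; a short sketch (not from the patch, values invented) of the bundled mean and variance it carries:

import TensorFlow

// moments() reduces over all axes and returns both statistics at once.
let x = Tensor<Float>([1.0, 2.0, 3.0, 4.0])
let stats = x.moments()
print(stats.mean)      // 2.5
print(stats.variance)  // 1.25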
-@differentiable(wrt: (input, filter)) +@differentiable(reverse, wrt: (input, filter)) public func transposedConv2D( _ input: Tensor, shape: [Int64], @@ -186,7 +186,7 @@ public func transposedConv2D( } /// TensorFlow builtin conv2d gradient helper for the input. -@differentiable(wrt: (x, filter)) +@differentiable(reverse, wrt: (x, filter)) @usableFromInline func conv2DBackpropInput( _ x: Tensor, @@ -233,7 +233,7 @@ func _vjpConv2DBackpropInput( } /// TensorFlow builtin conv2d gradient helper for the filter. -@differentiable(wrt: (x, input)) +@differentiable(reverse, wrt: (x, input)) @usableFromInline func conv2DBackpropFilter( _ x: Tensor, @@ -289,7 +289,7 @@ func _vjpConv2DBackpropFilter( /// - dilations: The dilation factor for each dimension of the input. /// - Precondition: `input` must have rank `5`. /// - Precondition: `filter` must have rank 5. -@differentiable(wrt: (input, filter)) +@differentiable(reverse, wrt: (input, filter)) public func conv3D( _ input: Tensor, filter: Tensor, @@ -342,7 +342,7 @@ func _vjpConv3D( } /// TensorFlow builtin conv3d gradient helper for the input. -@differentiable(wrt: (x, filter)) +@differentiable(reverse, wrt: (x, filter)) @usableFromInline func conv3DBackpropInput( _ x: Tensor, @@ -395,7 +395,7 @@ func _vjpConv3DBackpropInput( } /// TensorFlow builtin conv3d gradient helper for the filter. -@differentiable(wrt: (x, input)) +@differentiable(reverse, wrt: (x, input)) @usableFromInline func conv3DBackpropFilter( _ x: Tensor, @@ -456,7 +456,7 @@ func _vjpConv3DBackpropFilter( /// - padding: The padding for the operation. /// - Precondition: `input` must have rank 4. /// - Precondition: `filter` must have rank 4. -@differentiable(wrt: (input, filter)) +@differentiable(reverse, wrt: (input, filter)) public func depthwiseConv2D( _ input: Tensor, filter: Tensor, @@ -502,7 +502,7 @@ func _vjpDepthwiseConv2D( } /// TensorFlow builtin depthwiseConv2D gradient helper for the input. -@differentiable(wrt: (x, filter)) +@differentiable(reverse, wrt: (x, filter)) @usableFromInline func depthwiseConv2dBackpropInput( _ x: Tensor, @@ -549,7 +549,7 @@ func _vjpDepthwiseConv2dBackpropInput( } /// TensorFlow builtin depthwiseConv2D gradient helper for the filter. -@differentiable(wrt: (x, input)) +@differentiable(reverse, wrt: (x, input)) @usableFromInline func depthwiseConv2dBackpropFilter( _ x: Tensor, @@ -602,7 +602,7 @@ func _vjpDepthwiseConv2dBackpropFilter( /// - filterSize: The dimensions of the pooling kernel. /// - strides: The strides of the sliding filter for each dimension of the input. /// - padding: The padding for the operation. -@differentiable(wrt: input) +@differentiable(reverse, wrt: input) public func maxPool2D( _ input: Tensor, filterSize: (Int, Int, Int, Int), @@ -662,7 +662,7 @@ func _vjpMaxPool2D( /// - filterSize: The dimensions of the pooling kernel. /// - strides: The strides of the sliding filter for each dimension of the input. /// - padding: The padding for the operation. -@differentiable(wrt: input) +@differentiable(reverse, wrt: input) public func maxPool3D( _ input: Tensor, filterSize: (Int, Int, Int, Int, Int), @@ -723,7 +723,7 @@ func _vjpMaxPool3D( /// - filterSize: The dimensions of the pooling kernel. /// - strides: The strides of the sliding filter for each dimension of the input. /// - padding: The padding for the operation. 
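The pooling entry points keep their `wrt: input` registration; a small shape-level sketch (the sizes here are invented) of `maxPool2D` on an NHWC input:

import TensorFlow

// 2x2 max pooling with stride 2 halves the spatial dimensions.
let input = Tensor<Float>(ones: [1, 4, 4, 1])
let pooled = maxPool2D(input, filterSize: (1, 2, 2, 1), strides: (1, 2, 2, 1), padding: .valid)
print(pooled.shape)  // [1, 2, 2, 1]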
-@differentiable(wrt: input) +@differentiable(reverse, wrt: input) public func avgPool2D( _ input: Tensor, filterSize: (Int, Int, Int, Int), @@ -780,7 +780,7 @@ func _vjpAvgPool2D( /// - filterSize: The dimensions of the pooling kernel. /// - strides: The strides of the sliding filter for each dimension of the input. /// - padding: The padding for the operation. -@differentiable(wrt: input) +@differentiable(reverse, wrt: input) public func avgPool3D( _ input: Tensor, filterSize: (Int, Int, Int, Int, Int), @@ -849,7 +849,7 @@ func _vjpAvgPool3D( /// - seed: An optional `Int64`. Defaults to `0`. If set to be non-zero, the random number /// generator is seeded by the given seed. /// - seed2: An optional `Int64`. Defaults to `0`. A second seed to avoid seed collision. -@differentiable(wrt: input) +@differentiable(reverse, wrt: input) public func fractionalMaxPool2D( _ input: Tensor, poolingRatio: (Double, Double, Double, Double), @@ -972,7 +972,7 @@ func _vjpFractionalMaxPool( /// /// - Precondition: `input.rank == 4 && b >= 2`. /// - Precondition: The number of the features must be divisible by square of `b`. -@differentiable(wrt: input where Scalar: TensorFlowFloatingPoint) +@differentiable(reverse, wrt: input where Scalar: TensorFlowFloatingPoint) public func depthToSpace(_ input: Tensor, blockSize b: Int) -> Tensor { precondition(input.rank == 4, "The input must have rank 4.") precondition(b >= 2, "The size must be greater than 1.") @@ -1047,7 +1047,7 @@ func _vjpDepthToSpace( /// - Precondition: `input.rank == 4 && b >= 2`. /// - Precondition: The height of the input must be divisible by `b`. /// - Precondition: The width of the input must be divisible by `b`. -@differentiable(wrt: input where Scalar: TensorFlowFloatingPoint) +@differentiable(reverse, wrt: input where Scalar: TensorFlowFloatingPoint) public func spaceToDepth(_ input: Tensor, blockSize b: Int) -> Tensor { precondition(input.rank == 4, "The input must have rank 4.") precondition(b >= 2, "The block size must be greater than 1.") diff --git a/Sources/TensorFlow/Optimizers/MomentumBased.swift b/Sources/TensorFlow/Optimizers/MomentumBased.swift index fd0e3d2c7..c64c34748 100644 --- a/Sources/TensorFlow/Optimizers/MomentumBased.swift +++ b/Sources/TensorFlow/Optimizers/MomentumBased.swift @@ -80,7 +80,7 @@ where let learningRate = self.learningRate * 1 / (1 + decay * Float(step)) alpha = alpha.scaled(by: rho) + (direction .* direction).scaled(by: 1 - rho) let denominator = Model.TangentVector.sqrt(alpha).adding(epsilon) - model.move(along: (direction ./ denominator).scaled(by: -learningRate)) + model.move(by: (direction ./ denominator).scaled(by: -learningRate)) } public required init(copying other: RMSProp, to device: Device) { @@ -145,7 +145,7 @@ where public func update(_ model: inout Model, along direction: Model.TangentVector) { accumulator = accumulator + (direction .* direction) let denominator = Model.TangentVector.sqrt(accumulator).adding(epsilon) - model.move(along: (direction ./ denominator).scaled(by: -learningRate)) + model.move(by: (direction ./ denominator).scaled(by: -learningRate)) } public required init(copying other: AdaGrad, to device: Device) { @@ -221,7 +221,7 @@ where averageSquared.scaled(by: rho) + (direction .* direction).scaled(by: 1 - rho) var stepSize = direction .* Model.TangentVector.sqrt(accumulatedDelta.adding(epsilon)) stepSize ./= Model.TangentVector.sqrt(averageSquared.adding(epsilon)) - model.move(along: stepSize.scaled(by: -learningRate)) + model.move(by: stepSize.scaled(by: -learningRate)) 
accumulatedDelta = accumulatedDelta.scaled(by: rho) + (stepSize .* stepSize).scaled(by: 1 - rho) } @@ -379,7 +379,7 @@ where secondMoments = secondMoments.scaled(by: beta2) + (direction .* direction).scaled(by: 1 - beta2) let denominator = Model.TangentVector.sqrt(secondMoments).adding(epsilon) - model.move(along: (firstMoments ./ denominator).scaled(by: -stepSize)) + model.move(by: (firstMoments ./ denominator).scaled(by: -stepSize)) } public required init(copying other: Adam, to device: Device) { @@ -464,7 +464,7 @@ where } let denominator = infinityNorm.adding(epsilon) - model.move(along: (firstMoments ./ denominator).scaled(by: -stepSize)) + model.move(by: (firstMoments ./ denominator).scaled(by: -stepSize)) } public required init(copying other: AdaMax, to device: Device) { @@ -556,7 +556,7 @@ where } let denominator = Model.TangentVector.sqrt(secondMomentsMax).adding(epsilon) - model.move(along: (firstMoments ./ denominator).scaled(by: -stepSize)) + model.move(by: (firstMoments ./ denominator).scaled(by: -stepSize)) } public required init(copying other: AMSGrad, to device: Device) { @@ -643,11 +643,11 @@ where (N_sma_t - 4) * (N_sma_t - 2) * N_sma_inf / ((N_sma_inf - 4) * (N_sma_inf - 2) * (N_sma_t))) * learningRate / (1 - beta1Power) model.move( - along: (firstMoments ./ secondMoments_h).scaled(by: -stepSize * sqrtf(1 - beta2Power))) + by: (firstMoments ./ secondMoments_h).scaled(by: -stepSize * sqrtf(1 - beta2Power))) } else { // Update with un-adapted momentum. let stepSize = learningRate / (1 - beta1Power) - model.move(along: firstMoments.scaled(by: -stepSize)) + model.move(by: firstMoments.scaled(by: -stepSize)) } } diff --git a/Sources/TensorFlow/Optimizers/SGD.swift b/Sources/TensorFlow/Optimizers/SGD.swift index 11dcf1eb5..617d06b89 100644 --- a/Sources/TensorFlow/Optimizers/SGD.swift +++ b/Sources/TensorFlow/Optimizers/SGD.swift @@ -86,9 +86,9 @@ where let learningRate = self.learningRate * 1 / (1 + decay * Float(step)) velocity = velocity.scaled(by: momentum) - direction.scaled(by: learningRate) if nesterov { - model.move(along: velocity.scaled(by: momentum) - direction.scaled(by: learningRate)) + model.move(by: velocity.scaled(by: momentum) - direction.scaled(by: learningRate)) } else { - model.move(along: velocity) + model.move(by: velocity) } } diff --git a/Sources/TensorFlow/StdlibExtensions.swift b/Sources/TensorFlow/StdlibExtensions.swift index f8bb846ee..7f368ab54 100644 --- a/Sources/TensorFlow/StdlibExtensions.swift +++ b/Sources/TensorFlow/StdlibExtensions.swift @@ -201,7 +201,7 @@ where Element: Differentiable & ElementaryFunctions { /// For real types, if `x` is negative the result is NaN, even if `y` has /// an integral value. For complex types, there is a branch cut on the /// negative real axis. - public static func pow(_ x: Self, _ y: Self) -> Self { .init(zip(x, y).map(Element.pow)) } + // public static func pow(_ x: Self, _ y: Self) -> Self { .init(zip(x, y).map({ (x,y) -> Element in Element.pow(x,y)})) } /// `x` raised to the `n`th power. 
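All of the optimizers above now step the model with `move(by:)`; an end-to-end sketch (hypothetical layer and data, not from this patch) of one SGD update driving that call through `update(_:along:)`:

import TensorFlow

// One gradient-descent step on a single-weight dense layer.
var dense = Dense<Float>(weight: [[0.5]], bias: [0.0], activation: identity)
let optimizer = SGD(for: dense, learningRate: 0.1)
let x: Tensor<Float> = [[1.0]]
let y: Tensor<Float> = [[2.0]]
let grad = gradient(at: dense) { dense in meanSquaredError(predicted: dense(x), expected: y) }
optimizer.update(&dense, along: grad)  // steps the parameters via move(by:) internally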
/// @@ -234,6 +234,17 @@ where Element: Differentiable { set { base[position] = newValue } } + @inlinable + public subscript(bounds: Range.Index>) -> Self.SubSequence { + _read { yield base[bounds] } + set { base[bounds] = newValue } + } + + @inlinable + public mutating func replaceSubrange(_ subrange: Range, with newElements: C) where C : Collection, Self.Element == C.Element { + fatalError("withUnsafeBufferPointer unimplemented because TensorBuffer is abstract") + } + @inlinable public var startIndex: Index { base.startIndex } @@ -288,20 +299,56 @@ where Element: Differentiable & PointwiseMultiplicative { public var reciprocal: Self { .init(map { $0.reciprocal }) } - public static func .* (lhs: Self, rhs: Self) -> Self { - precondition(lhs.count == rhs.count, "Count mismatch: \(lhs.count) and \(rhs.count)") - return .init(zip(lhs, rhs).map(.*)) - } - - public static func .*= (lhs: inout Self, rhs: Self) { - precondition(lhs.count == rhs.count, "Count mismatch: \(lhs.count) and \(rhs.count)") - for (i, x) in zip(lhs.indices, rhs) { - lhs[i] .*= x - } - } + // public static func .* (lhs: Self, rhs: Self) -> Self { + // precondition(lhs.count == rhs.count, "Count mismatch: \(lhs.count) and \(rhs.count)") + // return .init(zip(lhs, rhs).map(.*)) + // } + + // public static func .*= (lhs: inout Self, rhs: Self) { + // precondition(lhs.count == rhs.count, "Count mismatch: \(lhs.count) and \(rhs.count)") + // for (i, x) in zip(lhs.indices, rhs) { + // lhs[i] .*= x + // } + // } } extension Collection { /// Returns the `n`th position in `self`. func index(atOffset n: Int) -> Index { index(startIndex, offsetBy: n) } } + +/// Applies the given closure `body` to `x`. When used in a context where `x` is +/// being differentiated with respect to, this function will not produce any +/// derivative at `x`. +// FIXME: Support throws-rethrows. +@inlinable +@inline(__always) +@_semantics("autodiff.nonvarying") +public func withoutDerivative(at x: T, in body: (T) -> R) -> R { + body(x) +} + +public extension Differentiable { + /// Applies the given closure to the derivative of `self`. + /// + /// Returns `self` like an identity function. When the return value is used in + /// a context where it is differentiated with respect to, applies the given + /// closure to the derivative of the return value. 
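The `withoutDerivative(at:in:)` helper added here evaluates its body as a constant with respect to differentiation; a small sketch (invented values) of how that changes a gradient:

import TensorFlow

// The mean is treated as a constant, so only the outer multiplication is differentiated.
let x = Tensor<Float>([1.0, 2.0])
let g = gradient(at: x) { x -> Tensor<Float> in
  let scale = withoutDerivative(at: x) { $0.mean() }
  return (scale * x).sum()
}
print(g)  // [1.5, 1.5]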
+ @inlinable + @differentiable(reverse, wrt: self) + func withDerivative(_ body: @escaping (inout TangentVector) -> Void) -> Self { + return self + } + + @inlinable + @derivative(of: withDerivative) + internal func _vjpWithDerivative( + _ body: @escaping (inout TangentVector) -> Void + ) -> (value: Self, pullback: (TangentVector) -> TangentVector) { + return (self, { grad in + var grad = grad + body(&grad) + return grad + }) + } +} diff --git a/Sources/third_party/Experimental/Complex.swift b/Sources/third_party/Experimental/Complex.swift index 373deaba9..f95cd8f2f 100644 --- a/Sources/third_party/Experimental/Complex.swift +++ b/Sources/third_party/Experimental/Complex.swift @@ -49,7 +49,7 @@ struct Complex { var real: T var imaginary: T - @differentiable(where T: Differentiable, T == T.TangentVector) + @differentiable(reverse where T: Differentiable, T == T.TangentVector) init(real: T = 0, imaginary: T = 0) { self.real = real self.imaginary = imaginary @@ -119,7 +119,7 @@ extension Complex: AdditiveArithmetic { lhs.imaginary += rhs.imaginary } - @differentiable(where T: Differentiable) + @differentiable(reverse where T: Differentiable) static func - (lhs: Complex, rhs: Complex) -> Complex { var temp = lhs temp -= rhs @@ -157,7 +157,7 @@ extension Complex: Numeric { ) } - @differentiable(where T: Differentiable) + @differentiable(reverse where T: Differentiable) static func * (lhs: Complex, rhs: Complex) -> Complex { var a = lhs.real var b = lhs.imaginary @@ -206,7 +206,7 @@ extension Complex: Numeric { } extension Complex: SignedNumeric { - @differentiable(where T: Differentiable) + @differentiable(reverse where T: Differentiable) static prefix func - (operand: Complex) -> Complex { return Complex(real: -operand.real, imaginary: -operand.imaginary) } @@ -218,7 +218,7 @@ extension Complex: SignedNumeric { } extension Complex { - @differentiable(where T: Differentiable) + @differentiable(reverse where T: Differentiable) static func / (lhs: Complex, rhs: Complex) -> Complex { var a = lhs.real var b = lhs.imaginary @@ -262,7 +262,7 @@ extension Complex { } extension Complex { - @differentiable(where T: Differentiable) + @differentiable(reverse where T: Differentiable) func complexConjugate() -> Complex { return Complex(real: real, imaginary: -imaginary) } @@ -273,28 +273,28 @@ func abs(_ z: Complex) -> Complex { } extension Complex { - @differentiable(where T: Differentiable, T == T.TangentVector) + @differentiable(reverse where T: Differentiable, T == T.TangentVector) func adding(real: T) -> Complex { var c = self c.real += real return c } - @differentiable(where T: Differentiable, T == T.TangentVector) + @differentiable(reverse where T: Differentiable, T == T.TangentVector) func subtracting(real: T) -> Complex { var c = self c.real -= real return c } - @differentiable(where T: Differentiable, T == T.TangentVector) + @differentiable(reverse where T: Differentiable, T == T.TangentVector) func adding(imaginary: T) -> Complex { var c = self c.imaginary += imaginary return c } - @differentiable(where T: Differentiable, T == T.TangentVector) + @differentiable(reverse where T: Differentiable, T == T.TangentVector) func subtracting(imaginary: T) -> Complex { var c = self c.imaginary -= imaginary diff --git a/Sources/x10/swift_bindings/optimizers/Optimizer.swift b/Sources/x10/swift_bindings/optimizers/Optimizer.swift index 81d10ed61..ebd998ebf 100644 --- a/Sources/x10/swift_bindings/optimizers/Optimizer.swift +++ b/Sources/x10/swift_bindings/optimizers/Optimizer.swift @@ -229,7 +229,7 @@ where for 
cb in paramGroup.callbacks { cb(&state, &optimizerState) } step = state.step ?? Tensor(zerosLike: step) } - model.move(along: step) + model.move(by: step) } /// Copies the optimizer to the specified device. diff --git a/Sources/x10/swift_bindings/training_loop.swift b/Sources/x10/swift_bindings/training_loop.swift index 565eca214..bef75a979 100644 --- a/Sources/x10/swift_bindings/training_loop.swift +++ b/Sources/x10/swift_bindings/training_loop.swift @@ -159,7 +159,7 @@ struct Statistics { } } -@differentiable +@differentiable(reverse) public func _defaultLossFunction(_ ŷ: Tensor, _ y: Tensor) -> Tensor { softmaxCrossEntropy(logits: ŷ, labels: y) } @@ -190,7 +190,7 @@ where public func run( train: Dataset, test: Dataset, crossReplicaSumDevices: [Device]? = nil, scheduleLearningRate: (Opt) -> Void = { _ in }, - lossFunction: @differentiable (Tensor, @noDerivative Tensor) -> Tensor = + lossFunction: @differentiable(reverse) (Tensor, @noDerivative Tensor) -> Tensor = _defaultLossFunction ) -> () -> (train: HostStatistics, test: HostStatistics) diff --git a/Tests/AnnotationTests/TFEagerTests.swift b/Tests/AnnotationTests/TFEagerTests.swift index ff3a4c705..093b5db09 100644 --- a/Tests/AnnotationTests/TFEagerTests.swift +++ b/Tests/AnnotationTests/TFEagerTests.swift @@ -24,7 +24,7 @@ final class AnnotationTFEagerTests: XCTestCase { public var dense3 = Dense(inputSize: 4, outputSize: 4) public var flatten = Flatten() - @differentiable + @differentiable(reverse) public func callAsFunction(_ input: Tensor) -> Tensor { let layer1 = dense1(input) let layer2 = layer1.reshaped(to: [1, 4]) diff --git a/Tests/AnnotationTests/XLATests.swift b/Tests/AnnotationTests/XLATests.swift index df3e2447e..386717df1 100644 --- a/Tests/AnnotationTests/XLATests.swift +++ b/Tests/AnnotationTests/XLATests.swift @@ -24,7 +24,7 @@ final class AnnotationXLATests: XCTestCase { public var dense3 = Dense(inputSize: 4, outputSize: 4) public var flatten = Flatten() - @differentiable + @differentiable(reverse) public func callAsFunction(_ input: Tensor) -> Tensor { let layer1 = dense1(input) let layer2 = layer1.reshaped(to: [1, 4]) diff --git a/Tests/TensorFlowTests/FreezableTests.swift b/Tests/TensorFlowTests/FreezableTests.swift index 464b62a5b..d8bf7dd40 100644 --- a/Tests/TensorFlowTests/FreezableTests.swift +++ b/Tests/TensorFlowTests/FreezableTests.swift @@ -31,7 +31,7 @@ final class FreezableTests: XCTestCase { self.bias = bias } - @differentiable + @differentiable(reverse) func callAsFunction(_ input: Tensor) -> Tensor { return input * weight + bias } @@ -40,19 +40,19 @@ final class FreezableTests: XCTestCase { var dense = FreezableDense(weight: Tensor(2), bias: Tensor(3)) let grad = FreezableDense.TangentVector(weight: Tensor(4), bias: Tensor(1)) - dense.move(along: grad) + dense.move(by: grad) XCTAssertEqual(Tensor(6), dense.weight) XCTAssertEqual(Tensor(4), dense.bias) // Freeze `dense.weight`: its value cannot be updated. dense.$weight.freeze() - dense.move(along: grad) + dense.move(by: grad) XCTAssertEqual(Tensor(6), dense.weight) XCTAssertEqual(Tensor(5), dense.bias) // Unfreeze `dense.weight`: its value can be updated again. 
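The same `(reverse)` spelling now also appears in function-type position, as in the `lossFunction` parameter above; a hedged sketch (illustrative only, not the library's own loss) of declaring and differentiating through such a closure value:

import TensorFlow

// A loss stored as a @differentiable(reverse) function value.
let loss: @differentiable(reverse) (Tensor<Float>, Tensor<Float>) -> Tensor<Float> = { ŷ, y in
  ((ŷ - y) * (ŷ - y)).mean()
}
let g = gradient(at: Tensor<Float>([1.0])) { ŷ in loss(ŷ, Tensor<Float>([0.0])) }
print(g)  // [2.0]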
dense.$weight.unfreeze() - dense.move(along: grad) + dense.move(by: grad) XCTAssertEqual(Tensor(10), dense.weight) XCTAssertEqual(Tensor(6), dense.bias) } diff --git a/Tests/TensorFlowTests/Helpers.swift b/Tests/TensorFlowTests/Helpers.swift index 41d8e6a50..70d1f92fe 100644 --- a/Tests/TensorFlowTests/Helpers.swift +++ b/Tests/TensorFlowTests/Helpers.swift @@ -59,7 +59,7 @@ extension Float: PointwiseMultiplicative { struct Multiply: Layer { var coefficient: Float - @differentiable + @differentiable(reverse) func callAsFunction(_ input: Float) -> Float { return coefficient * input } diff --git a/Tests/TensorFlowTests/LayerTests.swift b/Tests/TensorFlowTests/LayerTests.swift index 00032bc01..512446014 100644 --- a/Tests/TensorFlowTests/LayerTests.swift +++ b/Tests/TensorFlowTests/LayerTests.swift @@ -19,7 +19,7 @@ import XCTest fileprivate struct Sigmoid: ParameterlessLayer { typealias TangentVector = EmptyTangentVector - @differentiable + @differentiable(reverse) func callAsFunction(_ input: Tensor) -> Tensor { sigmoid(input) } diff --git a/Tests/TensorFlowTests/OptimizerTests.swift b/Tests/TensorFlowTests/OptimizerTests.swift index f4569abb8..56a5010ae 100644 --- a/Tests/TensorFlowTests/OptimizerTests.swift +++ b/Tests/TensorFlowTests/OptimizerTests.swift @@ -21,7 +21,7 @@ class OptimizerTests: XCTestCase { struct Model: Layer { var dense = Dense(weight: [[0.8]], bias: [0.8], activation: identity) - @differentiable + @differentiable(reverse) func callAsFunction(_ input: Tensor) -> Tensor { dense(input) } diff --git a/Tests/TensorFlowTests/SequencedTests.swift b/Tests/TensorFlowTests/SequencedTests.swift index 011f7c819..e82a36a15 100644 --- a/Tests/TensorFlowTests/SequencedTests.swift +++ b/Tests/TensorFlowTests/SequencedTests.swift @@ -26,7 +26,7 @@ struct Model2: Layer { var multiply2: Multiply = Multiply(coefficient: 2) // ###sourceLocation(file: "/usr/local/google/home/marcrasi/git/swift-apis/Tests/TensorFlowTests/SequencedTests.swift.gyb", line: 25) - @differentiable + @differentiable(reverse) func callAsFunction(_ input: Float) -> Float { input.sequenced( through: multiply1, multiply2 @@ -43,7 +43,7 @@ struct Model3: Layer { var multiply3: Multiply = Multiply(coefficient: 3) // ###sourceLocation(file: "/usr/local/google/home/marcrasi/git/swift-apis/Tests/TensorFlowTests/SequencedTests.swift.gyb", line: 25) - @differentiable + @differentiable(reverse) func callAsFunction(_ input: Float) -> Float { input.sequenced( through: multiply1, multiply2, multiply3 @@ -62,7 +62,7 @@ struct Model4: Layer { var multiply4: Multiply = Multiply(coefficient: 4) // ###sourceLocation(file: "/usr/local/google/home/marcrasi/git/swift-apis/Tests/TensorFlowTests/SequencedTests.swift.gyb", line: 25) - @differentiable + @differentiable(reverse) func callAsFunction(_ input: Float) -> Float { input.sequenced( through: multiply1, multiply2, multiply3, multiply4 @@ -83,7 +83,7 @@ struct Model5: Layer { var multiply5: Multiply = Multiply(coefficient: 5) // ###sourceLocation(file: "/usr/local/google/home/marcrasi/git/swift-apis/Tests/TensorFlowTests/SequencedTests.swift.gyb", line: 25) - @differentiable + @differentiable(reverse) func callAsFunction(_ input: Float) -> Float { input.sequenced( through: multiply1, multiply2, multiply3, multiply4, multiply5 @@ -106,7 +106,7 @@ struct Model6: Layer { var multiply6: Multiply = Multiply(coefficient: 6) // ###sourceLocation(file: "/usr/local/google/home/marcrasi/git/swift-apis/Tests/TensorFlowTests/SequencedTests.swift.gyb", line: 25) - @differentiable + 
@differentiable(reverse) func callAsFunction(_ input: Float) -> Float { input.sequenced( through: multiply1, multiply2, multiply3, multiply4, multiply5, multiply6 diff --git a/Tests/TensorFlowTests/SequencedTests.swift.gyb b/Tests/TensorFlowTests/SequencedTests.swift.gyb index e21aec19c..956d742a4 100644 --- a/Tests/TensorFlowTests/SequencedTests.swift.gyb +++ b/Tests/TensorFlowTests/SequencedTests.swift.gyb @@ -23,7 +23,7 @@ struct Model${count}: Layer { var multiply${i}: Multiply = Multiply(coefficient: ${i}) % end - @differentiable + @differentiable(reverse) func callAsFunction(_ input: Float) -> Float { input.sequenced( through: ${', '.join(['multiply%d' % i for i in range(1, count + 1)])} diff --git a/Tests/TensorFlowTests/TensorAutoDiffTests.swift b/Tests/TensorFlowTests/TensorAutoDiffTests.swift index ac6ae9200..e9507495c 100644 --- a/Tests/TensorFlowTests/TensorAutoDiffTests.swift +++ b/Tests/TensorFlowTests/TensorAutoDiffTests.swift @@ -16,7 +16,7 @@ import XCTest @testable import TensorFlow -let cube: @differentiable (Tensor) -> Tensor = { ($0 * $0 * $0) } +let cube: @differentiable(reverse) (Tensor) -> Tensor = { ($0 * $0 * $0) } final class TensorAutoDiffTests: XCTestCase { func testSimpleGrad() { @@ -207,7 +207,7 @@ final class TensorAutoDiffTests: XCTestCase { XCTAssertTrue( (Tensor(1), Tensor(1)) == gradient(at: Tensor(0), Tensor(0), in: f)) - XCTAssertTrue(([1], [1]) == pullback(at: [1], [10], in: f)([1])) + XCTAssertTrue(([1], [1]) == pullback(at: [1], [10], of: f)([1])) } func testSubtract() { @@ -215,7 +215,7 @@ final class TensorAutoDiffTests: XCTestCase { XCTAssertTrue( (Tensor(1), Tensor(-1)) == gradient(at: Tensor(0), Tensor(0), in: f)) - XCTAssertTrue(([1], [-1]) == pullback(at: [1], [10], in: f)([1])) + XCTAssertTrue(([1], [-1]) == pullback(at: [1], [10], of: f)([1])) } func testMultiply() { @@ -226,21 +226,21 @@ final class TensorAutoDiffTests: XCTestCase { func testDivide() { func f(a: Tensor, b: Tensor) -> Tensor { a / b } - XCTAssertTrue(([0.1], [-0.01]) == pullback(at: [1], [10], in: f)([1])) + XCTAssertTrue(([0.1], [-0.01]) == pullback(at: [1], [10], of: f)([1])) } func testMatmul() { func f(a: Tensor, b: Tensor) -> Tensor { matmul(a, b) } let v = Tensor(ones: [1, 1]) - XCTAssertTrue(([[0]], [[0]]) == pullback(at: [[0]], [[0]], in: f)(v)) - XCTAssertTrue(([[10]], [[1]]) == pullback(at: [[1]], [[10]], in: f)(v)) + XCTAssertTrue(([[0]], [[0]]) == pullback(at: [[0]], [[0]], of: f)(v)) + XCTAssertTrue(([[10]], [[1]]) == pullback(at: [[1]], [[10]], of: f)(v)) } func testDot() { func f(a: Tensor, b: Tensor) -> Tensor { a • b } let v = Tensor(ones: [1, 1]) - XCTAssertTrue(([[0]], [[0]]) == pullback(at: [[0]], [[0]], in: f)(v)) - XCTAssertTrue(([[10]], [[1]]) == pullback(at: [[1]], [[10]], in: f)(v)) + XCTAssertTrue(([[0]], [[0]]) == pullback(at: [[0]], [[0]], of: f)(v)) + XCTAssertTrue(([[10]], [[1]]) == pullback(at: [[1]], [[10]], of: f)(v)) } func testNegate() { @@ -509,15 +509,15 @@ final class TensorAutoDiffTests: XCTestCase { func testExpandingShape() { func f1(a: Tensor) -> Tensor { a.expandingShape(at: 0).squared() } func f2(a: Tensor) -> Tensor { a.squared().expandingShape(at: 0) } - XCTAssertEqual(pullback(at: [3, 5], in: f1)([[1, 1]]), [6, 10]) - XCTAssertEqual(pullback(at: [3, 5], in: f2)([[1, 1]]), [6, 10]) + XCTAssertEqual(pullback(at: [3, 5], of: f1)([[1, 1]]), [6, 10]) + XCTAssertEqual(pullback(at: [3, 5], of: f2)([[1, 1]]), [6, 10]) } func testSqueezingShape() { func f1(a: Tensor) -> Tensor { a.squeezingShape(at: 0).squared() } func f2(a: Tensor) -> 
Tensor { a.squared().squeezingShape(at: 0) } - XCTAssertEqual(pullback(at: [[3, 5]], in: f1)([1, 1]), [[6, 10]]) - XCTAssertEqual(pullback(at: [[3, 5]], in: f2)([1, 1]), [[6, 10]]) + XCTAssertEqual(pullback(at: [[3, 5]], of: f1)([1, 1]), [[6, 10]]) + XCTAssertEqual(pullback(at: [[3, 5]], of: f2)([1, 1]), [[6, 10]]) } func testTiled() { @@ -536,8 +536,8 @@ final class TensorAutoDiffTests: XCTestCase { func f2(a: Tensor) -> Tensor { a.squared().reshaped(toShape: Tensor([2, 1])) } - XCTAssertEqual(pullback(at: [[3, 5]], in: f1)([[1], [1]]), [[6, 10]]) - XCTAssertEqual(pullback(at: [[3, 5]], in: f2)([[1], [1]]), [[6, 10]]) + XCTAssertEqual(pullback(at: [[3, 5]], of: f1)([[1], [1]]), [[6, 10]]) + XCTAssertEqual(pullback(at: [[3, 5]], of: f2)([[1], [1]]), [[6, 10]]) } func testReshaped() { @@ -645,13 +645,13 @@ final class TensorAutoDiffTests: XCTestCase { } func testSideEffects() { - let foo: @differentiable (Tensor) -> Tensor = { x in + let foo: @differentiable(reverse) (Tensor) -> Tensor = { x in var a = x a = a + x a = a + x return a + x } - XCTAssertEqual(Tensor([4, 4]), pullback(at: Tensor([4, 5]), in: foo)([1, 1])) + XCTAssertEqual(Tensor([4, 4]), pullback(at: Tensor([4, 5]), of: foo)([1, 1])) func bar(x: Tensor) -> Tensor { var a = x diff --git a/Tests/TensorFlowTests/TrivialModelTests.swift b/Tests/TensorFlowTests/TrivialModelTests.swift index 1f093cfe2..2288cad1b 100644 --- a/Tests/TensorFlowTests/TrivialModelTests.swift +++ b/Tests/TensorFlowTests/TrivialModelTests.swift @@ -32,7 +32,7 @@ final class TrivialModelTests: XCTestCase { activation: relu, weightInitializer: glorotUniform(seed: (0xffeffe, 0xfffe))) } - @differentiable + @differentiable(reverse) func callAsFunction(_ input: Tensor) -> Tensor { let h1 = l1(input) return l2(h1) diff --git a/Tests/x10/TensorVisitorPlanTest.swift b/Tests/x10/TensorVisitorPlanTest.swift index bc5946e71..4c5132c65 100644 --- a/Tests/x10/TensorVisitorPlanTest.swift +++ b/Tests/x10/TensorVisitorPlanTest.swift @@ -20,7 +20,7 @@ struct Classifier: Layer { var layers = [Dense(inputSize: 784, outputSize: 30, activation: relu)] var final_layer = Dense(inputSize: 30, outputSize: 10) - @differentiable + @differentiable(reverse) func callAsFunction(_ input: Tensor) -> Tensor { return final_layer(layers.differentiableReduce(input) { last, layer in layer(last) }) } diff --git a/Tests/x10/ops_test.swift b/Tests/x10/ops_test.swift index f1369efff..52c63ffaf 100644 --- a/Tests/x10/ops_test.swift +++ b/Tests/x10/ops_test.swift @@ -30,15 +30,15 @@ private func TF(_ range: TensorRange) -> TensorRange { } private func assertEqualUnaryOperationGradients( - _ xlaOp: @differentiable (Tensor) -> Tensor, - _ tensorFlowOp: @differentiable (Tensor) -> Tensor, + _ xlaOp: @differentiable(reverse) (Tensor) -> Tensor, + _ tensorFlowOp: @differentiable(reverse) (Tensor) -> Tensor, _ x: Tensor, _ outGrad: Tensor, relTolerance: Float = 1e-5, absTolerance: Float = 1e-7, file: StaticString = #file, line: UInt = #line ) { - var (actual, actualPullback) = valueWithPullback(at: x, in: xlaOp) + var (actual, actualPullback) = valueWithPullback(at: x, of: xlaOp) let useReducedPrecision = x.isReducedPrecision if useReducedPrecision { XCTAssert(outGrad.isReducedPrecision) @@ -46,7 +46,7 @@ private func assertEqualUnaryOperationGradients( actual = actual.toFullPrecision } XCTAssert(!actual.isReducedPrecision) - let (expected, expectedPullback) = valueWithPullback(at: TF(x), in: tensorFlowOp) + let (expected, expectedPullback) = valueWithPullback(at: TF(x), of: tensorFlowOp) XCTAssert( 
allClose( actual: TF(actual), expected: expected, relTolerance: relTolerance, absTolerance: absTolerance @@ -65,8 +65,8 @@ private func assertEqualUnaryOperationGradients( } private func assertEqualBinaryOperationGradients( - _ xlaOp: @differentiable (Tensor, Tensor) -> Tensor, - _ tensorFlowOp: @differentiable (Tensor, Tensor) -> Tensor, + _ xlaOp: @differentiable(reverse) (Tensor, Tensor) -> Tensor, + _ tensorFlowOp: @differentiable(reverse) (Tensor, Tensor) -> Tensor, _ x: Tensor, _ y: Tensor, _ outGrad: Tensor, @@ -74,7 +74,7 @@ private func assertEqualBinaryOperationGradients( absTolerance: Float = 1e-7, file: StaticString = #file, line: UInt = #line ) { - var (actual, actualPullback) = valueWithPullback(at: x, y, in: xlaOp) + var (actual, actualPullback) = valueWithPullback(at: x, y, of: xlaOp) let useReducedPrecision = x.isReducedPrecision if useReducedPrecision { XCTAssert(y.isReducedPrecision) @@ -83,7 +83,7 @@ private func assertEqualBinaryOperationGradients( actual = actual.toFullPrecision } XCTAssert(!actual.isReducedPrecision) - let (expected, expectedPullback) = valueWithPullback(at: TF(x), TF(y), in: tensorFlowOp) + let (expected, expectedPullback) = valueWithPullback(at: TF(x), TF(y), of: tensorFlowOp) XCTAssert( allClose( actual: TF(actual), expected: expected, relTolerance: relTolerance, absTolerance: absTolerance