diff --git a/CMakeLists.txt b/CMakeLists.txt
index 150a11a..d125f57 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -38,7 +38,7 @@ set(SOURCES
     ${SOURCE_DIR}/internals/functions/internal_function_logsoftmax.cpp
     ${SOURCE_DIR}/internals/functions/internal_function_relu.cpp
     ${SOURCE_DIR}/internals/criterions/internal_criterion_nllloss.cpp
-    ${SOURCE_DIR}/internals/optimizers/internal_optimizer_sgd.cpp)
+)
 
 find_package(Eigen3 REQUIRED CONFIG)
 
diff --git a/examples/layers.cpp b/examples/layers.cpp
index e8124d8..bfe15fe 100644
--- a/examples/layers.cpp
+++ b/examples/layers.cpp
@@ -14,8 +14,8 @@ cmake --build . --target cabernet-examples-layers
 
 struct Autoencoder : public net::Model<Autoencoder> {
     Autoencoder() {
-        encoder.configure_optimizer(encoder_optimizer);
-        decoder.configure_optimizer(decoder_optimizer);
+        encoder.configure_optimizer(std::dynamic_pointer_cast<net::base::Optimizer>(encoder_optimizer));
+        decoder.configure_optimizer(std::dynamic_pointer_cast<net::base::Optimizer>(decoder_optimizer));
     }
 
     net::layer::Sequence encoder {
@@ -38,16 +38,16 @@ struct Autoencoder : public net::Model<Autoencoder> {
     }
 
     void step() {
-        encoder_optimizer.step();
-        decoder_optimizer.step();
+        encoder_optimizer->step();
+        decoder_optimizer->step();
     }
 
-    /* you can add diferent optimizers to different layers
+    /* you can add different optimizers to different layers
        or the same, doesn't matter, the optimizer has a shared pointer
        to it's implementation so you can pass instances of it with value semantics
        without making deep copies */
 
-    net::optimizer::SGD encoder_optimizer {/*learning rate*/ 0.1};
-    net::optimizer::SGD decoder_optimizer {/*learning rate*/ 0.2};
+    std::shared_ptr<net::optimizer::SGD> encoder_optimizer = std::make_shared<net::optimizer::SGD>(/*learning rate*/ 0.1);
+    std::shared_ptr<net::optimizer::SGD> decoder_optimizer = std::make_shared<net::optimizer::SGD>(/*learning rate*/ 0.2);
 };
 
 int main() {
diff --git a/examples/model.cpp b/examples/model.cpp
index 9f557e8..acee910 100644
--- a/examples/model.cpp
+++ b/examples/model.cpp
@@ -1,9 +1,9 @@
 #include
 
 struct Network : public net::Model<Network> {
-    Network() {
-        layers.configure_optimizer(optimizer);
-    }
+    Network() : net::Model<Network>(
+        std::make_shared<net::optimizer::SGD>(/*learning rate*/ 0.1)
+    ) {}
 
     net::layer::Sequence layers {
         net::layer::Linear(784, 128),
@@ -15,8 +15,6 @@
     net::Tensor forward(net::Tensor x) {
         return layers(x);
     }
-
-    net::optimizer::SGD optimizer {/*learning rate*/ 0.1};
 };
 
 int main() {
@@ -39,7 +37,7 @@ int main() {
 
         std::cout << "Epoch: " << epoch + 1 << std::endl;
 
-        for(int batch = 0; batch < dataset.lenght(); ++batch) {
+        for(int batch = 0; batch < dataset.length(); ++batch) {
             input.copy(dataset.features()[batch].internal()); // I will fix this in the future so it will be prettier and without copies.
            targets.copy(dataset.targets()[batch].internal());
@@ -49,4 +47,4 @@ int main() {
 
         }
     }
-}
+}
\ No newline at end of file
diff --git a/include/CaberNet/dataset.h b/include/CaberNet/dataset.h
index 8ebdfd1..3688872 100644
--- a/include/CaberNet/dataset.h
+++ b/include/CaberNet/dataset.h
@@ -93,7 +93,7 @@ class Dataset {
         targets_.clear();
     }
 
-    std::size_t lenght() {
+    std::size_t length() {
        return features_.size();
     }
 
diff --git a/include/CaberNet/layers.h b/include/CaberNet/layers.h
index 164389f..789b794 100644
--- a/include/CaberNet/layers.h
+++ b/include/CaberNet/layers.h
@@ -9,7 +9,6 @@
 namespace internal {
     class Tensor;
-    class Optimizer;
 };
 
 namespace net::layer {
@@ -23,7 +22,7 @@ class Linear : public Model<Linear> {
         initializer distribution = initializer::He
     );
     Tensor forward(Tensor x);
-    void set_optimizer(internal::Optimizer* optimizer);
+    void set_optimizer(std::shared_ptr<net::base::Optimizer> optimizer);
 
     private:
     Tensor weight_;
@@ -33,21 +32,21 @@
 struct ReLU : public Model<ReLU> {
     ReLU() = default;
     Tensor forward(Tensor input);
-    void set_optimizer(internal::Optimizer* optimizer) { return; }
+    void set_optimizer(std::shared_ptr<net::base::Optimizer> optimizer) { return; }
 };
 
 struct Softmax : public Model<Softmax> {
     int axis;
     Softmax(int axis);
     Tensor forward(Tensor input);
-    void set_optimizer(internal::Optimizer* optimizer) { return; }
+    void set_optimizer(std::shared_ptr<net::base::Optimizer> optimizer) { return; }
 };
 
 struct LogSoftmax : public Model<LogSoftmax> {
     int axis;
     LogSoftmax(int axis);
     Tensor forward(Tensor input);
-    void set_optimizer(internal::Optimizer* optimizer) { return; }
+    void set_optimizer(std::shared_ptr<net::base::Optimizer> optimizer) { return; }
 };
 
 class Sequence : public Model<Sequence> {
@@ -71,8 +70,7 @@ class Sequence : public Model<Sequence> {
         }
         return input;
     }
-
-    void set_optimizer(internal::Optimizer* optimizer) {
+    void set_optimizer(std::shared_ptr<net::base::Optimizer> optimizer) {
         for (auto& layer : layers_) {
             std::cout << "visited" << std::endl;
             std::visit([optimizer](auto&& argument) { argument.set_optimizer(optimizer); }, layer);
diff --git a/include/CaberNet/model.h b/include/CaberNet/model.h
index 7ae0ea2..d65e7d3 100644
--- a/include/CaberNet/model.h
+++ b/include/CaberNet/model.h
@@ -6,18 +6,10 @@
 #include "tensor.h"
 #include "optimizers.h"
 
-namespace internal {
-    class Optimizer;
-}
-
 namespace net {
 
 template<class Derived>
 class Model {
 
-    using optimizer_variant = std::variant<
-        optimizer::SGD
-    >;
-
     public:
     using size_type = std::size_t;
     using shape_type = std::vector<size_type>;
@@ -26,24 +18,19 @@
         return static_cast<Derived*>(this)->forward(input);
     }
 
-    void configure_optimizer(optimizer_variant instance) {
-        optimizer_ = std::visit([](auto&& argument) { return argument.get(); }, instance);
-        static_cast<Derived*>(this)->set_optimizer(optimizer_);
-    }
-
-    void set_optimizer(internal::Optimizer* optimizer) {
+    void configure_optimizer(std::shared_ptr<base::Optimizer> optimizer) {
         static_cast<Derived*>(this)->set_optimizer(optimizer);
+        optimizer_ = optimizer;
     }
 
-    internal::Optimizer* optimizer() const {
-        return optimizer_;
-    }
+    private:
+    std::shared_ptr<base::Optimizer> optimizer_ = std::make_shared<optimizer::NoOptimization>();
+
     protected:
     Model() = default;
-
-    private:
-    internal::Optimizer* optimizer_;
-
+    Model(std::shared_ptr<base::Optimizer> optimizer) : optimizer_(optimizer) {
+        static_cast<Derived*>(this)->set_optimizer(optimizer);
+    }
 };
 
 } // namespace net
diff --git a/include/CaberNet/optimizers.h b/include/CaberNet/optimizers.h
index e3789e9..1d280bf 100644
--- a/include/CaberNet/optimizers.h
+++ b/include/CaberNet/optimizers.h
@@ -4,39 +4,56 @@
 
 #include
 #include
 
-#include "tensor.h"
 
-namespace internal {
-
-class Tensor;
-class Optimizer;
-
-} // namespace internal
+namespace internal { class Tensor; }
 
 namespace net::base {
 
-class Optimizer {
+struct Optimizer {
+    virtual ~Optimizer() = default;
+    virtual void add_parameter(internal::Tensor* parameter) = 0;
+    virtual void step() = 0;
+};
+
+template<class Derived>
+class Optimize : public Optimizer {
     public:
-    ~Optimizer();
-    void add_parameter(internal::Tensor* parameter);
-    void step();
+    ~Optimize() override = default;
 
-    internal::Optimizer* get() const;
+    void add_parameter(internal::Tensor* parameter) override final {
+        parameters_.push_back(parameter);
+    }
 
-    protected:
-    Optimizer() = default;
-    std::shared_ptr<internal::Optimizer> optimizer_ = nullptr;
+    void step() override final {
+        for(internal::Tensor* parameter : parameters_) {
+            static_cast<Derived*>(this)->update(parameter);
+        }
+    }
+
+    private:
+    std::vector<internal::Tensor*> parameters_;
 };
 
 }
 
 namespace net::optimizer {
 
-class SGD : public base::Optimizer {
-    public:
-    SGD() = default;
-    ~SGD();
-    SGD(float learning_rate);
+class NoOptimization : public base::Optimize<NoOptimization> {
+    public:
+    ~NoOptimization() = default;
+    void update(internal::Tensor* parameter) { return; }
+};
+
+
+class SGD : public base::Optimize<SGD> {
+    public:
+    SGD(float learning_rate): learning_rate_{learning_rate} {}
+    ~SGD() = default;
+
+    void update(internal::Tensor* parameter);
+
+    protected:
+    const float learning_rate_;
+};
 
 } // namespace net::optimizer
\ No newline at end of file
diff --git a/src/internals/optimizers/internal_optimizer_sgd.cpp b/src/internals/optimizers/internal_optimizer_sgd.cpp
deleted file mode 100644
index 0ff29df..0000000
--- a/src/internals/optimizers/internal_optimizer_sgd.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-#include "../config.h"
-#include "../internal_tensor.hpp"
-#include "internal_optimizers.hpp"
-
-#if defined(USE_EIGEN_BACKEND)
-
-namespace internal {
-
-SGD::SGD(float learning_rate) {
-    learning_rate_ = learning_rate;
-}
-
-void SGD::step() {
-    for (auto parameter : parameters_) {
-        Eigen::Map<Eigen::Array<float, 1, -1>> parameter_map(parameter->data(), parameter->size());
-        Eigen::Map<Eigen::Array<float, 1, -1>> parameter_gradient_map(parameter->gradient()->data(), parameter->size());
-        parameter_map -= learning_rate_ * parameter_gradient_map;
-        parameter_gradient_map = 0;
-    }
-}
-
-}
-
-#endif
\ No newline at end of file
diff --git a/src/internals/optimizers/internal_optimizers.hpp b/src/internals/optimizers/internal_optimizers.hpp
deleted file mode 100644
index b846c47..0000000
--- a/src/internals/optimizers/internal_optimizers.hpp
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef INTERNAL_OPTIMIZERS_HPP
-#define INTERNAL_OPTIMIZERS_HPP
-
-#include
-#include
-
-namespace internal {
-
-class Tensor;
-
-class Optimizer {
-    public:
-    Optimizer() = default;
-    virtual ~Optimizer() = default;
-    virtual void add_parameter(Tensor* parameter) = 0;
-    virtual void step() = 0;
-};
-
-class SGD : public Optimizer {
-    public:
-    SGD() = default;
-    SGD(float learning_rate);
-    ~SGD() final = default;
-
-    void add_parameter(Tensor* parameter) final {
-        parameters_.push_back(parameter);
-    }
-
-    void step() final;
-
-    private:
-    std::vector<Tensor*> parameters_;
-    float learning_rate_;
-};
-
-}
-
-#endif
\ No newline at end of file
diff --git a/src/layers.cpp b/src/layers.cpp
index 604b257..f9864e1 100644
--- a/src/layers.cpp
+++ b/src/layers.cpp
@@ -1,7 +1,6 @@
 #include "CaberNet/tensor.h"
 #include "CaberNet/layers.h"
 
-#include "internals/optimizers/internal_optimizers.hpp"
 #include "internals/functions/internal_functions.hpp"
 
@@ -23,7 +22,7 @@ LogSoftmax::LogSoftmax(int axis) : axis(axis) {}
 
 /// settings
 
-void Linear::set_optimizer(internal::Optimizer* optimizer) {
+void Linear::set_optimizer(std::shared_ptr<net::base::Optimizer> optimizer) {
     optimizer->add_parameter(weight_.internal());
     optimizer->add_parameter(bias_.internal());
 }
diff --git a/src/optimizers.cpp b/src/optimizers.cpp
index c6f4bfb..ebd95a1 100644
--- a/src/optimizers.cpp
+++ b/src/optimizers.cpp
@@ -1,31 +1,20 @@
 #include "CaberNet/optimizers.h"
-#include "internals/optimizers/internal_optimizers.hpp"
 
-namespace net::base {
+#include "internals/config.h"
+#include "internals/internal_tensor.hpp"
 
-Optimizer::~Optimizer() {}
 
-internal::Optimizer* Optimizer::get() const {
-    return optimizer_.get();
-}
-
-void Optimizer::add_parameter(internal::Tensor* parameter) {
-    optimizer_->add_parameter(parameter);
-    std::cout << "Parameter added" << std::endl;
-}
-
-void Optimizer::step() {
-    if(optimizer_) optimizer_->step();
-}
-
-}
+#if defined(USE_EIGEN_BACKEND)
 
 namespace net::optimizer {
 
-SGD::~SGD() {}
-
-SGD::SGD(float learning_rate) {
-    optimizer_ = std::make_shared<internal::SGD>(learning_rate);
+void SGD::update(internal::Tensor* parameter) {
+    Eigen::Map<Eigen::Array<float, 1, -1>> parameter_map(parameter->data(), parameter->size());
+    Eigen::Map<Eigen::Array<float, 1, -1>> parameter_gradient_map(parameter->gradient()->data(), parameter->size());
+    parameter_map -= learning_rate_ * parameter_gradient_map;
+    parameter_gradient_map = 0;
 }
 
 }
+
+#endif
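
Side note on the new design, not part of the diff above: with the CRTP base net::base::Optimize<Derived>, adding another optimizer only requires implementing update(internal::Tensor*); parameter registration and the step() loop are inherited. The sketch below is a minimal illustration under that assumption — the Momentum class, its velocities_ buffer, and the momentum_ constant are hypothetical names invented here, not code from this PR. It only relies on the accessors already used above (data(), size(), gradient()->data()) and would live in a source file that sees the full internal::Tensor definition, as src/optimizers.cpp does.

// Hypothetical example (not part of this PR): SGD with momentum on top of the new CRTP base.
#include <cstddef>
#include <unordered_map>
#include <vector>

#include "CaberNet/optimizers.h"
#include "internals/internal_tensor.hpp"

namespace net::optimizer {

class Momentum : public base::Optimize<Momentum> {
    public:
    Momentum(float learning_rate, float momentum)
    :   learning_rate_{learning_rate}, momentum_{momentum} {}

    // Called by Optimize<Momentum>::step() for every registered parameter.
    void update(internal::Tensor* parameter) {
        // One velocity buffer per parameter, keyed by its address.
        std::vector<float>& velocity = velocities_[parameter];
        if (velocity.empty()) velocity.assign(parameter->size(), 0.0f);

        float* data = parameter->data();
        float* gradient = parameter->gradient()->data();
        for (std::size_t index = 0; index < parameter->size(); ++index) {
            velocity[index] = momentum_ * velocity[index] - learning_rate_ * gradient[index];
            data[index] += velocity[index];
            gradient[index] = 0.0f; // clear the gradient, as SGD::update does
        }
    }

    private:
    const float learning_rate_;
    const float momentum_;
    std::unordered_map<internal::Tensor*, std::vector<float>> velocities_;
};

} // namespace net::optimizer

Such a class would be wired up the same way as SGD in the examples above, e.g. layers.configure_optimizer(std::make_shared<net::optimizer::Momentum>(0.1f, 0.9f)).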