Add some comments to deeplima #163
Open
wants to merge 1 commit into base: master
19 changes: 19 additions & 0 deletions deeplima/include/deeplima/eigen_wrp/bilstm_and_dense.h
@@ -23,6 +23,25 @@ struct params_bilstm_dense_argmax_t : public param_base_t
std::vector<params_linear_t<M, V>> linear;
};

/**
 * Fusion of several torch modules, reimplemented in Eigen for inference.
 *
 * Precomputed inputs: the input x of each gate comes from fastText embeddings, so the
 * input-to-gate products can be computed once, at model-loading time. This is done in
 * precompute_inputs and takes a very long time. It can be done for the first layer only,
 * since only that layer's inputs are fixed embeddings.
 *
 * forward_pass has separate code paths for fixed-point and floating-point arithmetic.
 *
 * The factorized implementation is currently less efficient, although it should be faster.
 *
 * Linear layer on top of the RNN outputs:
 * - It could be quicker in fixed point than in floating point, but this is unverified and must be tested.
 * - For PoS tagging, much of the computation could be skipped because the output is very sparse, but
 *   the code must stay generic and cannot use information specific to the morphological model.
 *
 * In the fixed-point path, only the last step actually runs in fixed point, because some required
 * functions (tanh, …) have no fixed-point implementation; it is nevertheless quicker.
 */
template<class M, class V, class T>
class Op_BiLSTM_Dense_ArgMax : public Op_Base
{
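The precomputation described in the comment above can be sketched as follows. This is an illustrative reimplementation with plain `std::vector` matrices, not the real `precompute_inputs` signature: because the gate inputs are fixed fastText embeddings, the input-to-gate products can be computed once at load time instead of at every recurrent step.

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Hypothetical types for the sketch (the real code uses Eigen matrices).
using Vec = std::vector<float>;
using Mat = std::vector<Vec>; // row-major

static Vec matvec(const Mat& W, const Vec& x)
{
  Vec y(W.size(), 0.f);
  for (std::size_t i = 0; i < W.size(); ++i)
    for (std::size_t j = 0; j < x.size(); ++j)
      y[i] += W[i][j] * x[j];
  return y;
}

// Done once at model-loading time: one precomputed input-to-gate vector per
// time step. The recurrent loop then only has to add the hidden-state part.
std::vector<Vec> precompute_gate_inputs(const Mat& W_ih,
                                        const std::vector<Vec>& embeddings)
{
  std::vector<Vec> out;
  out.reserve(embeddings.size());
  for (const Vec& x : embeddings)
    out.push_back(matvec(W_ih, x));
  return out;
}
```

`precompute_gate_inputs` and `matvec` are invented names for illustration; the trade-off they show (a one-time cost at load, removed from every forward pass) is the one the comment describes.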
11 changes: 11 additions & 0 deletions deeplima/include/deeplima/nets/birnn_seq_cls.h
@@ -27,6 +27,17 @@ std::ostream& operator<< (std::ostream& out, const std::vector<T>& v) {
return out;
}

/**
 * Handles multithreaded inference.
 *
 * The buffers used elsewhere in the code are called "slots" here.
 *
 * Starts and stops the worker threads. Each classifier has its own thread pool.
 * Each thread has its own stack, which can be a problem on machines with little memory.
 */
template <class Model, class InputVectorizer/*=TorchMatrix<int64_t>*/, class Out>
class RnnSequenceClassifier : public InputVectorizer,
public ThreadPool< RnnSequenceClassifier<Model, InputVectorizer, Out> >,
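A minimal sketch of the "one thread pool per classifier, work items are slots" idea described above. `SlotPool` and its interface are illustrative assumptions, not the actual `ThreadPool` template: here a slot is just an index that a worker thread picks up and processes.

```cpp
#include <atomic>
#include <cassert>
#include <condition_variable>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>

// Each classifier would own one SlotPool; submitted slot indices refer to
// that classifier's own buffers, so pools never share state.
class SlotPool {
public:
  SlotPool(unsigned n_threads, std::function<void(int)> process)
    : process_(std::move(process))
  {
    for (unsigned i = 0; i < n_threads; ++i)
      workers_.emplace_back([this] { run(); });
  }
  ~SlotPool() { stop(); }

  void submit(int slot)
  {
    { std::lock_guard<std::mutex> lk(m_); slots_.push(slot); }
    cv_.notify_one();
  }

  // Drain remaining slots, then join all workers.
  void stop()
  {
    { std::lock_guard<std::mutex> lk(m_); stopping_ = true; }
    cv_.notify_all();
    for (auto& t : workers_)
      if (t.joinable()) t.join();
  }

private:
  void run()
  {
    for (;;) {
      int slot;
      {
        std::unique_lock<std::mutex> lk(m_);
        cv_.wait(lk, [this] { return stopping_ || !slots_.empty(); });
        if (slots_.empty()) return; // stopping and nothing left to do
        slot = slots_.front();
        slots_.pop();
      }
      process_(slot); // run inference on this slot, outside the lock
    }
  }

  std::function<void(int)> process_;
  std::vector<std::thread> workers_;
  std::queue<int> slots_;
  std::mutex m_;
  std::condition_variable cv_;
  bool stopping_ = false;
};
```

Note that each `std::thread` reserves its own stack, which is the memory concern the comment raises for low-memory machines.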
7 changes: 7 additions & 0 deletions deeplima/include/deeplima/segmentation.h
@@ -15,6 +15,13 @@

#include "nets/birnn_seq_cls.h"

/**
 * This is the only inference task implemented with both torch and Eigen, even though the
 * structure exists for the others. In practice only the Eigen implementation is used, and it
 * is far more efficient, so the torch implementation could be removed later.
 *
 * CharNgramEncoder is not well suited to transformers.
 */
#if DEEPLIMA_INFERENCE_ENGINE == IE_TORCH

#include "torch_wrp/dict_vectorizer.h"
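The backend choice mentioned in the comment is made at compile time via the `DEEPLIMA_INFERENCE_ENGINE` switch, as the `#if` above shows. A self-contained sketch of that pattern, where `IE_EIGEN` and the default value are assumptions for illustration (only `IE_TORCH` appears in the excerpt):

```cpp
#include <cassert>
#include <cstring>

// Hypothetical engine constants mirroring the DEEPLIMA_INFERENCE_ENGINE switch.
#define IE_TORCH 1
#define IE_EIGEN 2
#ifndef DEEPLIMA_INFERENCE_ENGINE
#define DEEPLIMA_INFERENCE_ENGINE IE_EIGEN // assumed default for this sketch
#endif

// Exactly one implementation is compiled in; the other never exists in the binary.
#if DEEPLIMA_INFERENCE_ENGINE == IE_TORCH
const char* inference_backend() { return "torch"; }
#elif DEEPLIMA_INFERENCE_ENGINE == IE_EIGEN
const char* inference_backend() { return "eigen"; }
#endif
```

Because the selection is preprocessor-based, dropping the torch implementation later (as the comment suggests) would mean deleting one `#if` branch rather than any runtime dispatch.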
12 changes: 12 additions & 0 deletions deeplima/include/deeplima/token_sequence_analyzer.h
@@ -18,6 +18,18 @@
namespace deeplima
{


/**
 * Handles multithreaded processing of token sequences.
 *
 * Uses several buffers. The buffers are independent of each other because each one computes a
 * little more than strictly necessary, the extra part serving as context; after computation,
 * only part of the buffer is used as output.
 *
 * Loads all the necessary models.
 *
 * Precomputation should be separated from inference and done when converting a torch model to
 * our format, but currently the torch model is loaded during initialization.
 */
template <class Matrix=eigen_wrp::EigenMatrixXf>
class TokenSequenceAnalyzer
{
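The buffer-independence idea from the comment above can be sketched as overlapping windows; names and the windowing arithmetic are illustrative, not the real buffer code. Each window includes `overlap` extra tokens on each side purely as context, so windows can be processed in any order, and only the central part of each one is emitted as output.

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

struct Window { std::size_t begin, end; }; // half-open [begin, end)

// Split n_tokens into independent windows of buf_len output tokens each,
// padded with `overlap` context tokens on both sides where available.
std::vector<Window> split_with_context(std::size_t n_tokens,
                                       std::size_t buf_len,
                                       std::size_t overlap)
{
  std::vector<Window> out;
  for (std::size_t start = 0; start < n_tokens; start += buf_len) {
    Window w;
    w.begin = start > overlap ? start - overlap : 0;         // left context
    w.end   = std::min(n_tokens, start + buf_len + overlap); // right context
    // Only [start, start + buf_len) of w contributes to the output;
    // the rest exists solely so the model sees enough context.
    out.push_back(w);
  }
  return out;
}
```

The cost of this scheme is the redundant computation on the overlapped regions, which is what the comment means by "we compute a little bit more than strictly necessary".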
5 changes: 5 additions & 0 deletions deeplima/libs/static_graph/static_graph.h
@@ -20,6 +20,11 @@ namespace deeplima
namespace nets
{

/**
 * Torch implementation of a Tensorflow-style execution graph.
 * Used for training only; at inference time it is only loaded, not executed.
 * In PyTorch, the graph is deduced from the computation itself and then saved; this
 * mechanism is not available in libtorch.
 */
class StaticGraphImpl : public torch::nn::Module
{
struct step_descr_t