diff --git a/deeplima/apps/deeplima.cpp b/deeplima/apps/deeplima.cpp index 818bcb58e..7fa99bc0d 100644 --- a/deeplima/apps/deeplima.cpp +++ b/deeplima/apps/deeplima.cpp @@ -48,8 +48,8 @@ using namespace deeplima; class file_parser { public: -std::shared_ptr psegm = nullptr; -std::shared_ptr< ITokenSequenceAnalyzer > panalyzer = nullptr; +std::shared_ptr psegm = nullptr; // tokenizer +std::shared_ptr< ITokenSequenceAnalyzer > panalyzer = nullptr; // tagger std::shared_ptr< dumper::AbstractDumper > pdumper_segm_only = nullptr; // used when using segmentation only std::shared_ptr< dumper::DumperBase > pdumper_complete = nullptr; // used when using tagger std::shared_ptr parser = nullptr; @@ -241,6 +241,8 @@ void init(const std::map& models_fn, }); } + // NOTE Commented out because psegm is now instantiated for each file in + // parse_file. This is a temporary solution while reusing it fails. // psegm->register_handler([panalyzer] // (const std::vector& tokens, // uint32_t len) @@ -289,7 +291,8 @@ void parse_file(std::istream& input, } catch (std::runtime_error& e) { - std::cerr << "In parse_file: failed to load model file " << models_fn.find("tok")->second << ": " + std::cerr << "In parse_file: failed to load model file " + << models_fn.find("tok")->second << ": " << e.what() << std::endl; throw; } @@ -340,7 +343,7 @@ void parse_file(std::istream& input, // std::cerr << "Waiting for PoS tagger to stop. Calling panalyzer->finalize" << std::endl; panalyzer->finalize(); pdumper_complete->flush(); - std::cerr << "Analyzer stopped. panalyzer->finalize returned" << std::endl; + // std::cerr << "Analyzer stopped. 
panalyzer->finalize returned" << std::endl; } if (parser) @@ -356,10 +359,14 @@ void parse_file(std::istream& input, uint64_t token_counter = 0; if(nullptr != pdumper_segm_only) + { token_counter = pdumper_segm_only->get_token_counter(); + pdumper_segm_only->reset(); + } else if (nullptr != pdumper_complete) { token_counter = pdumper_complete->get_token_counter(); + pdumper_complete->reset(); } else { diff --git a/deeplima/include/deeplima/dependency_parser.h b/deeplima/include/deeplima/dependency_parser.h index a20725de7..b17b596a3 100644 --- a/deeplima/include/deeplima/dependency_parser.h +++ b/deeplima/include/deeplima/dependency_parser.h @@ -16,6 +16,7 @@ #include "utils/str_index.h" #include "helpers/path_resolver.h" #include "deeplima/graph_dp.h" +#include "deeplima/token_type.h" // #include "graph_dp/impl/graph_dp_impl.h" #include "segmentation/impl/segmentation_decoder.h" #include "token_sequence_analyzer.h" @@ -76,7 +77,7 @@ class DependencyParser m_ptoken(nullptr) { } - inline typename tokens_with_analysis_t::token_t::token_flags_t flags() const + inline token_flags_t flags() const { assert(nullptr != m_ptoken); return m_ptoken->m_flags; @@ -85,7 +86,7 @@ class DependencyParser inline bool eos() const { assert(nullptr != m_ptoken); - return flags() & DependencyParser::tokens_with_analysis_t::token_t::token_flags_t::sentence_brk; + return flags() & token_flags_t::sentence_brk; } inline uint32_t cls(size_t idx) const @@ -151,7 +152,7 @@ class DependencyParser return m_current >= m_end; } - inline impl::token_t::token_flags_t flags() const + inline token_flags_t flags() const { assert(! 
end()); return m_buffer[m_current].m_flags; @@ -235,8 +236,9 @@ class DependencyParser : m_buffer_size(buffer_size), m_current_buffer(0), m_current_timepoint(0), - m_stridx_ptr(stridx)//, - // m_stridx(*stridx) + m_stridx_ptr(stridx), + // m_stridx(*stridx), + m_impl() { assert(m_buffer_size > 0); assert(num_buffers > 0); @@ -307,9 +309,10 @@ class DependencyParser } } + // Apply the model to the sequence of tokens given by iter from the tagger void operator()(TokenSequenceAnalyzer<>::TokenIterator& iter) { - // std::cerr << "DependencyParser::operator()" << std::endl; + // std::cerr << "DependencyParser::operator(TokenSequenceAnalyzer<>::TokenIterator& iter)" << std::endl; if (m_current_timepoint >= m_buffer_size) { acquire_buffer(); @@ -334,7 +337,7 @@ class DependencyParser token.m_len = 0; token.m_form_idx = m_stridx_ptr->get_idx(""); // std::cerr << "" << std::endl; - token.m_flags = impl::token_t::token_flags_t(segmentation::token_pos::flag_t::none); + token.m_flags = token_flags_t::none; token.m_lemm_idx = token.m_form_idx; insert_root = false; tokens_to_process--; @@ -360,8 +363,8 @@ class DependencyParser token.m_classes[i] = iter.token_class(i); } - if (iter.flags() & segmentation::token_pos::flag_t::sentence_brk || - iter.flags() & segmentation::token_pos::flag_t::paragraph_brk) + if (iter.flags() & token_flags_t::sentence_brk || + iter.flags() & token_flags_t::paragraph_brk) { insert_root = true; } @@ -480,8 +483,8 @@ class DependencyParser // << "; m_buffer_size=" << m_buffer_size // << "; token=" << iter.form() << std::endl; - if (iter.flags() & segmentation::token_pos::flag_t::sentence_brk || - iter.flags() & segmentation::token_pos::flag_t::paragraph_brk) + if (iter.flags() & token_flags_t::sentence_brk || + iter.flags() & token_flags_t::paragraph_brk) { break; // lengths.push_back(this_sentence_tokens); @@ -554,16 +557,16 @@ class GraphDpImpl: public deeplima::graph_dp::impl::GraphDependencyParser m_curr_buff_idx(0) {} - GraphDpImpl( - size_t 
threads, - size_t buffer_size_per_thread - ) - : deeplima::graph_dp::impl::GraphDependencyParser( - 0 /* TODO: FIX ME */, 4, threads * 2, buffer_size_per_thread, threads), - m_fastText(std::make_shared>()), - m_current_timepoint(deeplima::graph_dp::impl::GraphDependencyParser::get_start_timepoint()) - { - } + // GraphDpImpl( + // size_t threads, + // size_t buffer_size_per_thread + // ) + // : deeplima::graph_dp::impl::GraphDependencyParser( + // 0 /* TODO: FIX ME */, 4, threads * 2, buffer_size_per_thread, threads), + // m_fastText(std::make_shared>()), + // m_current_timepoint(deeplima::graph_dp::impl::GraphDependencyParser::get_start_timepoint()) + // { + // } std::shared_ptr convert(const EmbdStrFloat& src) { diff --git a/deeplima/include/deeplima/dumper_conllu.h b/deeplima/include/deeplima/dumper_conllu.h index 0daf4bd64..aab45a0b2 100644 --- a/deeplima/include/deeplima/dumper_conllu.h +++ b/deeplima/include/deeplima/dumper_conllu.h @@ -10,6 +10,8 @@ // #include "deeplima/segmentation/impl/segmentation_impl.h" +#include "deeplima/token_type.h" + namespace deeplima { namespace dumper @@ -158,6 +160,11 @@ class AbstractDumper : m_token_counter(0) { } virtual ~AbstractDumper() { } + + void reset() + { + m_token_counter = 0; + } }; class Horizontal : public AbstractDumper @@ -199,8 +206,8 @@ class Horizontal : public AbstractDumper } std::cout << str << " "; - if (tokens[i].m_flags & deeplima::segmentation::token_pos::flag_t::sentence_brk || - tokens[i].m_flags & deeplima::segmentation::token_pos::flag_t::paragraph_brk) + if (tokens[i].m_flags & token_flags_t::sentence_brk || + tokens[i].m_flags & token_flags_t::paragraph_brk) { // std::cerr << "Horizontal endl" << std::endl; std::cout << std::endl; @@ -265,8 +272,8 @@ class TokensToConllU : public AbstractDumper increment_token_counter(); m_next_token_idx += 1; - if (tokens[i].m_flags & deeplima::segmentation::token_pos::flag_t::sentence_brk || - tokens[i].m_flags & 
deeplima::segmentation::token_pos::flag_t::paragraph_brk) + if (tokens[i].m_flags & token_flags_t::sentence_brk || + tokens[i].m_flags & token_flags_t::paragraph_brk) { // std::cerr << "TokensToConllU end of sentence" << std::endl; std::cout << std::endl; @@ -285,6 +292,7 @@ class DumperBase virtual ~DumperBase() = default; virtual uint64_t get_token_counter() const = 0; virtual void flush() = 0; + virtual void reset() = 0; }; template @@ -296,6 +304,11 @@ class AnalysisToConllU : public DumperBase std::vector m_tokens; uint32_t m_root; + void reset() + { + m_token_counter = 0; + } + inline void increment_token_counter() { ++m_token_counter; @@ -315,13 +328,14 @@ class AnalysisToConllU : public DumperBase m_has_feats(false), m_first_feature_to_print(0) { + // std::cerr << "AnalysisToConllU()" << (void*)this << std::endl; } virtual ~AnalysisToConllU() { + // std::cerr << "~AnalysisToConllU " << (void*)this << std::endl; // if (m_next_token_idx > 1) // { - // std::cerr << "on AnalysisToConllU destructor" << std::endl; // std::cout << std::endl; // } } @@ -557,8 +571,8 @@ class AnalysisToConllU : public DumperBase increment_token_counter(); m_next_token_idx += 1; - if (iter.flags() & deeplima::segmentation::token_pos::flag_t::sentence_brk || - iter.flags() & deeplima::segmentation::token_pos::flag_t::paragraph_brk) + if (iter.flags() & token_flags_t::sentence_brk || + iter.flags() & token_flags_t::paragraph_brk) { // std::cerr << "AnalysisToConllU::operator() on sent/para break. 
m_next_token_idx=" // << m_next_token_idx << std::endl; diff --git a/deeplima/include/deeplima/eigen_wrp/bilstm_and_dense.h b/deeplima/include/deeplima/eigen_wrp/bilstm_and_dense.h index 7cd4a0988..4a124dc6f 100644 --- a/deeplima/include/deeplima/eigen_wrp/bilstm_and_dense.h +++ b/deeplima/include/deeplima/eigen_wrp/bilstm_and_dense.h @@ -98,10 +98,11 @@ class Op_BiLSTM_Dense_ArgMax : public Op_Base bool precompute() { - std::cerr << "fw weights size: " << bilstm.fw.weight_hh.rows() << " x " << bilstm.fw.weight_hh.cols() << std::endl; + // std::cerr << "fw weights size: " << bilstm.fw.weight_hh.rows() + // << " x " << bilstm.fw.weight_hh.cols() << std::endl; size_t hidden_size = bilstm.fw.weight_hh.cols(); - std::cerr << "precompute(fw.input):" << std::endl; + // std::cerr << "precompute(fw.input):" << std::endl; // /* mul_fw.matmul_input = bilstm.fw.weight_hh.block(0, 0, hidden_size, hidden_size).inverse().partialPivLu(); mul_fw.matmul_forget = bilstm.fw.weight_hh.block(hidden_size, 0, hidden_size, hidden_size).inverse().partialPivLu(); @@ -110,7 +111,7 @@ class Op_BiLSTM_Dense_ArgMax : public Op_Base // */ hidden_size = bilstm.bw.weight_hh.cols(); - std::cerr << "precompute(bw.input):" << std::endl; + // std::cerr << "precompute(bw.input):" << std::endl; // /* mul_bw.matmul_input = bilstm.bw.weight_hh.block(0, 0, hidden_size, hidden_size).inverse().partialPivLu(); mul_bw.matmul_forget = bilstm.bw.weight_hh.block(hidden_size, 0, hidden_size, hidden_size).inverse().partialPivLu(); @@ -118,7 +119,7 @@ class Op_BiLSTM_Dense_ArgMax : public Op_Base mul_bw.matmul_output = bilstm.bw.weight_hh.block(hidden_size*3, 0, hidden_size, hidden_size).inverse().partialPivLu(); // */ - std::cerr << "end of precomputing" << std::endl; + // std::cerr << "end of precomputing" << std::endl; return true; } #else @@ -142,18 +143,18 @@ class Op_BiLSTM_Dense_ArgMax : public Op_Base { if constexpr (std::is_integral_v && std::is_signed_v) { - std::cerr << "Converting hh to fixed_point" << 
std::endl; - std::cerr << "min(fw_weight_hh) = " << bilstm.fw.weight_hh.minCoeff() << " " - << "max(fw_weight_hh) = " << bilstm.fw.weight_hh.maxCoeff() << std::endl; + // std::cerr << "Converting hh to fixed_point" << std::endl; + // std::cerr << "min(fw_weight_hh) = " << bilstm.fw.weight_hh.minCoeff() << " " + // << "max(fw_weight_hh) = " << bilstm.fw.weight_hh.maxCoeff() << std::endl; convert_matrix(bilstm.fw.weight_hh, weight_fw_hh_fixed_point); - std::cerr << "min(fw_weight_hh) = " << static_cast(weight_fw_hh_fixed_point.minCoeff()) / WEIGHT_FRACTION_MULT << " " - << "max(fw_weight_hh) = " << static_cast(weight_fw_hh_fixed_point.maxCoeff()) / WEIGHT_FRACTION_MULT << std::endl; + // std::cerr << "min(fw_weight_hh) = " << static_cast(weight_fw_hh_fixed_point.minCoeff()) / WEIGHT_FRACTION_MULT << " " + // << "max(fw_weight_hh) = " << static_cast(weight_fw_hh_fixed_point.maxCoeff()) / WEIGHT_FRACTION_MULT << std::endl; - std::cerr << "min(bw_weight_hh) = " << bilstm.bw.weight_hh.minCoeff() << " " - << "max(bw_weight_hh) = " << bilstm.bw.weight_hh.maxCoeff() << std::endl; + // std::cerr << "min(bw_weight_hh) = " << bilstm.bw.weight_hh.minCoeff() << " " + // << "max(bw_weight_hh) = " << bilstm.bw.weight_hh.maxCoeff() << std::endl; convert_matrix(bilstm.bw.weight_hh, weight_bw_hh_fixed_point); - std::cerr << "min(bw_weight_hh) = " << static_cast(weight_bw_hh_fixed_point.minCoeff()) / WEIGHT_FRACTION_MULT << " " - << "max(bw_weight_hh) = " << static_cast(weight_bw_hh_fixed_point.maxCoeff()) / WEIGHT_FRACTION_MULT << std::endl; + // std::cerr << "min(bw_weight_hh) = " << static_cast(weight_bw_hh_fixed_point.minCoeff()) / /*WEIGHT_FRACTION_MULT << " " + // << "max(bw_weight_hh) = " << static_cast(weight_bw_hh_fixed_point.maxCoeff()) / WEIGHT_FRACTION_MULT << std::endl;*/ } return true; diff --git a/deeplima/include/deeplima/eigen_wrp/lstm_beam_decoder.h b/deeplima/include/deeplima/eigen_wrp/lstm_beam_decoder.h index a07eeb463..5ab5d1fa6 100644 --- 
a/deeplima/include/deeplima/eigen_wrp/lstm_beam_decoder.h +++ b/deeplima/include/deeplima/eigen_wrp/lstm_beam_decoder.h @@ -168,11 +168,11 @@ class Op_LSTM_Beam_Decoder : public Op_Base decoding_step++; M& states_c = wb->states_c; - if (states_c.cols() != beam_size) + if ((size_t)states_c.cols() != beam_size) states_c = M::Zero(hidden_size, beam_size); for (size_t i = 0; i < beam_size; ++i) states_c.col(i) = c; M& states_h = wb->states_h; - if (states_h.cols() != beam_size) + if ((size_t)states_h.cols() != beam_size) states_h = M::Zero(hidden_size, beam_size); for (size_t i = 0; i < beam_size; ++i) states_h.col(i) = h; diff --git a/deeplima/include/deeplima/eigen_wrp/word_seq_embd_vectorizer.h b/deeplima/include/deeplima/eigen_wrp/word_seq_embd_vectorizer.h index 57a04842d..49266330b 100644 --- a/deeplima/include/deeplima/eigen_wrp/word_seq_embd_vectorizer.h +++ b/deeplima/include/deeplima/eigen_wrp/word_seq_embd_vectorizer.h @@ -532,7 +532,7 @@ class WordSeqEmbdVectorizerWithPrecomputing m_pModel->precompute_inputs(input, Parent::m_precomputed_vectors[Parent::m_curr_bucket_id], 0); Parent::m_curr_bucket_id++; - if (Parent::m_curr_bucket_id >= Parent::m_precomputed_vectors.size()) + if ((size_t)Parent::m_curr_bucket_id >= Parent::m_precomputed_vectors.size()) { Parent::m_precomputed_vectors.resize(Parent::m_curr_bucket_id + 1); } diff --git a/deeplima/include/deeplima/ner.h b/deeplima/include/deeplima/ner.h index b7d21a43e..d6357c49d 100644 --- a/deeplima/include/deeplima/ner.h +++ b/deeplima/include/deeplima/ner.h @@ -120,9 +120,31 @@ namespace impl #error Unknown inference engine #endif + /** + * A kind of RnnSequenceClassifier, used for named entities tagging (?), but + * also the parent of TaggingImpl, used as member in TokenSequenceAnalyzer + */ template class EntityTaggingClassifier: public RnnSequenceClassifier, BaseMatrix, uint8_t> - {}; + { + public: + EntityTaggingClassifier() : + RnnSequenceClassifier, BaseMatrix, uint8_t>() + { + } + + // 
EntityTaggingClassifier(uint32_t max_feat, + // uint32_t overlap, + // uint32_t num_slots, + // uint32_t slot_len, + // uint32_t num_threads) : + // RnnSequenceClassifier, BaseMatrix, uint8_t>( + // max_feat, overlap, num_slots, slot_len, num_threads) + // { + // } + + virtual ~EntityTaggingClassifier() = default; + }; } // namespace impl diff --git a/deeplima/include/deeplima/nets/birnn_seq_cls.h b/deeplima/include/deeplima/nets/birnn_seq_cls.h index 1ad15891b..18d081fed 100644 --- a/deeplima/include/deeplima/nets/birnn_seq_cls.h +++ b/deeplima/include/deeplima/nets/birnn_seq_cls.h @@ -28,14 +28,52 @@ std::ostream& operator<< (std::ostream& out, const std::vector& v) { return out; } +/** + * The RnnSequenceClassifier is a Model, able to infer but also a thread pool + * to dispatch the work between several threads. And also a vectorizer, here + * a matrix. + */ template */, class Out> class RnnSequenceClassifier : public InputVectorizer, public ThreadPool< RnnSequenceClassifier >, public Model { +public: + RnnSequenceClassifier() + : m_overlap(0), + m_num_slots(0), + m_slot_len(0), + m_slots(), + m_lengths(), + m_output(std::make_shared< StdMatrix >()) + {} + + // RnnSequenceClassifier(uint32_t max_feat, + // uint32_t overlap, + // uint32_t num_slots, + // uint32_t slot_len, + // uint32_t num_threads) + // : m_overlap(0), + // m_num_slots(0), + // m_slot_len(0), + // m_slots(), + // m_lengths(), + // m_output(std::make_shared< StdMatrix >()) + // { + // init(max_feat, overlap, num_slots, slot_len, num_threads); + // } + + virtual ~RnnSequenceClassifier() + { + // std::cerr << "-> ~RnnSequenceClassifier" << std::endl; + RnnSequenceClassifierThreadPool::stop(); + // std::cerr << "<- ~RnnSequenceClassifier" << std::endl; + } + +protected: typedef RnnSequenceClassifier ThisClass; - typedef ThreadPool< RnnSequenceClassifier > ThreadPoolParent; - friend ThreadPoolParent; + typedef ThreadPool< ThisClass > RnnSequenceClassifierThreadPool; + friend 
RnnSequenceClassifierThreadPool; enum slot_flags_t : uint8_t { @@ -94,18 +132,23 @@ class RnnSequenceClassifier : public InputVectorizer, m_next(nullptr), m_lengths(s.m_lengths) { } + ~slot_t() = default; + slot_t& operator=(const slot_t&s) + { + m_input_begin = s.m_input_begin; + m_input_end = s.m_input_end; + m_output_begin = s.m_output_begin; + m_output_end = s.m_output_end; + m_flags = s.m_flags; + m_work_started = s.m_work_started; + m_lock_count = 0; + m_prev = nullptr; + m_next = nullptr; + m_lengths = s.m_lengths; + return *this; + } }; -protected: - uint32_t m_overlap; - uint32_t m_num_slots; - uint32_t m_slot_len; - - std::vector m_slots; - std::vector> m_lengths; - std::shared_ptr< StdMatrix > m_output; // external - classifier id, internal - time position - - inline int32_t prev_slot(uint32_t idx) { assert(idx < m_num_slots); @@ -129,6 +172,7 @@ class RnnSequenceClassifier : public InputVectorizer, } } + /** Push the slot @ref idx in the thread pool for starting the job on it. */ inline void start_job_impl(uint32_t idx) { assert(idx < m_num_slots); @@ -142,7 +186,7 @@ class RnnSequenceClassifier : public InputVectorizer, if (! 
slot.m_work_started) { slot.m_work_started = true; - ThreadPoolParent::push(&slot); + RnnSequenceClassifierThreadPool::push(&slot); } } @@ -178,8 +222,8 @@ class RnnSequenceClassifier : public InputVectorizer, // << "; flags= " << int(slot.m_flags) // << "; prev=" << (void*)slot.m_prev // << "; next=" << (void*)slot.m_next - // // << "; output=" << (*(this_ptr->m_output))[0] - // << std::endl; + // << "; output=" << (*(this_ptr->m_output))[0] + // << std::endl; // this_ptr->pretty_print(); assert(slot.m_lock_count > 0); @@ -210,37 +254,17 @@ class RnnSequenceClassifier : public InputVectorizer, return m_output; } - RnnSequenceClassifier() - : m_overlap(0), - m_num_slots(0), - m_slot_len(0), - m_slots(), - m_lengths(), - m_output(std::make_shared< StdMatrix >()) - {} - - RnnSequenceClassifier(uint32_t max_feat, - uint32_t overlap, - uint32_t num_slots, - uint32_t slot_len, - uint32_t num_threads) - : m_overlap(0), - m_num_slots(0), - m_slot_len(0), - m_slots(), - m_lengths(), - m_output(std::make_shared< StdMatrix >()) - { - init(max_feat, overlap, num_slots, slot_len, num_threads); - } - /** * Need to be called to be able to reuse this classifier on several sequences */ - void reset() + virtual void reset() { + // std::cerr << "RnnSequenceClassifier::reset()" << (void*)this << std::endl; + m_slots.clear(); + m_slots.resize(m_num_slots); for (size_t i = 0; i < m_num_slots; i++) { + m_slots[i] = slot_t(); slot_t& slot = m_slots[i]; slot.m_output_begin = m_overlap + i * m_slot_len; @@ -273,11 +297,10 @@ class RnnSequenceClassifier : public InputVectorizer, // << " output begin=" << slot.m_output_begin << ", end=" << slot.m_output_end // << std::endl; } - - } - void init(uint32_t max_feat, + + virtual void init(uint32_t max_feat, uint32_t overlap, uint32_t num_slots, uint32_t slot_len, @@ -288,7 +311,7 @@ class RnnSequenceClassifier : public InputVectorizer, // RnnSequenceClassifier::init 1024, 16, 8, 1024, 1, true // RnnSequenceClassifier::init 464, 0, 8, 1024, 1, 
false - // std::cerr << "RnnSequenceClassifier::init max_feat=" << max_feat << ", overlap=" << overlap + // std::cerr << "RnnSequenceClassifier::init "<<(void*)this<<" max_feat=" << max_feat << ", overlap=" << overlap // << ", num_slots=" << num_slots // << ", slot_len=" << slot_len // << ", num_threads=" << num_threads @@ -303,10 +326,8 @@ class RnnSequenceClassifier : public InputVectorizer, { Model::init_new_worker(m_slot_len + m_overlap * 2, precomputed_input); // skip id - all workers are identical } - ThreadPoolParent::init(num_threads); + RnnSequenceClassifierThreadPool::init(num_threads); - m_slots.clear(); - m_slots.resize(m_num_slots); reset(); // set up slots m_lengths.resize(m_num_slots); @@ -331,13 +352,6 @@ class RnnSequenceClassifier : public InputVectorizer, Model::get_classes_from_fn(fn, classes_names, classes); } - virtual ~RnnSequenceClassifier() - { - // std::cerr << "-> ~RnnSequenceClassifier" << std::endl; - ThreadPoolParent::stop(); - // std::cerr << "<- ~RnnSequenceClassifier" << std::endl; - } - inline uint8_t get_output(uint64_t pos, uint8_t cls) { assert(cls < m_output->size()); @@ -375,7 +389,8 @@ class RnnSequenceClassifier : public InputVectorizer, { assert(idx < m_num_slots); m_slots[idx].m_lock_count += v; - // std::cerr << "RnnSequenceClassifier::increment_lock_count by " << int(v) << " for slot " << int(idx+1) + // std::cerr << "RnnSequenceClassifier::increment_lock_count by " << int(v) + // << " for slot " << int(idx+1) // << ". 
it is now: " << int(m_slots[idx].m_lock_count) << std::endl; // pretty_print(); } @@ -516,7 +531,7 @@ class RnnSequenceClassifier : public InputVectorizer, { // std::cerr << "RnnSequenceClassifier::wait_for_slot in while lock_count=" << int(slot.m_lock_count) << std::endl; // pretty_print(); - ThreadPoolParent::wait_for_any_job_notification([&slot]() { + RnnSequenceClassifierThreadPool::wait_for_any_job_notification([&slot]() { return 1 == slot.m_lock_count; } ); @@ -525,13 +540,23 @@ class RnnSequenceClassifier : public InputVectorizer, void pretty_print() const { - std::cerr << "SLOTS: "; + std::cerr << (void*)this << " " << "SLOTS: "; for (size_t i = 0; i < m_num_slots; i++) { std::cerr << " | " << int(m_slots[i].m_lock_count); } std::cerr << " |" << std::endl; } + +protected: + uint32_t m_overlap; + uint32_t m_num_slots; + uint32_t m_slot_len; + + std::vector m_slots; + std::vector> m_lengths; + std::shared_ptr< StdMatrix > m_output; // external - classifier id, internal - time position + }; } // namespace deeplima diff --git a/deeplima/include/deeplima/reader_conllu.h b/deeplima/include/deeplima/reader_conllu.h index 778a3c2f8..bbe9fc74e 100644 --- a/deeplima/include/deeplima/reader_conllu.h +++ b/deeplima/include/deeplima/reader_conllu.h @@ -90,7 +90,7 @@ class CoNLLUReader : public FormattedReaderBase if (token_idx > 0) { token_pos& token = tokens[token_idx - 1]; - token.m_flags = token_pos::flag_t(token.m_flags | token_pos::flag_t::sentence_brk); + token.m_flags = token_flags_t(token.m_flags | token_flags_t::sentence_brk); } continue; } @@ -112,7 +112,7 @@ class CoNLLUReader : public FormattedReaderBase const char* p_after_eol = p_eol + 1; if (*p_after_eol == '\n') { token_pos& token = tokens[tokens.size() - 1]; - token.m_flags = token_pos::flag_t(token.m_flags | token_pos::flag_t::sentence_brk); + token.m_flags = token_flags_t(token.m_flags | token_flags_t::sentence_brk); } } @@ -133,7 +133,7 @@ class CoNLLUReader : public FormattedReaderBase p++; 
token_pos& token = tokens[token_idx - 1]; - token.m_flags = token_pos::flag_t(token.m_flags | token_pos::flag_t::sentence_brk); + token.m_flags = token_flags_t(token.m_flags | token_flags_t::sentence_brk); } m_callback(tokens, token_idx); token_idx = 0; @@ -197,7 +197,7 @@ class CoNLLUReader : public FormattedReaderBase if (eos) { - token.m_flags = token_pos::flag_t(token.m_flags | token_pos::flag_t::sentence_brk); + token.m_flags = token_flags_t(token.m_flags | token_flags_t::sentence_brk); } return true; diff --git a/deeplima/include/deeplima/segmentation/impl/char_ngram_encoder.h b/deeplima/include/deeplima/segmentation/impl/char_ngram_encoder.h index c3f01ecb7..9234f9482 100644 --- a/deeplima/include/deeplima/segmentation/impl/char_ngram_encoder.h +++ b/deeplima/include/deeplima/segmentation/impl/char_ngram_encoder.h @@ -138,7 +138,7 @@ class CharNgramEncoder : public StreamDecoder = ONE_POS_MASK(StreamDecoder::bits_per_position(nd.m_type), typename StreamDecoder::buffer_t); #ifndef NDEBUG - std::cerr << "one_pos_mask == " << pretty_bits_to_string(one_pos_mask) << std::endl; + // std::cerr << "one_pos_mask == " << pretty_bits_to_string(one_pos_mask) << std::endl; #endif typename StreamDecoder::buffer_t mask = 0; @@ -157,8 +157,8 @@ class CharNgramEncoder : public StreamDecoder m_shift[i] = StreamDecoder::bits_per_position(nd.m_type) * (m_lookahead - l); #ifndef NDEBUG - std::cerr << "mask [" << i << "] == " << pretty_bits_to_string(m_mask[i]) << std::endl; - std::cerr << "shift [" << i << "] == " << (uint32_t)m_shift[i] << std::endl; + // std::cerr << "mask [" << i << "] == " << pretty_bits_to_string(m_mask[i]) << std::endl; + // std::cerr << "shift [" << i << "] == " << (uint32_t)m_shift[i] << std::endl; #endif } diff --git a/deeplima/include/deeplima/segmentation/impl/segmentation_decoder.h b/deeplima/include/deeplima/segmentation/impl/segmentation_decoder.h index 9f6d416e0..7fd198b95 100644 --- 
a/deeplima/include/deeplima/segmentation/impl/segmentation_decoder.h +++ b/deeplima/include/deeplima/segmentation/impl/segmentation_decoder.h @@ -15,6 +15,7 @@ #include #include "deeplima/utils/std_matrix.h" +#include "deeplima/token_type.h" namespace deeplima { @@ -23,27 +24,19 @@ namespace segmentation struct token_pos { - enum flag_t : uint8_t - { - none = 0x00, - sentence_brk = 0x01, - paragraph_brk = 0x02, - max_flags - }; - uint16_t m_offset; // offset from previous token end uint16_t m_len; // length of this token in bytes const char* m_pch; - flag_t m_flags; + token_flags_t m_flags; token_pos() - : m_offset(0), m_len(0), m_pch(nullptr), m_flags(none) {} + : m_offset(0), m_len(0), m_pch(nullptr), m_flags(token_flags_t::none) {} inline void clear() { m_offset = m_len = 0; m_pch = nullptr; - m_flags = none; + m_flags = token_flags_t::none; } inline bool empty() const @@ -319,7 +312,7 @@ class SegmentationDecoder : public CharReader<> // TODO insert the marker for case continuing [[case_]] case segm_tag_t::E_EOS: - m_tokens[pos].m_flags = token_pos::flag_t(m_tokens[pos].m_flags | token_pos::flag_t::sentence_brk); + m_tokens[pos].m_flags = token_flags_t(m_tokens[pos].m_flags | token_flags_t::sentence_brk); [[fallthrough]]; case segm_tag_t::E: @@ -351,7 +344,7 @@ class SegmentationDecoder : public CharReader<> assert(0 == m_tokens[pos].m_len); m_tokens[pos].m_pch = *pch; m_tokens[pos].m_len += m_len[from]; - m_tokens[pos].m_flags = token_pos::flag_t(m_tokens[pos].m_flags | token_pos::flag_t::sentence_brk); + m_tokens[pos].m_flags = token_flags_t(m_tokens[pos].m_flags | token_flags_t::sentence_brk); save_current_token(pos, temp_token_len, start); } break; diff --git a/deeplima/include/deeplima/segmentation/impl/segmentation_impl.h b/deeplima/include/deeplima/segmentation/impl/segmentation_impl.h index 54f320233..36439d95f 100644 --- a/deeplima/include/deeplima/segmentation/impl/segmentation_impl.h +++ 
b/deeplima/include/deeplima/segmentation/impl/segmentation_impl.h @@ -49,24 +49,29 @@ namespace eigen_impl typedef DictEmbdVectorizer EmbdVectorizer; } -namespace impl { +namespace impl +{ using CharNgramEncoderFromUtf8 = CharNgramEncoder< Utf8Reader<> > ; using SegmentationClassifier = RnnSequenceClassifier ; using InputEncoder = CharNgramEncoderFromUtf8; using OutputDecoder = SegmentationDecoder; +/** + * The implementation of the segmenter, a SegmentationClassifier, itself a + * RnnSequenceClassifier + */ class SegmentationImpl: public ISegmentation, public SegmentationClassifier { public: SegmentationImpl(); - SegmentationImpl( - const std::vector& ngram_descr, - size_t threads, - size_t buffer_size_per_thread - ); + // SegmentationImpl( + // const std::vector& ngram_descr, + // size_t threads, + // size_t buffer_size_per_thread + // ); virtual ~SegmentationImpl() = default; diff --git a/deeplima/include/deeplima/tagging/impl/tagging_impl.h b/deeplima/include/deeplima/tagging/impl/tagging_impl.h index 9f0f19ce2..1e07f6d90 100644 --- a/deeplima/include/deeplima/tagging/impl/tagging_impl.h +++ b/deeplima/include/deeplima/tagging/impl/tagging_impl.h @@ -40,7 +40,7 @@ class enriched_token_t m_ptoken(nullptr) { } - inline token_buffer_t<>::token_t::token_flags_t flags() const + inline token_flags_t flags() const { assert(nullptr != m_ptoken); return m_ptoken->m_flags; @@ -49,7 +49,7 @@ class enriched_token_t inline bool eos() const { assert(nullptr != m_ptoken); - return flags() & token_buffer_t<>::token_t::token_flags_t::sentence_brk; + return flags() & token_flags_t::sentence_brk; } inline const std::string& form() const @@ -83,6 +83,12 @@ class enriched_token_buffer_t } }; + +/** + * Class implementing the tagger, used as member in TokenSequenceAnalyzer, the + * main tagger class + * Son of EntityTaggingClassifier (defined in ner.h), itself a RnnSequenceClassifier + */ template class TaggingImpl: public EntityTaggingClassifier { @@ -94,21 +100,59 @@ class 
TaggingImpl: public EntityTaggingClassifier public: TaggingImpl() : + Classifier(), m_fastText(std::make_shared>()), + m_current_timepoint(Classifier::get_start_timepoint()), m_current_slot_timepoints(0), - m_current_slot_no(-1), - m_last_completed_slot(-1), - m_curr_buff_idx(0) + // m_current_slot_no(-1), + m_last_completed_slot(-1) //, + // m_curr_buff_idx(0) {} - TaggingImpl( - size_t threads, - size_t buffer_size_per_thread - ) - : Classifier(0 /* TODO: FIX ME */, 4, threads * 2, buffer_size_per_thread, threads), - m_fastText(std::make_shared>()), - m_current_timepoint(Classifier::get_start_timepoint()) + // TaggingImpl( + // size_t threads, + // size_t buffer_size_per_thread + // ) : + // Classifier(), + // m_fastText(std::make_shared>()), + // m_current_timepoint(Classifier::get_start_timepoint()), + // m_current_slot_timepoints(0), + // // m_current_slot_no(-1), + // m_last_completed_slot(-1) //, + // // m_curr_buff_idx(0) + // { + // } + + virtual ~TaggingImpl() + { + // std::cerr << "~TaggingImpl" << std::endl; + } + + virtual void init(size_t threads, size_t num_buffers, + size_t buffer_size_per_thread, StringIndex& stridx) + { + m_fastText->get_words([&stridx](const std::string& word){ stridx.get_idx(word); }); + + m_vectorizer.init_features({ + { Vectorizer::str_feature, "form", m_fastText } + }); + + m_vectorizer.set_model(this); + + Classifier::init(m_vectorizer.dim(), + 16, num_buffers, buffer_size_per_thread, threads, + m_vectorizer.is_precomputing()); + + m_current_timepoint = Classifier::get_start_timepoint(); + } + + virtual void reset() { + // std::cerr << "TaggingImpl::reset" << std::endl; + Classifier::reset(); + m_current_timepoint = Classifier::get_start_timepoint(); + m_current_slot_timepoints = 0; + m_last_completed_slot = -1; } virtual void load(const std::string& fn, const PathResolver& path_resolver) @@ -129,23 +173,6 @@ class TaggingImpl: public EntityTaggingClassifier m_fastText->load(fastText_fn); } - void init(size_t threads, size_t 
num_buffers, size_t buffer_size_per_thread, StringIndex& stridx) - { - m_fastText->get_words([&stridx](const std::string& word){ stridx.get_idx(word); }); - - m_vectorizer.init_features({ - { Vectorizer::str_feature, "form", m_fastText } - }); - - m_vectorizer.set_model(this); - - Classifier::init(m_vectorizer.dim(), - 16, num_buffers, buffer_size_per_thread, threads, - m_vectorizer.is_precomputing()); - - m_current_timepoint = Classifier::get_start_timepoint(); - } - void precompute_inputs(const typename Vectorizer::dataset_t& buffer) { m_vectorizer.precompute(buffer); @@ -156,14 +183,10 @@ class TaggingImpl: public EntityTaggingClassifier virtual void register_handler(const tagging_callback_t fn) { + // std::cerr << "TaggingImpl::register_handler" << std::endl; m_callback = fn; } - virtual ~TaggingImpl() - { - // std::cerr << "~TaggingImpl" << std::endl; - } - protected: inline void increment_timepoint(uint64_t& timepoint) @@ -239,7 +262,7 @@ class TaggingImpl: public EntityTaggingClassifier while (lock_count > 1) { // Worker still uses this slot. Waiting... - // std::cerr << "TaggingImpl::send_all_results: waiting for slot " << slot_idx+1 + // std::cerr << "TaggingImpl::send_all_results: Worker still uses this slot. Waiting... " << slot_idx+1 // << " (lock_count==" << int(lock_count) << ")\n"; // Classifier::pretty_print(); Classifier::wait_for_slot(slot_idx); @@ -283,7 +306,7 @@ class TaggingImpl: public EntityTaggingClassifier while (lock_count > 1) { // Worker still uses this slot. Waiting... 
- // std::cerr << "tagging handle_timepoint, waiting for slot " << slot_no + // std::cerr << "TaggingImpl::acquire_slot tagging handle_timepoint, waiting for slot " << slot_no // << " lock_count=" << int(lock_count) << std::endl; // Classifier::pretty_print(); Classifier::wait_for_slot(slot_no); @@ -296,12 +319,14 @@ class TaggingImpl: public EntityTaggingClassifier } Classifier::increment_lock_count(slot_no); + m_current_slot_timepoints = Classifier::get_slot_size(); } public: virtual void handle_token_buffer(size_t slot_no, const typename Vectorizer::dataset_t& buffer, int timepoints_to_analyze = -1) { - // std::cerr << "TaggingImpl::handle_token_buffer " << slot_no << ", " << timepoints_to_analyze << std::endl; + // std::cerr << "TaggingImpl::handle_token_buffer " << slot_no << ", " + // << timepoints_to_analyze << std::endl; send_results_if_available(); acquire_slot(slot_no); size_t offset = slot_no * buffer.size() + Classifier::get_start_timepoint(); @@ -337,10 +362,10 @@ class TaggingImpl: public EntityTaggingClassifier uint64_t m_current_timepoint; uint32_t m_current_slot_timepoints; - int32_t m_current_slot_no; + // int32_t m_current_slot_no; int32_t m_last_completed_slot; - size_t m_curr_buff_idx; + // size_t m_curr_buff_idx; }; } // namespace impl diff --git a/deeplima/include/deeplima/token_sequence_analyzer.h b/deeplima/include/deeplima/token_sequence_analyzer.h index e3a52e6da..02cbdcf48 100644 --- a/deeplima/include/deeplima/token_sequence_analyzer.h +++ b/deeplima/include/deeplima/token_sequence_analyzer.h @@ -27,6 +27,7 @@ #include "deeplima/lemmatization/impl/lemmatization_impl.h" #include "deeplima/segmentation/impl/segmentation_decoder.h" #include "deeplima/tagging/impl/tagging_impl.h" +#include "deeplima/token_type.h" template<> struct std::hash { @@ -82,7 +83,7 @@ class TokenSequenceAnalyzer : public ITokenSequenceAnalyzer : m_stridx(stridx), m_buffer(buffer), m_lemm_buffer(lemm_buffer), m_classes(classes), m_current(0), m_offset(offset), 
m_end(end - offset) { - assert(end > offset + 1); + assert(end >= offset + 1); } inline bool end() const @@ -90,7 +91,7 @@ class TokenSequenceAnalyzer : public ITokenSequenceAnalyzer return m_current >= m_end; } - inline impl::token_t::token_flags_t flags() const + inline token_flags_t flags() const { assert(! end()); return m_buffer[m_current].m_flags; @@ -143,7 +144,7 @@ class TokenSequenceAnalyzer : public ITokenSequenceAnalyzer inline void reset(size_t position = 0) { - std::cerr << "TokenSequenceAnalyzer::reset" << std::endl; + // std::cerr << "TokenSequenceAnalyzer::reset" << std::endl; m_current = position; } @@ -184,7 +185,7 @@ class TokenSequenceAnalyzer : public ITokenSequenceAnalyzer m_ptoken(nullptr) { } - inline token_buffer_t<>::token_t::token_flags_t flags() const + inline token_flags_t flags() const { assert(nullptr != m_ptoken); return m_ptoken->m_flags; @@ -193,7 +194,7 @@ class TokenSequenceAnalyzer : public ITokenSequenceAnalyzer inline bool eos() const { assert(nullptr != m_ptoken); - return flags() & token_buffer_t<>::token_t::token_flags_t::sentence_brk; + return flags() & token_flags_t::sentence_brk; } inline const std::string& form() const @@ -276,13 +277,14 @@ class TokenSequenceAnalyzer : public ITokenSequenceAnalyzer m_current_timepoint(0), m_stridx_ptr(std::make_shared()), m_stridx(*m_stridx_ptr), + m_cls(), m_classes(std::make_shared>()) - { - std::cerr << "TokenSequenceAnalyzer::TokenSequenceAnalyzer " << model_fn << ", " - << lemm_model_fn << ", " << lemm_dict_fn << ", " - << fixed_ini_fn << ", " << lower_ini_fn << ", " - << fixed_lemm_fn - << std::endl; +{ + // std::cerr << "TokenSequenceAnalyzer::TokenSequenceAnalyzer " << model_fn << ", " + // << lemm_model_fn << ", " << lemm_dict_fn << ", " + // << fixed_ini_fn << ", " << lower_ini_fn << ", " + // << fixed_lemm_fn + // << std::endl; assert(m_buffer_size > 0); assert(num_buffers > 0); m_buffers.resize(num_buffers); @@ -324,7 +326,7 @@ class TokenSequenceAnalyzer : public 
ITokenSequenceAnalyzer m_cls.register_handler([this]( std::shared_ptr< StdMatrix > classes, size_t begin, size_t end, size_t slot_idx){ - std::cerr << "handler called: " << slot_idx << std::endl; + // std::cerr << "handler called: " << slot_idx << std::endl; lemmatize(m_buffers[slot_idx], m_lemm_buffers[slot_idx], classes, begin, end); @@ -344,7 +346,7 @@ class TokenSequenceAnalyzer : public ITokenSequenceAnalyzer std::shared_ptr< StdMatrix > classes, size_t begin, size_t end, size_t slot_idx) { - std::cerr << "handler called: " << slot_idx << std::endl; + // std::cerr << "handler called: " << slot_idx << std::endl; m_classes = classes; m_output_callback(m_stridx_ptr, m_buffers[slot_idx], @@ -388,6 +390,7 @@ class TokenSequenceAnalyzer : public ITokenSequenceAnalyzer virtual void register_handler(const output_callback_t fn) override { + // std::cerr << "TokenSequenceAnalyzer::register_handler" << std::endl; m_output_callback = fn; } @@ -415,6 +418,7 @@ class TokenSequenceAnalyzer : public ITokenSequenceAnalyzer */ virtual void finalize() override { + // std::cerr << "TokenSequenceAnalyzer::finalize" << std::endl; if (m_current_timepoint > 0) { if (m_current_timepoint < m_buffer_size) @@ -430,6 +434,8 @@ class TokenSequenceAnalyzer : public ITokenSequenceAnalyzer m_cls.send_all_results(); m_current_timepoint = 0; m_current_buffer = 0; + + m_cls.reset(); } virtual void operator()(const std::vector& tokens, uint32_t len) override @@ -449,7 +455,7 @@ class TokenSequenceAnalyzer : public ITokenSequenceAnalyzer token.m_offset = src.m_offset; token.m_len = src.m_len; token.m_form_idx = m_stridx.get_idx(src.m_pch, src.m_len); - token.m_flags = impl::token_t::token_flags_t(src.m_flags); + token.m_flags = token_flags_t(src.m_flags); m_current_timepoint++; if (m_current_timepoint >= m_buffer_size) @@ -468,10 +474,10 @@ class TokenSequenceAnalyzer : public ITokenSequenceAnalyzer void acquire_buffer() { - std::cerr << "acquire_buffer" << std::endl; + // std::cerr << 
"acquire_buffer" << std::endl; size_t next_buffer_idx = (m_current_buffer + 1 < m_buffers.size()) ? (m_current_buffer + 1) : 0; const token_buffer_t<>& next_buffer = m_buffers[next_buffer_idx]; - +// // wait for buffer while (next_buffer.locked()) { @@ -485,7 +491,7 @@ class TokenSequenceAnalyzer : public ITokenSequenceAnalyzer void start_analysis(size_t buffer_idx, int count = -1) { - std::cerr << "TokenSequenceAnalyzer::start_analysis " << buffer_idx << ", " << count << std::endl; + // std::cerr << "TokenSequenceAnalyzer::start_analysis " << buffer_idx << ", " << count << std::endl; assert(!m_buffers[buffer_idx].locked()); m_buffers[buffer_idx].lock(); @@ -583,7 +589,7 @@ class TokenSequenceAnalyzer : public ITokenSequenceAnalyzer std::map> feats; morph_model::morph_feats_t encoded_feats = mm.convert(line, feats); - std::cerr << "load_pos_cache add " << line << " " << encoded_feats.toBaseType() << std::endl; + // std::cerr << "load_pos_cache add " << line << " " << encoded_feats.toBaseType() << std::endl; result.insert(encoded_feats); } return result; diff --git a/deeplima/include/deeplima/token_type.h b/deeplima/include/deeplima/token_type.h index bea5e41a1..5e5211f99 100644 --- a/deeplima/include/deeplima/token_type.h +++ b/deeplima/include/deeplima/token_type.h @@ -15,17 +15,18 @@ namespace deeplima { +enum token_flags_t : uint8_t +{ + none = 0x00, + sentence_brk = 0x01, + paragraph_brk = 0x02, + max_flags +}; + namespace impl { struct token_t { - enum token_flags_t : uint8_t - { - none = 0x00, - sentence_brk = 0x01, - paragraph_brk = 0x02, - max_flags - }; inline bool eos() const { diff --git a/deeplima/include/deeplima/utils/locked_buffer.h b/deeplima/include/deeplima/utils/locked_buffer.h index 09882a2f4..35e421b9e 100644 --- a/deeplima/include/deeplima/utils/locked_buffer.h +++ b/deeplima/include/deeplima/utils/locked_buffer.h @@ -26,22 +26,22 @@ struct locked_buffer_t m_lock_count(0), m_char_aligned_data(nullptr) { - std::cerr << 
"locked_buffer_t::locked_buffer_t()" - << (void*)this << std::endl; + // std::cerr << "locked_buffer_t::locked_buffer_t()" + // << (void*)this << std::endl; } ~locked_buffer_t() { - std::cerr << "locked_buffer_t::~locked_buffer_t() " - << (void*)this << std::endl; + // std::cerr << "locked_buffer_t::~locked_buffer_t() " + // << (void*)this << std::endl; m_data = nullptr; m_char_aligned_data = nullptr; } locked_buffer_t(const locked_buffer_t& other) { - std::cerr << "locked_buffer_t::locked_buffer_t(other)" - << (void*)this << std::endl; + // std::cerr << "locked_buffer_t::locked_buffer_t(other)" + // << (void*)this << std::endl; m_data = other.m_data; m_char_aligned_data = other.m_char_aligned_data; m_len = other.m_len; @@ -66,19 +66,19 @@ struct locked_buffer_t inline void lock() { - std::cerr << "locked_buffer_t::lock " << (void*)this << " " << m_lock_count; + // std::cerr << "locked_buffer_t::lock " << (void*)this << " " << m_lock_count; m_lock_count++; - std::cerr << " -> " << m_lock_count << std::endl; + // std::cerr << " -> " << m_lock_count << std::endl; } inline void unlock() { - std::cerr << "locked_buffer_t::unlock " << (void*)this << " " << m_lock_count; + // std::cerr << "locked_buffer_t::unlock " << (void*)this << " " << m_lock_count; m_len = 0; m_char_aligned_data = nullptr; assert(m_lock_count > 0); m_lock_count--; - std::cerr << " -> " << m_lock_count << std::endl; + // std::cerr << " -> " << m_lock_count << std::endl; } inline void set_read_start(const char* new_start) @@ -118,7 +118,7 @@ struct locked_buffer_set_t void init(size_t n, uint32_t buffer_size) { - std::cerr << "locked_buffer_set_t::init" << std::endl; + // std::cerr << "locked_buffer_set_t::init" << std::endl; assert(n > 0); assert(buffer_size > 0); @@ -178,7 +178,7 @@ struct locked_buffer_set_t void pretty_print() { - std::cerr << "BUFFS: "; + std::cerr << (void*)this << " BUFFS: "; for (size_t i = 0; i < m_data.size(); i++) { std::cerr << " | " << m_data[i].m_lock_count; diff --git 
a/deeplima/include/deeplima/utils/thread_pool.h b/deeplima/include/deeplima/utils/thread_pool.h index 1c47dce5f..816848bd7 100644 --- a/deeplima/include/deeplima/utils/thread_pool.h +++ b/deeplima/include/deeplima/utils/thread_pool.h @@ -59,6 +59,7 @@ class ThreadPool void stop() { m_stop = true; + // push one null job per worker so that every worker wakes up and can terminate for (size_t i = 0 ; i < m_workers.size(); i++) { push(nullptr); @@ -80,7 +81,7 @@ class ThreadPool } else { - throw std::runtime_error("All workers must be joinable here."); + throw std::runtime_error("All workers must be unjoinable (inactive threads) here."); } } } @@ -99,6 +100,9 @@ class ThreadPool protected: + /** This will wait until a job is available and then the @ref job parameter will + * be set to the available job, which will be removed from the list. + */ inline bool wait_for_new_job(void** job) { std::unique_lock l(m_mutex); @@ -124,6 +128,8 @@ class ThreadPool void thread_fn(size_t worker_id) { void* job = nullptr; + // loop to dispatch pushed jobs to the threads of this pool + // wait_for_new_job is blocking until a job becomes available while (true) { // std::cerr << "thread_fn " << worker_id << " main loop" << std::endl; @@ -132,11 +138,13 @@ class ThreadPool // std::cerr << "wait_for_new_job is true" << std::endl; if (nullptr == job) { + // we should get a null job only when stopping + // std::cerr << "wait_for_new_job: we should get a null job only when stopping" << std::endl; break; } - // std::cerr << "worker: " << (void*) job << " started" << std::endl; + // std::cerr << "worker: running job " << (void*) job << std::endl; P::run_one_job(static_cast(this), worker_id, job); - // std::cerr << "worker: " << (void*) job << " completed" << std::endl; + // std::cerr << "worker: completed job " << (void*) job << std::endl; m_cv_notify.notify_all(); // std::cerr << "notify_all done" << std::endl; } diff --git a/deeplima/libs/tasks/segmentation/inference/segmentation_impl.cpp
b/deeplima/libs/tasks/segmentation/inference/segmentation_impl.cpp index 0329505c3..7348dd789 100644 --- a/deeplima/libs/tasks/segmentation/inference/segmentation_impl.cpp +++ b/deeplima/libs/tasks/segmentation/inference/segmentation_impl.cpp @@ -7,8 +7,9 @@ namespace deeplima::segmentation::impl { -SegmentationImpl::SegmentationImpl() - : m_decoder(SegmentationClassifier::get_output(), m_char_len), +SegmentationImpl::SegmentationImpl() : + SegmentationClassifier(), + m_decoder(SegmentationClassifier::get_output(), m_char_len), m_current_slot_timepoints(0), m_current_slot_no(-1), m_last_completed_slot(-1), @@ -16,20 +17,20 @@ SegmentationImpl::SegmentationImpl() m_curr_buff_idx(0) {} -SegmentationImpl::SegmentationImpl( - const std::vector& ngram_descr, - size_t threads, - size_t buffer_size_per_thread - ) - : SegmentationClassifier( - ngram_descr.size() * 2, 4, threads * 2, buffer_size_per_thread, threads), - m_input_encoder(ngram_descr), - m_decoder(SegmentationClassifier::get_output(), m_char_len), - m_current_timepoint(SegmentationClassifier::get_start_timepoint()), - m_buff_set(SegmentationClassifier::get_num_threads() * 2, SegmentationClassifier::get_slot_size() * 4) -{ - m_char_len.resize(SegmentationClassifier::size()); -} +// SegmentationImpl::SegmentationImpl( +// const std::vector& ngram_descr, +// size_t threads, +// size_t buffer_size_per_thread +// ) +// : SegmentationClassifier( +// ngram_descr.size() * 2, 4, threads * 2, buffer_size_per_thread, threads), +// m_input_encoder(ngram_descr), +// m_decoder(SegmentationClassifier::get_output(), m_char_len), +// m_current_timepoint(SegmentationClassifier::get_start_timepoint()), +// m_buff_set(SegmentationClassifier::get_num_threads() * 2, SegmentationClassifier::get_slot_size() * 4) +// { +// m_char_len.resize(SegmentationClassifier::size()); +// } void SegmentationImpl::load(const std::string& fn) { @@ -59,7 +60,7 @@ void SegmentationImpl::init(size_t threads, size_t buffer_size_per_thread) void 
SegmentationImpl::parse_from_stream(const read_callback_t fn) { - std::cerr << "SegmentationImpl::parse_from_stream" << std::endl; + // std::cerr << "SegmentationImpl::parse_from_stream" << std::endl; size_t n = 0; bool just_started = true; bool continue_reading = true; @@ -85,13 +86,13 @@ void SegmentationImpl::parse_from_stream(const read_callback_t fn) break; } counter += bytes_read; - std::cerr << "SegmentationImpl::parse_from_stream Reading callback: " - << bytes_read << " bytes, continue_reading=" - << continue_reading << " counter=" << counter << std::endl; + // std::cerr << "SegmentationImpl::parse_from_stream Reading callback: " + // << bytes_read << " bytes, continue_reading=" + // << continue_reading << " counter=" << counter << std::endl; buff.m_char_aligned_data = (const char*)(buff.m_data); buff.m_len = bytes_read; - std::cerr << "SegmentationImpl::parse_from_stream locking (m_buff_set) buff " - << n << std::endl; + // std::cerr << "SegmentationImpl::parse_from_stream locking (m_buff_set) buff " + // << n << std::endl; buff.lock(); int32_t pos = 0; @@ -158,8 +159,8 @@ void SegmentationImpl::parse_from_stream(const read_callback_t fn) send_next_results(); } - m_buff_set.pretty_print(); - SegmentationClassifier::pretty_print(); + // m_buff_set.pretty_print(); + // SegmentationClassifier::pretty_print(); n = m_buff_set.next(n); } @@ -276,15 +277,15 @@ void SegmentationImpl::acquire_slot() if (0 == m_current_slot_timepoints || m_current_slot_no < 0) { m_current_slot_no = SegmentationClassifier::get_slot_idx(m_current_timepoint); - // std::cerr << "SegmentationImpl::acquire_slot: got " << m_current_slot_no << " for timepoint " - // << m_current_timepoint << std::endl; + // std::cerr << "SegmentationImpl::acquire_slot: got " << m_current_slot_no + // << " for timepoint " << m_current_timepoint << std::endl; uint8_t lock_count = SegmentationClassifier::get_lock_count(m_current_slot_no); while (lock_count > 1) { // Worker still uses this slot. Waiting... 
- // std::cerr << "handle_timepoint, waiting for slot " << m_current_slot_no - // << " lock_count=" << lock_count << std::endl; + // std::cerr << "SegmentationImpl::acquire_slot, waiting for slot " + // << m_current_slot_no << " / " << lock_count << std::endl; SegmentationClassifier::wait_for_slot(m_current_slot_no); lock_count = SegmentationClassifier::get_lock_count(m_current_slot_no); } @@ -327,8 +328,11 @@ void SegmentationImpl::no_more_data() void SegmentationImpl::finalize() { - std::cerr << "SegmentationImpl::finalize" << std::endl; + // std::cerr << "SegmentationImpl::finalize" << std::endl; + SegmentationClassifier::reset(); + m_char_len.resize(SegmentationClassifier::size()); + m_current_timepoint = SegmentationClassifier::get_start_timepoint(); // no_more_data(); // // for (size_t i = 0; i < m_buff_set.size(); i++) diff --git a/lima_linguisticprocessing/src/linguisticProcessing/core/DeepLimaUnits/RnnNER/RnnNER.cpp b/lima_linguisticprocessing/src/linguisticProcessing/core/DeepLimaUnits/RnnNER/RnnNER.cpp index f5f9a07ad..215dc9a3c 100644 --- a/lima_linguisticprocessing/src/linguisticProcessing/core/DeepLimaUnits/RnnNER/RnnNER.cpp +++ b/lima_linguisticprocessing/src/linguisticProcessing/core/DeepLimaUnits/RnnNER/RnnNER.cpp @@ -205,7 +205,8 @@ Lima::LimaStatusCode RnnNER::process(Lima::AnalysisContent &analysis) const token.m_offset = src->position(); token.m_len = src->length(); token.m_pch = v[k].c_str(); - token.m_flags = segmentation::token_pos::flag_t(src->status().getStatus() & StatusType::T_SENTENCE_BRK); + token.m_flags = token_flags_t(src->status().getStatus() + & StatusType::T_SENTENCE_BRK); } } m_d->tagger(buffer); diff --git a/lima_linguisticprocessing/src/linguisticProcessing/core/DeepLimaUnits/RnnTokenizer/RnnTokenizer.cpp b/lima_linguisticprocessing/src/linguisticProcessing/core/DeepLimaUnits/RnnTokenizer/RnnTokenizer.cpp index fdbad84b1..d30ca4287 100644 --- 
a/lima_linguisticprocessing/src/linguisticProcessing/core/DeepLimaUnits/RnnTokenizer/RnnTokenizer.cpp +++ b/lima_linguisticprocessing/src/linguisticProcessing/core/DeepLimaUnits/RnnTokenizer/RnnTokenizer.cpp @@ -24,6 +24,7 @@ #include "RnnTokenizer.h" #include "deeplima/segmentation.h" +#include "deeplima/token_type.h" @@ -60,6 +61,7 @@ CONFIGURATIONHELPER_LOGGING_INIT(TOKENIZERLOGINIT); class RnnTokenizerPrivate : public DeepTokenizerBase, public ConfigurationHelper { + friend RnnTokenizer; public: RnnTokenizerPrivate(); virtual ~RnnTokenizerPrivate(); @@ -81,16 +83,16 @@ class RnnTokenizerPrivate : public DeepTokenizerBase, public ConfigurationHelper void init(GroupConfigurationStructure& unitConfiguration); void tokenize(const QString& text, std::vector>& sentences); - MediaId m_language; - FsaStringsPool* m_stringsPool; - LinguisticGraphVertex m_currentVx; - QString m_data; - protected: void append_new_word(std::vector< TPrimitiveToken >& current_sentence, const QString& current_token, int current_token_offset) const; + MediaId m_language; + FsaStringsPool* m_stringsPool; + LinguisticGraphVertex m_currentVx; + QString m_data; + size_t m_max_seq_len; std::map> m_trrules; @@ -109,6 +111,7 @@ RnnTokenizerPrivate::RnnTokenizerPrivate() : m_stringsPool(nullptr), m_currentVx(0), m_ignoreEOL(false), + m_segm(), m_loaded(false) { } @@ -337,7 +340,7 @@ void RnnTokenizerPrivate::tokenize(const QString& text, std::vectorposition(); token.m_len = src->length(); token.m_pch = v[k].c_str(); - token.m_flags = segmentation::token_pos::flag_t(src->status().getStatus() & StatusType::T_SENTENCE_BRK); + token.m_flags = token_flags_t(src->status().getStatus() + & StatusType::T_SENTENCE_BRK); } } m_d->analyzer(buffer);