
Commit

code formatting and refactor
oOTigger committed Nov 16, 2024
1 parent 51c3eb7 commit 878cff1
Showing 44 changed files with 417 additions and 276 deletions.
103 changes: 82 additions & 21 deletions lib/kernels/include/kernels/accessor.h
@@ -11,8 +11,6 @@

namespace FlexFlow {

struct Allocator;

class GenericTensorAccessorR {
public:
template <DataType DT>
@@ -42,19 +40,39 @@ class GenericTensorAccessorR {
bool operator!=(GenericTensorAccessorR const &) const;

template <DataType DT>
real_type_t<DT> const &at(std::vector<size_t> const &indices) const {
real_type_t<DT> const &at(std::vector<int> const &indices) const {
if (this->device_type != DeviceType::CPU) {
throw mk_runtime_error("Calling at() on non-CPU allocated tensor");
}
if (this->data_type != DT) {
throw mk_runtime_error(fmt::format(
"Invalid access data type ({} != {})", this->data_type, DT));
}
if (indices.size() != this->shape.num_dims()) {
throw mk_runtime_error(fmt::format("Number of indices ({}) does not "
"match the number of dimensions ({}).",
indices.size(),
this->shape.num_dims()));
}

using T = real_type_t<DT>;

T const *data_ptr = static_cast<T const *>(this->ptr);
size_t offset = calculate_index_offset(indices);

int offset = 0;
int multiplier = 1;
for (int i = 0; i < this->shape.num_dims(); i++) {
if (indices.at(i) >= this->shape.at(legion_dim_t{i})) {
throw mk_runtime_error(
fmt::format("In {} dimension, attempting to access index {} "
"when only {} indexes exist",
i,
indices.at(i),
this->shape.at(legion_dim_t{i})));
}

offset += indices.at(i) * multiplier;
multiplier *= this->shape.at(legion_dim_t{i});
}

return data_ptr[offset];
}
@@ -71,8 +89,6 @@
decltype(ptr) const &,
decltype(device_type) const &>
tie() const;

size_t calculate_index_offset(std::vector<size_t> const &indices) const;
};

std::string format_as(GenericTensorAccessorR const &);
@@ -109,37 +125,77 @@ class GenericTensorAccessorW {
operator GenericTensorAccessorR() const;

template <DataType DT>
real_type_t<DT> &at(std::vector<size_t> const &indices) {
real_type_t<DT> &at(std::vector<int> const &indices) {
if (this->device_type != DeviceType::CPU) {
throw mk_runtime_error("Calling at() on non-CPU allocated tensor");
}
if (this->data_type != DT) {
throw mk_runtime_error(fmt::format(
"Invalid access data type ({} != {})", this->data_type, DT));
}
if (indices.size() != this->shape.num_dims()) {
throw mk_runtime_error(fmt::format("Number of indices ({}) does not "
"match the number of dimensions ({}).",
indices.size(),
this->shape.num_dims()));
}

using T = real_type_t<DT>;

T *data_ptr = static_cast<T *>(this->ptr);
size_t offset = calculate_index_offset(indices);
int offset = 0;
int multiplier = 1;
for (int i = 0; i < this->shape.num_dims(); i++) {
if (indices.at(i) >= this->shape.at(legion_dim_t{i})) {
throw mk_runtime_error(
fmt::format("In {} dimension, attempting to access index {} "
"when only {} indexes exist",
i,
indices.at(i),
this->shape.at(legion_dim_t{i})));
}

offset += indices.at(i) * multiplier;
multiplier *= this->shape.at(legion_dim_t{i});
}

return data_ptr[offset];
}

template <DataType DT>
real_type_t<DT> &at(std::vector<size_t> const &indices) const {
real_type_t<DT> &at(std::vector<int> const &indices) const {
if (this->device_type != DeviceType::CPU) {
throw mk_runtime_error("Calling at() on non-CPU allocated tensor");
}
if (this->data_type != DT) {
throw mk_runtime_error(fmt::format(
"Invalid access data type ({} != {})", this->data_type, DT));
}
if (indices.size() != this->shape.num_dims()) {
throw mk_runtime_error(fmt::format("Number of indices ({}) does not "
"match the number of dimensions ({}).",
indices.size(),
this->shape.num_dims()));
}

using T = real_type_t<DT>;

T const *data_ptr = static_cast<T const *>(this->ptr);
size_t offset = calculate_index_offset(indices);
int offset = 0;
int multiplier = 1;
for (int i = 0; i < this->shape.num_dims(); i++) {
if (indices.at(i) >= this->shape.at(legion_dim_t{i})) {
throw mk_runtime_error(
fmt::format("In {} dimension, attempting to access index {} "
"when only {} indexes exist",
i,
indices.at(i),
this->shape.at(legion_dim_t{i})));
}

offset += indices.at(i) * multiplier;
multiplier *= this->shape.at(legion_dim_t{i});
}

return data_ptr[offset];
}
@@ -156,8 +212,6 @@ class GenericTensorAccessorW {
decltype(ptr) const &,
decltype(device_type) const &>
tie() const;

size_t calculate_index_offset(std::vector<size_t> const &indices) const;
};

std::string format_as(GenericTensorAccessorW const &);
@@ -213,6 +267,21 @@ std::vector<double const *>
std::vector<half const *>
get_half_ptrs(std::vector<GenericTensorAccessorR> const &);

int32_t *get_int32_ptr(GenericTensorAccessorW const &);
int64_t *get_int64_ptr(GenericTensorAccessorW const &);
float *get_float_ptr(GenericTensorAccessorW const &);
double *get_double_ptr(GenericTensorAccessorW const &);
half *get_half_ptr(GenericTensorAccessorW const &);
std::vector<int32_t *>
get_int32_ptrs(std::vector<GenericTensorAccessorW> const &);
std::vector<int64_t *>
get_int64_ptrs(std::vector<GenericTensorAccessorW> const &);
std::vector<float *>
get_float_ptrs(std::vector<GenericTensorAccessorW> const &);
std::vector<double *>
get_double_ptrs(std::vector<GenericTensorAccessorW> const &);
std::vector<half *> get_half_ptrs(std::vector<GenericTensorAccessorW> const &);

template <DataType DT>
std::vector<real_type_t<DT> const *>
get(std::vector<GenericTensorAccessorR> const &accs) {
@@ -239,14 +308,6 @@ std::pair<ArrayShape, DataType>
void copy_accessor_data_to_l_from_r(GenericTensorAccessorW &dst_accessor,
GenericTensorAccessorR const &src_accessor);

GenericTensorAccessorR
copy_tensor_accessor_r(GenericTensorAccessorR const &src_accessor,
Allocator &allocator);

GenericTensorAccessorW
copy_tensor_accessor_w(GenericTensorAccessorW const &src_accessor,
Allocator &allocator);

} // namespace FlexFlow

namespace FlexFlow {
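For reference, the inlined `at()` above validates the device type, the data type, and the index count, then computes the element offset as `indices[0] + indices[1]*d0 + indices[2]*d0*d1 + ...` over the shape's dimensions. A minimal usage sketch (how the CPU-resident accessor is allocated is up to the caller and not part of this diff):

```cpp
#include "kernels/accessor.h"

using namespace FlexFlow;

// Assumes `acc` wraps a 2-D float tensor allocated on the CPU; at<DT>()
// throws if the tensor is not CPU-resident, if DT does not match the
// accessor's data_type, or if the index count or any index is out of range.
void touch_element(GenericTensorAccessorW &acc) {
  acc.at<DataType::FLOAT>({1, 2}) = 3.5f;     // write element (1, 2)
  float v = acc.at<DataType::FLOAT>({1, 2});  // read it back
  (void)v;
}
```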
19 changes: 19 additions & 0 deletions lib/kernels/include/kernels/copy_tensor_accessor.h
@@ -0,0 +1,19 @@
#ifndef _FLEXFLOW_KERNELS_COPY_TENSOR_ACCESSOR_H
#define _FLEXFLOW_KERNELS_COPY_TENSOR_ACCESSOR_H

#include "kernels/accessor.h"
#include "kernels/allocation.h"

namespace FlexFlow {

GenericTensorAccessorR
copy_tensor_accessor_r(GenericTensorAccessorR const &src_accessor,
Allocator &allocator);

GenericTensorAccessorW
copy_tensor_accessor_w(GenericTensorAccessorW const &src_accessor,
Allocator &allocator);

} // namespace FlexFlow

#endif
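
The two helpers declared above replace the copy functions that previously lived in accessor.h. A minimal usage sketch (the source accessor and allocator are taken as parameters; nothing beyond the two declared functions is assumed to exist):

```cpp
#include "kernels/copy_tensor_accessor.h"

using namespace FlexFlow;

// Duplicate a read-only accessor into memory owned by `allocator`.
// copy_tensor_accessor_r allocates a tensor with the same shape and data
// type, copies the contents, and returns a read-only view of the copy;
// copy_tensor_accessor_w does the same for writable accessors.
GenericTensorAccessorR duplicate_r(GenericTensorAccessorR const &src,
                                   Allocator &allocator) {
  return copy_tensor_accessor_r(src, allocator);
}
```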
2 changes: 2 additions & 0 deletions lib/kernels/include/kernels/managed_ff_stream.h
@@ -19,6 +19,8 @@ struct ManagedFFStream {

ffStream_t const &raw_stream() const;

void cleanup();

private:
ffStream_t *stream;
};
2 changes: 2 additions & 0 deletions lib/kernels/include/kernels/managed_per_device_ff_handle.h
@@ -24,6 +24,8 @@ struct ManagedPerDeviceFFHandle {

PerDeviceFFHandle const &raw_handle() const;

void cleanup();

private:
PerDeviceFFHandle *handle;
};
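Both wrappers gain an explicit `cleanup()` alongside their destructors. A sketch of the assumed call pattern (the idea that `cleanup()` releases the wrapped resource early, e.g. in test teardown, is an inference from the declarations, not something this diff shows):

```cpp
#include "kernels/managed_ff_stream.h"
#include "kernels/managed_per_device_ff_handle.h"

using namespace FlexFlow;

// Release the wrapped stream and per-device handle at a chosen point
// instead of waiting for the destructors to run.
void teardown(ManagedFFStream &stream, ManagedPerDeviceFFHandle &handle) {
  stream.cleanup();
  handle.cleanup();
}
```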
104 changes: 1 addition & 103 deletions lib/kernels/src/accessor.cc
@@ -26,7 +26,7 @@ void copy_accessor_data_to_l_from_r(
dst_accessor.ptr, src_accessor.ptr, num_bytes, cudaMemcpyDeviceToHost));
} else {
assert(src_device_type == DeviceType::GPU);
assert(src_device_type == DeviceType::CPU);
assert(dst_device_type == DeviceType::GPU);
checkCUDA(cudaMemcpy(dst_accessor.ptr,
src_accessor.ptr,
num_bytes,
@@ -53,36 +53,6 @@ std::tuple<DataType const &,
return std::tie(this->data_type, this->shape, this->ptr, this->device_type);
}

size_t GenericTensorAccessorW::calculate_index_offset(
std::vector<size_t> const &indices) const {

if (indices.size() != this->shape.num_dims()) {
throw mk_runtime_error(fmt::format(
"Number of indices ({}) does not match the number of dimensions ({}).",
indices.size(),
this->shape.num_dims()));
}

size_t offset = 0;
size_t multiplier = 1;

for (size_t i = 0; i < this->shape.num_dims(); i++) {
if (indices[i] >= this->shape.at(legion_dim_t(i))) {
throw mk_runtime_error(
fmt::format("In {} dimension, attempting to access index {} "
"when only {} indexes exist",
i,
indices[i],
this->shape.at(legion_dim_t(i))));
}

offset += indices[i] * multiplier;
multiplier *= this->shape.at(legion_dim_t(i));
}

return offset;
}

bool GenericTensorAccessorW::operator==(
GenericTensorAccessorW const &other) const {
return this->tie() == other.tie();
@@ -139,36 +109,6 @@ std::tuple<DataType const &,
return std::tie(this->data_type, this->shape, this->ptr, this->device_type);
}

size_t GenericTensorAccessorR::calculate_index_offset(
std::vector<size_t> const &indices) const {

if (indices.size() != this->shape.num_dims()) {
throw mk_runtime_error(fmt::format(
"Number of indices ({}) does not match the number of dimensions ({}).",
indices.size(),
this->shape.num_dims()));
}

ssize_t offset = 0;
size_t multiplier = 1;

for (size_t i = 0; i < this->shape.num_dims(); i++) {
if (indices[i] >= this->shape.at(legion_dim_t(i))) {
throw mk_runtime_error(
fmt::format("In {} dimension, attempting to access index {} "
"when only {} indexes exist",
i,
indices[i],
this->shape.at(legion_dim_t(i))));
}

offset += indices[i] * multiplier;
multiplier *= this->shape.at(legion_dim_t(i));
}

return offset;
}

bool GenericTensorAccessorR::operator==(
GenericTensorAccessorR const &other) const {
return this->tie() == other.tie();
@@ -280,46 +220,4 @@ std::pair<ArrayShape, DataType>
return std::make_pair(accessor.shape, accessor.data_type);
}

template <DataType DT>
struct CopyTensorAccessorW {
GenericTensorAccessorW operator()(GenericTensorAccessorW const &src_accessor,
Allocator &allocator) {
TensorShape shape =
get_tensor_shape(src_accessor.shape, src_accessor.data_type);
GenericTensorAccessorW dst_accessor = allocator.allocate_tensor(shape);

copy_accessor_data_to_l_from_r(dst_accessor, src_accessor);

return dst_accessor;
}
};

GenericTensorAccessorW
copy_tensor_accessor_w(GenericTensorAccessorW const &src_accessor,
Allocator &allocator) {
return DataTypeDispatch1<CopyTensorAccessorW>{}(
src_accessor.data_type, src_accessor, allocator);
}

template <DataType DT>
struct CopyTensorAccessorR {
GenericTensorAccessorR operator()(GenericTensorAccessorR const &src_accessor,
Allocator &allocator) {
TensorShape shape =
get_tensor_shape(src_accessor.shape, src_accessor.data_type);
GenericTensorAccessorW dst_accessor = allocator.allocate_tensor(shape);

copy_accessor_data_to_l_from_r(dst_accessor, src_accessor);

return read_only_accessor_from_write_accessor(dst_accessor);
}
};

GenericTensorAccessorR
copy_tensor_accessor_r(GenericTensorAccessorR const &src_accessor,
Allocator &allocator) {
return DataTypeDispatch1<CopyTensorAccessorR>{}(
src_accessor.data_type, src_accessor, allocator);
}

} // namespace FlexFlow
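
For context on the retained copy helper, `copy_accessor_data_to_l_from_r` dispatches on the source and destination device types and issues the matching `cudaMemcpy`. A usage sketch (both accessors are caller-provided and assumed to already share a shape and data type; the 2-D float read at the end is illustrative only):

```cpp
#include "kernels/accessor.h"

using namespace FlexFlow;

// Pull a GPU-resident tensor into a CPU-resident destination the caller
// has already allocated with a matching shape and data type.
void pull_to_host(GenericTensorAccessorW &cpu_dst,
                  GenericTensorAccessorR const &gpu_src) {
  copy_accessor_data_to_l_from_r(cpu_dst, gpu_src);
  float first = cpu_dst.at<DataType::FLOAT>({0, 0});  // now readable on host
  (void)first;
}
```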
