Skip to content

Commit

Permalink
Kernel build (#1366)
Browse files — browse the repository at this point in the history
* Kernels

* Add none to embedding

* Datatype sizeof

* Move embedding

* Add activation none

* Format

* Linear activation optional

* Format

* Delete unnecessary comments

* Fix kernels

* Test device.h

* Device h
  • Loading branch information
reyna-abhyankar authored Apr 17, 2024
1 parent 318152a commit 8e0625f
Show file tree
Hide file tree
Showing 72 changed files with 1,106 additions and 1,103 deletions.
2 changes: 1 addition & 1 deletion lib/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ file(GLOB_RECURSE SRC
CONFIGURE_DEPENDS
LIST_DIRECTORIES False
src/*.cc
# src/*.cu
src/cuda/ops/*.cu
)

add_library(
Expand Down
6 changes: 4 additions & 2 deletions lib/kernels/include/kernels/allocation.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef _FLEXFLOW_KERNELS_ALLOCATION_H
#define _FLEXFLOW_KERNELS_ALLOCATION_H

#include "accessor.h"
#include <cstddef>
#include <memory>

Expand All @@ -16,8 +17,8 @@ struct IAllocator {
struct Allocator {
Allocator() = delete;

void *allocate(size_t);
void deallocate(void *);
void *allocate(size_t mem_size);
void deallocate(void *ptr);

template <typename T, typename... Args>
static typename std::enable_if<std::is_base_of<IAllocator, T>::value,
Expand All @@ -27,6 +28,7 @@ struct Allocator {
}

private:
Allocator(std::shared_ptr<IAllocator> ptr) : i_allocator(ptr){};
std::shared_ptr<IAllocator> i_allocator;
};

Expand Down
3 changes: 3 additions & 0 deletions lib/kernels/include/kernels/array_shape.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define _FLEXFLOW_KERNELS_ARRAY_SHAPE_H

#include "legion_dim.h"
#include "op-attrs/tensor_shape.h"
#include "utils/stack_vector.h"
#include "utils/visitable.h"
#include <cstddef>
Expand All @@ -13,6 +14,7 @@ struct ArrayShape {
public:
ArrayShape() = delete;
ArrayShape(size_t *dims, size_t num_dims);
ArrayShape(TensorShape const &shape);
ArrayShape(std::vector<std::size_t> const &);

/**
Expand All @@ -31,6 +33,7 @@ struct ArrayShape {

std::size_t operator[](legion_dim_t) const;
std::size_t at(legion_dim_t) const;
std::size_t at(ff_dim_t) const;

legion_dim_t last_idx() const;
legion_dim_t neg_idx(int) const;
Expand Down
2 changes: 1 addition & 1 deletion lib/kernels/include/kernels/attention_kernels.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#ifndef _FLEXFLOW_OPS_KERNELS_ATTENTION_KERNELS_H
#define _FLEXFLOW_OPS_KERNELS_ATTENTION_KERNELS_H

#include "device.h"
#include "kernels/allocation.h"
#include "kernels/device.h"
#include "kernels/ff_handle.h"
#include "op-attrs/ops/attention.h"
#include <memory>
Expand Down
2 changes: 1 addition & 1 deletion lib/kernels/include/kernels/batch_matmul_kernels.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#ifndef _FLEXFLOW_OPS_KERNELS_BATCH_MATMUL_KERNELS_H
#define _FLEXFLOW_OPS_KERNELS_BATCH_MATMUL_KERNELS_H

#include "device.h"
#include "kernels/allocation.h"
#include "kernels/device.h"
#include "kernels/ff_handle.h"
#include "utils/visitable.h"

Expand Down
8 changes: 3 additions & 5 deletions lib/kernels/include/kernels/batch_norm_kernels.h
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
#ifndef _FLEXFLOW_KERNELS_BATCH_NORM_KERNELS_H
#define _FLEXFLOW_KERNELS_BATCH_NORM_KERNELS_H

#include "device.h"
#include "kernels/allocation.h"
#include "kernels/device.h"
#include "kernels/ff_handle.h"
#include <memory>

namespace FlexFlow {

struct BatchNormPerDeviceState {
PerDeviceFFHandle handle;
Allocator allocator;
ffTensorDescriptor_t inputTensor;
ffTensorDescriptor_t outputTensor;
ffTensorDescriptor_t biasTensor;
Expand All @@ -29,7 +28,6 @@ struct BatchNormPerDeviceState {

FF_VISITABLE_STRUCT_NONSTANDARD_CONSTRUCTION(BatchNormPerDeviceState,
handle,
allocator,
inputTensor,
outputTensor,
biasTensor,
Expand Down Expand Up @@ -58,14 +56,14 @@ BatchNormPerDeviceState init_kernel(PerDeviceFFHandle handle,
bool relu);

void forward_kernel(ffStream_t stream,
BatchNormPerDeviceState &m,
BatchNormPerDeviceState const &m,
float const *input_ptr,
float *output_ptr,
float const *scale_ptr,
float const *bias_ptr);

void backward_kernel(ffStream_t stream,
BatchNormPerDeviceState &m,
BatchNormPerDeviceState const &m,
float const *input_ptr,
float *output_grad_ptr,
float const *output_ptr,
Expand Down
2 changes: 1 addition & 1 deletion lib/kernels/include/kernels/cast_kernels.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#ifndef _FLEXFLOW_OPS_KERNELS_CAST_KERNELS_H
#define _FLEXFLOW_OPS_KERNELS_CAST_KERNELS_H

#include "device.h"
#include "kernels/accessor.h"
#include "kernels/device.h"
#include "kernels/ff_handle.h"
#include "op-attrs/activation.h"

Expand Down
2 changes: 1 addition & 1 deletion lib/kernels/include/kernels/combine_kernels.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#ifndef _FLEXFLOW_OPS_KERNELS_COMBINE_KERNELS_H
#define _FLEXFLOW_OPS_KERNELS_COMBINE_KERNELS_H

#include "device.h"
#include "kernels/accessor.h"
#include "kernels/device.h"

namespace FlexFlow {
namespace Kernels {
Expand Down
2 changes: 1 addition & 1 deletion lib/kernels/include/kernels/concat_kernels.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#ifndef _FLEXFLOW_OPS_KERNELS_CONCAT_KERNELS_H
#define _FLEXFLOW_OPS_KERNELS_CONCAT_KERNELS_H

#include "device.h"
#include "kernels/accessor.h"
#include "kernels/device.h"

namespace FlexFlow {
namespace Kernels {
Expand Down
8 changes: 4 additions & 4 deletions lib/kernels/include/kernels/conv_2d_kernels.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#ifndef _FLEXFLOW_OPS_KERNELS_CONV_2D_KERNELS_H
#define _FLEXFLOW_OPS_KERNELS_CONV_2D_KERNELS_H

#include "device.h"
#include "kernels/accessor.h"
#include "kernels/device.h"
#include "kernels/ff_handle.h"
#include "op-attrs/activation.h"
#include "utils/visitable.h"
Expand Down Expand Up @@ -38,7 +38,7 @@ namespace Kernels {
namespace Conv2D {

Conv2DPerDeviceState init_kernel(PerDeviceFFHandle handle,
optional<Activation> activation,
std::optional<Activation> activation,
int kernel_h,
int kernel_w,
int groups,
Expand All @@ -57,7 +57,7 @@ void forward_kernel(ffStream_t stream,
float *output_ptr,
float const *filter_ptr,
float const *bias_ptr,
optional<Activation> activation);
std::optional<Activation> activation);

void backward_kernel(ffStream_t stream,
Conv2DPerDeviceState const &m,
Expand All @@ -68,7 +68,7 @@ void backward_kernel(ffStream_t stream,
float const *filter_ptr,
float *filter_grad_ptr,
float *bias_grad_ptr,
optional<Activation> activation);
std::optional<Activation> activation);

} // namespace Conv2D
} // namespace Kernels
Expand Down
2 changes: 2 additions & 0 deletions lib/kernels/include/kernels/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ typedef hipError_t ffError_t;

namespace FlexFlow {

using coord_t = long long;

#define FatalError(s) \
do { \
std::stringstream _where, _message; \
Expand Down
8 changes: 3 additions & 5 deletions lib/kernels/include/kernels/dropout_kernels.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#ifndef _FLEXFLOW_OPS_KERNELS_DROPOUT_KERNELS_H
#define _FLEXFLOW_OPS_KERNELS_DROPOUT_KERNELS_H

#include "device.h"
#include "kernels/allocation.h"
#include "kernels/array_shape.h"
#include "kernels/device.h"
#include "kernels/ff_handle.h"
#include <cstddef>

Expand All @@ -12,7 +12,6 @@ namespace FlexFlow {
struct DropoutPerDeviceState {
public:
PerDeviceFFHandle handle;
Allocator allocator;
ffTensorDescriptor_t inputTensor;
ffTensorDescriptor_t outputTensor;
ffDropoutDescriptor_t dropoutDesc;
Expand All @@ -24,7 +23,6 @@ struct DropoutPerDeviceState {

FF_VISITABLE_STRUCT_NONSTANDARD_CONSTRUCTION(DropoutPerDeviceState,
handle,
allocator,
inputTensor,
outputTensor,
dropoutDesc,
Expand All @@ -43,12 +41,12 @@ DropoutPerDeviceState init_kernel(PerDeviceFFHandle handle,
Allocator allocator);

void forward_kernel(ffStream_t stream,
DropoutPerDeviceState &m,
DropoutPerDeviceState const &m,
float const *input_ptr,
float *output_ptr);

void backward_kernel(ffStream_t stream,
DropoutPerDeviceState &m,
DropoutPerDeviceState const &m,
float const *output_grad_ptr,
float *input_grad_ptr);

Expand Down
2 changes: 1 addition & 1 deletion lib/kernels/include/kernels/element_binary_kernels.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#ifndef _FLEXFLOW_OPS_KERNELS_ELEMENT_BINARY_KERNELS_H
#define _FLEXFLOW_OPS_KERNELS_ELEMENT_BINARY_KERNELS_H

#include "device.h"
#include "ff_handle.h"
#include "kernels/array_shape.h"
#include "kernels/device.h"
#include "op-attrs/datatype.h"
#include "op-attrs/op.h"

Expand Down
6 changes: 3 additions & 3 deletions lib/kernels/include/kernels/element_unary_kernels.h
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
#ifndef _FLEXFLOW_OPS_KERNELS_ELEMENT_UNARY_KERNELS_H
#define _FLEXFLOW_OPS_KERNELS_ELEMENT_UNARY_KERNELS_H

#include "device.h"
#include "kernels/accessor.h"
#include "kernels/device.h"
#include "kernels/ff_handle.h"
#include "op-attrs/ops/element_unary.h"
#include <cstddef>

namespace FlexFlow {

using ElementUnaryUnifiedAttrs =
variant<ElementUnaryAttrs, ElementScalarUnaryAttrs>;
std::variant<ElementUnaryAttrs, ElementScalarUnaryAttrs>;

struct ElementUnaryPerDeviceState {
ffTensorDescriptor_t inputTensor, outputTensor;
ffActivationDescriptor_t actiDesc;
req<ffActivationDescriptor_t> actiDesc;
};

FF_VISITABLE_STRUCT_NO_EQ(ElementUnaryPerDeviceState,
Expand Down
7 changes: 4 additions & 3 deletions lib/kernels/include/kernels/embedding_kernels.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#ifndef _FLEXFLOW_OPS_KERNELS_EMBEDDING_KERNELS_H
#define _FLEXFLOW_OPS_KERNELS_EMBEDDING_KERNELS_H

#include "device.h"
#include "kernels/accessor.h"
#include "kernels/device.h"
#include "op-attrs/ops/embedding.h"

namespace FlexFlow {
namespace Kernels {
Expand All @@ -13,7 +14,7 @@ void forward_kernel(ffStream_t stream,
GenericTensorAccessorR const &weight,
DataType input_data_type,
DataType output_data_type,
AggrMode aggr,
std::optional<AggregateOp> aggr,
int in_dim,
int out_dim,
int batch_size);
Expand All @@ -23,7 +24,7 @@ void backward_kernel(ffStream_t stream,
GenericTensorAccessorW const &weight_grad,
DataType input_data_type,
DataType output_data_type,
AggrMode aggr,
std::optional<AggregateOp> aggr,
int in_dim,
int out_dim,
int batch_size);
Expand Down
2 changes: 1 addition & 1 deletion lib/kernels/include/kernels/ff_handle.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include <nccl.h>
#endif

#include "kernels/device.h"
#include "device.h"
#include "utils/visitable.h"

namespace FlexFlow {
Expand Down
2 changes: 1 addition & 1 deletion lib/kernels/include/kernels/flat_kernels.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#ifndef _FLEXFLOW_OPS_KERNELS_FLAT_KERNELS_H
#define _FLEXFLOW_OPS_KERNELS_FLAT_KERNELS_H

#include "device.h"
#include "kernels/accessor.h"
#include "kernels/device.h"

namespace FlexFlow {
namespace Kernels {
Expand Down
15 changes: 8 additions & 7 deletions lib/kernels/include/kernels/gather_kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,30 @@
#define _FLEXFLOW_OPS_KERNELS_GATHER_KERNELS_H

#include "accessor.h"
#include "kernels/device.h"
#include "device.h"

namespace FlexFlow {

class GatherPerDeviceState : public PerDeviceOpState {
public:
GatherPerDeviceState(FFHandler handler);
struct GatherPerDeviceState {
int legion_dim;
DataType index_data_type;
req<DataType> index_data_type;
};
FF_VISITABLE_STRUCT_NONSTANDARD_CONSTRUCTION(GatherPerDeviceState,
legion_dim,
index_data_type);

namespace Kernels {
namespace Gather {
void forward_kernel(ffStream_t stream,
GatherPerDeviceState const *m,
GatherPerDeviceState const &m,
GenericTensorAccessorR const &input,
GenericTensorAccessorR const &index,
GenericTensorAccessorW const &output,
size_t stride,
size_t input_dim_size,
size_t output_dim_size);
void backward_kernel(ffStream_t stream,
GatherPerDeviceState const *m,
GatherPerDeviceState const &m,
GenericTensorAccessorR const &output_grad,
GenericTensorAccessorR const &index,
GenericTensorAccessorW const &input_grad,
Expand Down
Loading

0 comments on commit 8e0625f

Please sign in to comment.