Skip to content

Commit

Permalink
test utils logic cleanup, reverse cpu_kernel pedagogical implementatio…
Browse files Browse the repository at this point in the history
…n, other minor fixes
  • Loading branch information
oOTigger committed Jul 31, 2024
1 parent ba586ae commit e6e2161
Show file tree
Hide file tree
Showing 40 changed files with 590 additions and 476 deletions.
26 changes: 18 additions & 8 deletions lib/kernels/include/kernels/accessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,20 @@ class GenericTensorAccessorW {
double *get_double_ptr() const;
half *get_half_ptr() const;

GenericTensorAccessorW(DataType dt,

Check warning on line 32 in lib/kernels/include/kernels/accessor.h

View check run for this annotation

Codecov / codecov/patch

lib/kernels/include/kernels/accessor.h#L32

Added line #L32 was not covered by tests
ArrayShape sh,
req<void *> p,
bool on_dev = true)
: data_type(dt), shape(sh), ptr(p), on_device(on_dev) {}

Check warning on line 36 in lib/kernels/include/kernels/accessor.h

View check run for this annotation

Codecov / codecov/patch

lib/kernels/include/kernels/accessor.h#L36

Added line #L36 was not covered by tests

public:
DataType data_type;
ArrayShape shape;
req<void *> ptr;
bool on_device;
};
FF_VISITABLE_STRUCT_NONSTANDARD_CONSTRUCTION(GenericTensorAccessorW,
data_type,
shape,
ptr);
FF_VISITABLE_STRUCT_NONSTANDARD_CONSTRUCTION(

Check warning on line 44 in lib/kernels/include/kernels/accessor.h

View check run for this annotation

Codecov / codecov/patch

lib/kernels/include/kernels/accessor.h#L44

Added line #L44 was not covered by tests
GenericTensorAccessorW, data_type, shape, ptr, on_device);

class GenericTensorAccessorR {
public:
Expand All @@ -57,15 +62,20 @@ class GenericTensorAccessorR {
double const *get_double_ptr() const;
half const *get_half_ptr() const;

GenericTensorAccessorR(DataType dt,

Check warning on line 65 in lib/kernels/include/kernels/accessor.h

View check run for this annotation

Codecov / codecov/patch

lib/kernels/include/kernels/accessor.h#L65

Added line #L65 was not covered by tests
ArrayShape sh,
req<void const *> p,
bool on_dev = true)
: data_type(dt), shape(sh), ptr(p), on_device(on_dev) {}

Check warning on line 69 in lib/kernels/include/kernels/accessor.h

View check run for this annotation

Codecov / codecov/patch

lib/kernels/include/kernels/accessor.h#L69

Added line #L69 was not covered by tests

public:
DataType data_type;
ArrayShape shape;
req<void const *> ptr;
bool on_device;
};
FF_VISITABLE_STRUCT_NONSTANDARD_CONSTRUCTION(GenericTensorAccessorR,
data_type,
shape,
ptr);
FF_VISITABLE_STRUCT_NONSTANDARD_CONSTRUCTION(
GenericTensorAccessorR, data_type, shape, ptr, on_device);

int32_t *get_int32_ptr(GenericTensorAccessorW const &);
int64_t *get_int64_ptr(GenericTensorAccessorW const &);
Expand Down
9 changes: 9 additions & 0 deletions lib/kernels/include/kernels/allocation.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@
#include <cstddef>
#include <memory>

// Tags where an allocator's memory lives. Allocator stores one of these in
// `alloc_location` and compares it against DEVICE to fill in the `on_device`
// flag of the tensor accessors it returns.
// NOTE(review): this is declared at global scope, ahead of
// `namespace FlexFlow` — confirm that is intended.
enum class AllocLocation { HOST, DEVICE };

namespace FlexFlow {

struct IAllocator {
virtual void *allocate(size_t) = 0;
virtual void *allocate_and_zero(size_t) = 0;
virtual void deallocate(void *) = 0;

virtual ~IAllocator() = default;
Expand All @@ -18,7 +21,11 @@ struct Allocator {
Allocator() = delete;

GenericTensorAccessorW allocate_tensor(TensorShape const &tensor_shape);
GenericTensorAccessorW
allocate_tensor_and_zero(TensorShape const &tensor_shape);

void *allocate(size_t mem_size);
void *allocate_and_zero(size_t mem_size);
void deallocate(void *ptr);

template <typename T, typename... Args>
Expand All @@ -30,6 +37,8 @@ struct Allocator {

Allocator(std::shared_ptr<IAllocator> ptr) : i_allocator(ptr){};

AllocLocation alloc_location;

private:
std::shared_ptr<IAllocator> i_allocator;
};
Expand Down
18 changes: 8 additions & 10 deletions lib/kernels/include/kernels/cast_kernels_cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,17 @@
namespace FlexFlow {
namespace Kernels {
namespace Cast {
namespace CPU {

void forward_kernel(GenericTensorAccessorR const &input,
GenericTensorAccessorW const &output,
DataType input_type,
DataType output_type);
void cpu_forward_kernel(GenericTensorAccessorR const &input,
GenericTensorAccessorW const &output,
DataType input_type,
DataType output_type);

void backward_kernel(GenericTensorAccessorR const &input,
GenericTensorAccessorW const &output,
DataType input_type,
DataType output_type);
void cpu_backward_kernel(GenericTensorAccessorR const &input,
GenericTensorAccessorW const &output,
DataType input_type,
DataType output_type);

} // namespace CPU
} // namespace Cast
} // namespace Kernels
} // namespace FlexFlow
Expand Down
10 changes: 4 additions & 6 deletions lib/kernels/include/kernels/combine_kernels_cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,13 @@
namespace FlexFlow {
namespace Kernels {
namespace Combine {
namespace CPU {

void forward_kernel(GenericTensorAccessorR const &input,
GenericTensorAccessorW const &output);
void cpu_forward_kernel(GenericTensorAccessorR const &input,
GenericTensorAccessorW const &output);

void backward_kernel(GenericTensorAccessorR const &output_grad,
GenericTensorAccessorW const &input_grad);
void cpu_backward_kernel(GenericTensorAccessorR const &output_grad,
GenericTensorAccessorW const &input_grad);

} // namespace CPU
} // namespace Combine
} // namespace Kernels
} // namespace FlexFlow
Expand Down
1 change: 1 addition & 0 deletions lib/kernels/include/kernels/local_cpu_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ struct LocalCPUAllocator : public IAllocator {
~LocalCPUAllocator() override;

void *allocate(size_t) override;
void *allocate_and_zero(size_t) override;
void deallocate(void *) override;

private:
Expand Down
1 change: 1 addition & 0 deletions lib/kernels/include/kernels/local_cuda_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ struct LocalCudaAllocator : public IAllocator {
~LocalCudaAllocator() override;

void *allocate(size_t) override;
void *allocate_and_zero(size_t) override;
void deallocate(void *) override;

private:
Expand Down
12 changes: 5 additions & 7 deletions lib/kernels/include/kernels/replicate_kernels_cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,14 @@
namespace FlexFlow {
namespace Kernels {
namespace Replicate {
namespace CPU {

void forward_kernel(GenericTensorAccessorR const &input,
GenericTensorAccessorW const &output);
void cpu_forward_kernel(GenericTensorAccessorR const &input,
GenericTensorAccessorW const &output);

void backward_kernel(GenericTensorAccessorW const &input,
GenericTensorAccessorR const &output,
size_t num_replicas);
void cpu_backward_kernel(GenericTensorAccessorW const &input,
GenericTensorAccessorR const &output,
size_t num_replicas);

} // namespace CPU
} // namespace Replicate
} // namespace Kernels
} // namespace FlexFlow
Expand Down
26 changes: 12 additions & 14 deletions lib/kernels/include/kernels/reverse_kernels_cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,20 @@
namespace FlexFlow {
namespace Kernels {
namespace Reverse {
namespace CPU {

void forward_kernel(float const *in_ptr,
float *out_ptr,
coord_t num_out_blks,
coord_t reverse_dim_size,
coord_t in_blk_size,
coord_t output_size);
void cpu_forward_kernel(float const *in_ptr,
float *out_ptr,
coord_t num_out_blks,
coord_t reverse_dim_size,
coord_t in_blk_size,
coord_t output_size);

void backward_kernel(float const *out_grad_ptr,
float *in_grad_ptr,
coord_t num_out_blks,
coord_t reverse_dim_size,
coord_t in_blk_size,
coord_t input_size);
} // namespace CPU
void cpu_backward_kernel(float const *out_grad_ptr,
float *in_grad_ptr,
coord_t num_out_blks,
coord_t reverse_dim_size,
coord_t in_blk_size,
coord_t input_size);
} // namespace Reverse
} // namespace Kernels
} // namespace FlexFlow
Expand Down
6 changes: 4 additions & 2 deletions lib/kernels/src/accessor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,10 @@ std::vector<half const *>

GenericTensorAccessorR read_only_accessor_from_write_accessor(
GenericTensorAccessorW const &writable) {
return GenericTensorAccessorR{
writable.data_type, writable.shape, req<void const *>(writable.ptr)};
return GenericTensorAccessorR{writable.data_type,

Check warning on line 137 in lib/kernels/src/accessor.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/accessor.cc#L137

Added line #L137 was not covered by tests
writable.shape,
req<void const *>(writable.ptr),
writable.on_device};

Check warning on line 140 in lib/kernels/src/accessor.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/accessor.cc#L139-L140

Added lines #L139 - L140 were not covered by tests
}

} // namespace FlexFlow
14 changes: 13 additions & 1 deletion lib/kernels/src/allocation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,26 @@ void *Allocator::allocate(size_t mem_size) {
return this->i_allocator->allocate(mem_size);
}

void *Allocator::allocate_and_zero(size_t mem_size) {
return this->i_allocator->allocate_and_zero(mem_size);

Check warning on line 10 in lib/kernels/src/allocation.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/allocation.cc#L9-L10

Added lines #L9 - L10 were not covered by tests
}

// Forwards `ptr` to the deallocate() of the wrapped IAllocator.
void Allocator::deallocate(void *ptr) {
this->i_allocator->deallocate(ptr);
}

GenericTensorAccessorW
Allocator::allocate_tensor(TensorShape const &tensor_shape) {
void *ptr = this->allocate(get_size_in_bytes(tensor_shape));
return {tensor_shape.data_type, tensor_shape, ptr};
bool on_device = this->alloc_location == AllocLocation::DEVICE;
return {tensor_shape.data_type, tensor_shape, ptr, on_device};

Check warning on line 21 in lib/kernels/src/allocation.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/allocation.cc#L20-L21

Added lines #L20 - L21 were not covered by tests
}

GenericTensorAccessorW
Allocator::allocate_tensor_and_zero(TensorShape const &tensor_shape) {
void *ptr = this->allocate_and_zero(get_size_in_bytes(tensor_shape));
bool on_device = this->alloc_location == AllocLocation::DEVICE;
return {tensor_shape.data_type, tensor_shape, ptr, on_device};

Check warning on line 28 in lib/kernels/src/allocation.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/allocation.cc#L25-L28

Added lines #L25 - L28 were not covered by tests
}

} // namespace FlexFlow
6 changes: 6 additions & 0 deletions lib/kernels/src/array_shape.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,10 @@ size_t get_volume(ArrayShape const &shape) {
return shape.get_volume();
}

TensorShape get_tensor_shape(ArrayShape const &shape, DataType DT) {
FFOrdered<size_t> ff_dims(shape.dims.begin(), shape.dims.end());
TensorDims tensor_shape_dims(ff_dims);
return TensorShape(tensor_shape_dims, DT);

Check warning on line 66 in lib/kernels/src/array_shape.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/array_shape.cc#L63-L66

Added lines #L63 - L66 were not covered by tests
}

} // namespace FlexFlow
35 changes: 17 additions & 18 deletions lib/kernels/src/cpu/cast_kernels.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,56 +4,55 @@
namespace FlexFlow {
namespace Kernels {
namespace Cast {
namespace CPU {

template <typename IDT, typename ODT>
void cast_forward(IDT const *input, ODT *output, size_t volume) {
void cpu_cast_forward(IDT const *input, ODT *output, size_t volume) {

Check warning on line 9 in lib/kernels/src/cpu/cast_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/cast_kernels.cc#L9

Added line #L9 was not covered by tests
for (size_t i = 0; i < volume; ++i) {
output[i] = static_cast<ODT>(input[i]);

Check warning on line 11 in lib/kernels/src/cpu/cast_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/cast_kernels.cc#L11

Added line #L11 was not covered by tests
}
}

template <typename IDT, typename ODT>
void cast_backward(IDT const *input, ODT *output, size_t volume, ODT beta) {
void cpu_cast_backward(IDT const *input, ODT *output, size_t volume, ODT beta) {

Check warning on line 16 in lib/kernels/src/cpu/cast_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/cast_kernels.cc#L16

Added line #L16 was not covered by tests
for (size_t i = 0; i < volume; i++) {
output[i] = static_cast<ODT>(input[i]) + beta * output[i];

Check warning on line 18 in lib/kernels/src/cpu/cast_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/cast_kernels.cc#L18

Added line #L18 was not covered by tests
}
}

template <DataType IDT, DataType ODT>
struct ForwardKernel {
struct CPUForwardKernel {
void operator()(GenericTensorAccessorR const &input,

Check warning on line 24 in lib/kernels/src/cpu/cast_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/cast_kernels.cc#L24

Added line #L24 was not covered by tests
GenericTensorAccessorW const &output) {
size_t volume = input.shape.get_volume();
cast_forward(input.get<IDT>(), output.get<ODT>(), volume);
cpu_cast_forward(input.get<IDT>(), output.get<ODT>(), volume);

Check warning on line 27 in lib/kernels/src/cpu/cast_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/cast_kernels.cc#L26-L27

Added lines #L26 - L27 were not covered by tests
}
};

template <DataType IDT, DataType ODT>
struct BackwardKernel {
struct CPUBackwardKernel {
void operator()(GenericTensorAccessorR const &input,

Check warning on line 33 in lib/kernels/src/cpu/cast_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/cast_kernels.cc#L33

Added line #L33 was not covered by tests
GenericTensorAccessorW const &output) {
size_t volume = input.shape.get_volume();
cast_backward(
cpu_cast_backward(
input.get<IDT>(), output.get<ODT>(), volume, cast_to<ODT>(1.0f));

Check warning on line 37 in lib/kernels/src/cpu/cast_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/cast_kernels.cc#L35-L37

Added lines #L35 - L37 were not covered by tests
}
};

void forward_kernel(GenericTensorAccessorR const &input,
GenericTensorAccessorW const &output,
DataType input_type,
DataType output_type) {
DataTypeDispatch2<ForwardKernel>{}(input_type, output_type, input, output);
void cpu_forward_kernel(GenericTensorAccessorR const &input,

Check warning on line 41 in lib/kernels/src/cpu/cast_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/cast_kernels.cc#L41

Added line #L41 was not covered by tests
GenericTensorAccessorW const &output,
DataType input_type,
DataType output_type) {
DataTypeDispatch2<CPUForwardKernel>{}(input_type, output_type, input, output);

Check warning on line 45 in lib/kernels/src/cpu/cast_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/cast_kernels.cc#L45

Added line #L45 was not covered by tests
}

void backward_kernel(GenericTensorAccessorR const &input,
GenericTensorAccessorW const &output,
DataType input_type,
DataType output_type) {
DataTypeDispatch2<BackwardKernel>{}(input_type, output_type, input, output);
void cpu_backward_kernel(GenericTensorAccessorR const &input,

Check warning on line 48 in lib/kernels/src/cpu/cast_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/cast_kernels.cc#L48

Added line #L48 was not covered by tests
GenericTensorAccessorW const &output,
DataType input_type,
DataType output_type) {
DataTypeDispatch2<CPUBackwardKernel>{}(

Check warning on line 52 in lib/kernels/src/cpu/cast_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/cast_kernels.cc#L52

Added line #L52 was not covered by tests
input_type, output_type, input, output);
}

} // namespace CPU
} // namespace Cast
} // namespace Kernels
} // namespace FlexFlow
18 changes: 8 additions & 10 deletions lib/kernels/src/cpu/combine_kernels.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@
namespace FlexFlow {
namespace Kernels {
namespace Combine {
namespace CPU {

template <DataType DT>
struct ForwardKernel {
struct CPUForwardKernel {
void operator()(GenericTensorAccessorR const &input,

Check warning on line 10 in lib/kernels/src/cpu/combine_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/combine_kernels.cc#L10

Added line #L10 was not covered by tests
GenericTensorAccessorW const &output) {
memcpy(output.get<DT>(),
Expand All @@ -17,7 +16,7 @@ struct ForwardKernel {
};

template <DataType DT>
struct BackwardKernel {
struct CPUBackwardKernel {
void operator()(GenericTensorAccessorR const &output_grad,

Check warning on line 20 in lib/kernels/src/cpu/combine_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/combine_kernels.cc#L20

Added line #L20 was not covered by tests
GenericTensorAccessorW const &input_grad) {
size_t num_elements = output_grad.shape.get_volume();

Check warning on line 22 in lib/kernels/src/cpu/combine_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/combine_kernels.cc#L22

Added line #L22 was not covered by tests
Expand All @@ -27,18 +26,17 @@ struct BackwardKernel {
}
};

void forward_kernel(GenericTensorAccessorR const &input,
GenericTensorAccessorW const &output) {
DataTypeDispatch1<ForwardKernel>{}(input.data_type, input, output);
void cpu_forward_kernel(GenericTensorAccessorR const &input,

Check warning on line 29 in lib/kernels/src/cpu/combine_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/combine_kernels.cc#L29

Added line #L29 was not covered by tests
GenericTensorAccessorW const &output) {
DataTypeDispatch1<CPUForwardKernel>{}(input.data_type, input, output);

Check warning on line 31 in lib/kernels/src/cpu/combine_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/combine_kernels.cc#L31

Added line #L31 was not covered by tests
}

void backward_kernel(GenericTensorAccessorR const &output_grad,
GenericTensorAccessorW const &input_grad) {
DataTypeDispatch1<BackwardKernel>{}(
void cpu_backward_kernel(GenericTensorAccessorR const &output_grad,

Check warning on line 34 in lib/kernels/src/cpu/combine_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/combine_kernels.cc#L34

Added line #L34 was not covered by tests
GenericTensorAccessorW const &input_grad) {
DataTypeDispatch1<CPUBackwardKernel>{}(
input_grad.data_type, output_grad, input_grad);

Check warning on line 37 in lib/kernels/src/cpu/combine_kernels.cc

View check run for this annotation

Codecov / codecov/patch

lib/kernels/src/cpu/combine_kernels.cc#L36-L37

Added lines #L36 - L37 were not covered by tests
}

} // namespace CPU
} // namespace Combine
} // namespace Kernels
} // namespace FlexFlow
Loading

0 comments on commit e6e2161

Please sign in to comment.