Skip to content
This repository has been archived by the owner on Jan 24, 2024. It is now read-only.

Commit

Permalink
Merge branch 'developing' into developing
Browse files Browse the repository at this point in the history
  • Loading branch information
cyj1986 authored Aug 3, 2018
2 parents 1374190 + 3a51e1b commit d4f37cf
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 117 deletions.
235 changes: 120 additions & 115 deletions framework/core/net/net.cpp
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ namespace anakin {

template<typename Ttype, DataType Dtype, Precision Ptype, OpRunType RunType>
Net<Ttype, Dtype, Ptype, RunType>::~Net() {
    // Release the graph owned by this Net. The diff scrape rendered this
    // guard block twice (old + new diff lines); a single guarded delete is
    // sufficient — after the first block _graph_p is nullptr, so a second
    // identical check is dead code.
    if (_graph_p) {
        delete _graph_p;
        _graph_p = nullptr;  // avoid dangling pointer / double delete
    }
}

template<typename Ttype, DataType Dtype>
Expand All @@ -24,7 +24,7 @@ double tensor_average(Tensor4dPtr<Ttype, Dtype>& out_tensor_p) {
tensorptr.h_tensor().copy_from(*out_tensor_p);
hptr = tensorptr.h_tensor().data();
for (int i=0; i<out_tensor_p->valid_size(); i++) {
sum += hptr[i];
sum += hptr[i];
}
return sum/out_tensor_p->valid_size();
}
Expand Down Expand Up @@ -138,8 +138,8 @@ void Net<Ttype, Dtype, Ptype, RunType>::init(graph::Graph<Ttype, Dtype, Ptype>&
init_env(graph);
// shallow copy
_graph_p->CopyFrom(graph);
double curr_mem_in_mb_start = MemoryInfo<Ttype>::Global().get_used_mem_in_mb();
double curr_mem_in_mb_start = MemoryInfo<Ttype>::Global().get_used_mem_in_mb();

auto node_names_in_exec_order = graph.get_nodes_in_order();
// infer basic shape and parsing parameter from graph
Expand Down Expand Up @@ -190,18 +190,24 @@ void Net<Ttype, Dtype, Ptype, RunType>::init(graph::Graph<Ttype, Dtype, Ptype>&
if (node_ptr->get_op_name() == "ConvBatchnormScale" ||
node_ptr->get_op_name() == "ConvBatchnormScaleRelu" || node_ptr->get_op_name() == "ConvRelu" ||
node_ptr->get_op_name() == "Convolution") {
std::string group = "group";
std::string group = "group";
auto group_val = node_ptr->template get_attr<int>(group);
std::string dilation = "dilation_rate";
auto dilation_rate_val = node_ptr->template get_attr<PTuple<int> >(dilation);
using pblock_type = PBlock<typename DataTypeWarpper<Dtype>::type, Ttype>;
std::string weight_name = "weight_1";
auto weights = node_ptr->template get_attr<pblock_type>(weight_name);
//int c = weights.d_tensor().channel();

if ((group_val == 1)) {
node_ptr->set_op(OpFactory<Ttype, Dtype, Ptype>::Global()["Sass" + node_ptr->get_op_name()]);
node_ptr->get_op_name() = "Sass" + node_ptr->get_op_name();
} else {
LOG(ERROR) << "node_ptr->get_op_name() sass not support yet.";

int k_w = weights.d_tensor().width();
int k_h = weights.d_tensor().height();
int dil_h = dilation_rate_val.vector()[0];
int dil_w = dilation_rate_val.vector()[1];

if ((group_val == 1) && (k_w == 3 && k_h == 3 && dil_h == 1 && dil_w == 1)) {
node_ptr->set_op(OpFactory<Ttype, Dtype, Ptype>::Global()["Sass"+node_ptr->get_op_name()]);
node_ptr->get_op_name() = "Sass" + node_ptr->get_op_name();
} else {
LOG(ERROR) << "node_ptr->get_op_name() sass not support yet.";
auto *op_pointer = OpFactory<Ttype, Dtype, Ptype>::Global()[node_ptr->get_op_name()];
node_ptr->set_op(op_pointer);
}
Expand Down Expand Up @@ -285,16 +291,16 @@ void Net<Ttype, Dtype, Ptype, RunType>::init(graph::Graph<Ttype, Dtype, Ptype>&
#endif
}

double curr_mem_in_mb_end = MemoryInfo<Ttype>::Global().get_used_mem_in_mb();
this->_graph_p->statistics.template set_info<graph::SYSTEM_MEM>(curr_mem_in_mb_end - curr_mem_in_mb_start);
double curr_mem_in_mb_end = MemoryInfo<Ttype>::Global().get_used_mem_in_mb();
this->_graph_p->statistics.template set_info<graph::SYSTEM_MEM>(curr_mem_in_mb_end - curr_mem_in_mb_start);
// init memory of _graph_p
init_memory();
graph.statistics = _graph_p->statistics; // copy statistic back
LOG(INFO) << "Temp mem used: " << this->_graph_p->statistics.template get_info<graph::TEMP_MEM>() << " MB";
LOG(INFO) << "Original mem used: " << this->_graph_p->statistics.template get_info<graph::ORI_TEMP_MEM>() << " MB";
LOG(INFO) << "Model mem used: " << this->_graph_p->statistics.template get_info<graph::MODEL_MEM>() << " MB";
LOG(INFO) << "System mem used: " << this->_graph_p->statistics.template get_info<graph::SYSTEM_MEM>() << " MB";
graph.statistics = _graph_p->statistics; // copy statistic back
LOG(INFO) << "Temp mem used: " << this->_graph_p->statistics.template get_info<graph::TEMP_MEM>() << " MB";
LOG(INFO) << "Original mem used: " << this->_graph_p->statistics.template get_info<graph::ORI_TEMP_MEM>() << " MB";
LOG(INFO) << "Model mem used: " << this->_graph_p->statistics.template get_info<graph::MODEL_MEM>() << " MB";
LOG(INFO) << "System mem used: " << this->_graph_p->statistics.template get_info<graph::SYSTEM_MEM>() << " MB";
#ifdef ENABLE_OP_TIMER
_op_time = std::vector<float>(_exec_funcs.size(), 0.0f);
#endif
Expand All @@ -312,11 +318,11 @@ void Net<Ttype, Dtype, Ptype, RunType>::init(graph::Graph<Ttype, Dtype, Ptype>&
LOG(WARNING) << " Inspect memory of " << executer.name << " (" << executer.op_name << ") ";
executer.infer_shape();

for (auto out : executer.outs) {
LOG(INFO) << " |-- out tensor avg " << tensor_average(out);
}
for (auto out : executer.outs) {
LOG(INFO) << " |-- out tensor avg " << tensor_average(out);
}
#ifdef USE_CUDA
CUDA_CHECK(cudaDeviceSynchronize());
CUDA_CHECK(cudaDeviceSynchronize());
CUDA_CHECK(cudaPeekAtLastError());
#endif
}
Expand Down Expand Up @@ -344,15 +350,15 @@ void Net<Ttype, Dtype, Ptype, RunType>::prediction() {
<< " " << in->valid_shape()[1]
<< " " << in->valid_shape()[2]
<< " " << in->valid_shape()[3]
<< " valid_size: " << in->valid_size()
<< " realsize: " << in->size()
<< " valid_size: " << in->valid_size()
<< " realsize: " << in->size()
<< " offset_size "<<in->get_seq_offset().size();
}
#endif
#ifdef ENABLE_OP_TIMER
Context<Ttype> ctx(0, 0, 0);
saber::SaberTimer<Ttype> my_time;
my_time.start(ctx);
Context<Ttype> ctx(0, 0, 0);
saber::SaberTimer<Ttype> my_time;
my_time.start(ctx);
#endif
if (executer.op_name != "Input") {
executer.infer_shape();
Expand All @@ -368,35 +374,35 @@ void Net<Ttype, Dtype, Ptype, RunType>::prediction() {
executer.outs[i]->record_event(executer.ctx_p->get_compute_stream());
executer.outs[i]->sync();
}
my_time.end(ctx);
my_time.end(ctx);
_op_time[op_id++] += my_time.get_average_ms();
#endif
//LOG(INFO)<< "op: " << executer.name<<"(" << executer.op_name <<") === infer+launch time "<<my_time.get_average_ms() << " ms";
//LOG(INFO)<< "op: " << executer.name<<"(" << executer.op_name <<") === infer+launch time "<<my_time.get_average_ms() << " ms";
#ifdef ENABLE_DEBUG
#ifdef USE_CUDA
CUDA_CHECK(cudaDeviceSynchronize());
CUDA_CHECK(cudaPeekAtLastError());
#endif
for (auto out : executer.outs) {
std::vector<int> offset=out->get_seq_offset();
LOG(INFO)<<"print offset of "<<executer.name <<",size = "<<offset.size();
for(int i=0;i<offset.size();++i){
LOG(INFO)<<offset[i]<<",";
}
LOG(INFO)<<" end print offset of "<<executer.name;
for (auto out : executer.outs) {
std::vector<int> offset=out->get_seq_offset();
LOG(INFO)<<"print offset of "<<executer.name <<",size = "<<offset.size();
for(int i=0;i<offset.size();++i){
LOG(INFO)<<offset[i]<<",";
}
LOG(INFO)<<" end print offset of "<<executer.name;
#define RECORD_INNER
#if defined(RECORD_INNER) && defined(USE_X86_PLACE)
record_tensor_to_file(*out,("record_"+executer.name).c_str());
if(executer.name=="")
record_tensor_to_file(*out,("record_"+executer.name).c_str());
if(executer.name=="")
#endif
LOG(INFO) <<executer.name <<" d_tensor_out_p :" <<out->data();
#ifdef USE_X86_PLACE
// for (int i = 0; i < 10; ++i) {
// std::cout << out->data()[i]<<" ";
// }
#endif
LOG(ERROR) << " |---out avg " << tensor_average(out);
}
LOG(ERROR) << " |---out avg " << tensor_average(out);
}

#ifdef USE_ARM_PLACE
int idx = 0;
Expand Down Expand Up @@ -468,15 +474,15 @@ void Net<Ttype, Dtype, Ptype, RunType>::prediction() {

template<typename Ttype, DataType Dtype, Precision Ptype, OpRunType RunType>
void Net<Ttype, Dtype, Ptype, RunType>::execute_stop_at_node(std::string node_name) {
if(_suspended_point==-1) {
for(int i=0; i<_exec_funcs.size(); i++) {
if(_exec_funcs[i].name == node_name) {
_suspended_point = i;
}
}
}
for(int i=0; i<_suspended_point; i++) {
auto& executer = _exec_funcs[i];
if(_suspended_point==-1) {
for(int i=0; i<_exec_funcs.size(); i++) {
if(_exec_funcs[i].name == node_name) {
_suspended_point = i;
}
}
}
for(int i=0; i<_suspended_point; i++) {
auto& executer = _exec_funcs[i];
if (RunType == OpRunType::SYNC || executer.need_sync) {
for(int i = 0; i < executer.ins.size(); i++) {
// record
Expand All @@ -491,37 +497,37 @@ void Net<Ttype, Dtype, Ptype, RunType>::execute_stop_at_node(std::string node_na
<< " " << in->valid_shape()[1]
<< " " << in->valid_shape()[2]
<< " " << in->valid_shape()[3]
<< " valid_size: " << in->valid_size()
<< " realsize: " << in->size()
<< " offset_size "<<in->get_seq_offset().size();
<< " valid_size: " << in->valid_size()
<< " realsize: " << in->size()
<< " offset_size "<<in->get_seq_offset().size();
}
for (auto out : executer.outs) {
LOG(INFO) << " |-- out tensor avg " << tensor_average(out);
}
for (auto out : executer.outs) {
LOG(INFO) << " |-- out tensor avg " << tensor_average(out);
}

#endif
if (executer.op_name != "Input") {
executer.infer_shape();
executer.launch();
}
if (executer.op_name != "Input") {
executer.infer_shape();
executer.launch();
}

for(int i = 0; i < executer.outs.size(); i++) {
executer.outs[i]->record_event(executer.ctx_p->get_compute_stream());
}
}
for(int i = 0; i < executer.outs.size(); i++) {
executer.outs[i]->record_event(executer.ctx_p->get_compute_stream());
}
}
}

template<typename Ttype, DataType Dtype, Precision Ptype, OpRunType RunType>
void Net<Ttype, Dtype, Ptype, RunType>::execute_start_from_node(std::string node_name) {
if(_start_point == -1) {
for(int i=0; i<_exec_funcs.size(); i++) {
if(_exec_funcs[i].name == node_name) {
_start_point = i;
}
}
}
for(int i=_start_point; i<_exec_funcs.size(); i++) {
auto& executer = _exec_funcs[i];
if(_start_point == -1) {
for(int i=0; i<_exec_funcs.size(); i++) {
if(_exec_funcs[i].name == node_name) {
_start_point = i;
}
}
}
for(int i=_start_point; i<_exec_funcs.size(); i++) {
auto& executer = _exec_funcs[i];
if (RunType == OpRunType::SYNC || executer.need_sync) {
for(int i = 0; i < executer.ins.size(); i++) {
// record
Expand All @@ -536,24 +542,24 @@ void Net<Ttype, Dtype, Ptype, RunType>::execute_start_from_node(std::string node
<< " " << in->valid_shape()[1]
<< " " << in->valid_shape()[2]
<< " " << in->valid_shape()[3]
<< " valid_size: " << in->valid_size()
<< " realsize: " << in->size()
<< " offset_size "<<in->get_seq_offset().size();
<< " valid_size: " << in->valid_size()
<< " realsize: " << in->size()
<< " offset_size "<<in->get_seq_offset().size();
}
for (auto out : executer.outs) {
LOG(INFO) << " |-- out tensor avg " << tensor_average(out);
}
for (auto out : executer.outs) {
LOG(INFO) << " |-- out tensor avg " << tensor_average(out);
}

#endif
if (executer.op_name != "Input") {
executer.infer_shape();
executer.launch();
}
if (executer.op_name != "Input") {
executer.infer_shape();
executer.launch();
}

for(int i = 0; i < executer.outs.size(); i++) {
executer.outs[i]->record_event(executer.ctx_p->get_compute_stream());
}
}
for(int i = 0; i < executer.outs.size(); i++) {
executer.outs[i]->record_event(executer.ctx_p->get_compute_stream());
}
}
}

template<typename Ttype, DataType Dtype, Precision Ptype, OpRunType RunType>
Expand Down Expand Up @@ -607,27 +613,27 @@ Status Net<Ttype, Dtype, Ptype, RunType>::init_memory() {
auto share_memory = [this](graph::Edge<Ttype, Dtype>& edge) {
if(edge.shared()) {
auto& edge_name = edge.share_from();
bool continue_search = true;
while(continue_search) {
auto match_edge = [&](graph::Edge<Ttype, Dtype>& inner_edge) {
if(inner_edge.name() == edge_name) {
if(inner_edge.shared()) {
edge_name = inner_edge.share_from();
return Status::EXIT(" Continue to find next . ");
}
if (inner_edge.weight()->size() < edge.weight()->valid_size()) {
auto inner_original_shape = inner_edge.weight()->valid_shape();
inner_edge.weight()->re_alloc(edge.weight()->valid_shape());
inner_edge.weight()->set_shape(inner_original_shape, inner_edge.weight()->shape());
}
edge.weight()->share_from(*(inner_edge.weight()));
continue_search = false;
return Status::EXIT(" Find the matched target edge. ");
}
return Status::OK();
};
this->_graph_p->Scanner->BFS_Edge(match_edge);
}
bool continue_search = true;
while(continue_search) {
auto match_edge = [&](graph::Edge<Ttype, Dtype>& inner_edge) {
if(inner_edge.name() == edge_name) {
if(inner_edge.shared()) {
edge_name = inner_edge.share_from();
return Status::EXIT(" Continue to find next . ");
}
if (inner_edge.weight()->size() < edge.weight()->valid_size()) {
auto inner_original_shape = inner_edge.weight()->valid_shape();
inner_edge.weight()->re_alloc(edge.weight()->valid_shape());
inner_edge.weight()->set_shape(inner_original_shape, inner_edge.weight()->shape());
}
edge.weight()->share_from(*(inner_edge.weight()));
continue_search = false;
return Status::EXIT(" Find the matched target edge. ");
}
return Status::OK();
};
this->_graph_p->Scanner->BFS_Edge(match_edge);
}
}
};
_graph_p->Scanner->BFS_Edge(share_memory);
Expand All @@ -644,8 +650,8 @@ Status Net<Ttype, Dtype, Ptype, RunType>::init_memory() {
};
this->_graph_p->Scanner->BFS_Edge(analysis_used_of_temp_mem);

this->_graph_p->statistics.template set_info<graph::TEMP_MEM>(temp_mem_in_mbytes / 1e6);
this->_graph_p->statistics.template set_info<graph::ORI_TEMP_MEM>(ori_temp_mem_in_mbytes / 1e6);
this->_graph_p->statistics.template set_info<graph::TEMP_MEM>(temp_mem_in_mbytes / 1e6);
this->_graph_p->statistics.template set_info<graph::ORI_TEMP_MEM>(ori_temp_mem_in_mbytes / 1e6);
}
return Status::OK();
}
Expand Down Expand Up @@ -700,4 +706,3 @@ template class Net<ARM, AK_FLOAT, Precision::INT8, OpRunType::SYNC>;
#endif //arm

} /* namespace anakin */

9 changes: 7 additions & 2 deletions framework/operators/fusion_ops/conv_relu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ Status ConvReluHelper<Ttype, Dtype, Ptype>::Init(OpContext<Ttype>& ctx,
const std::vector<Tensor4dPtr<Ttype, Dtype> >& ins,
std::vector<Tensor4dPtr<Ttype, Dtype> >& outs) {

SABER_CHECK(_funcs_conv_relu.init(ins, outs, _param_conv_relu, SPECIFY, SABER_IMPL, ctx));
SABER_CHECK(_funcs_conv_relu.init(ins, outs, _param_conv_relu, SPECIFY, VENDER_IMPL, ctx));
return Status::OK();
}

Expand All @@ -95,7 +95,12 @@ template <>
Status ConvReluHelper<NV, AK_FLOAT, Precision::FP32>::Init(OpContext<NV>& ctx, \
const std::vector<Tensor4dPtr<NV, AK_FLOAT> >& ins, \
std::vector<Tensor4dPtr<NV, AK_FLOAT> >& outs) {
if (_param_conv_relu.conv_param.group == 1|| (_param_conv_relu.conv_param.group == ins[0]->channel() && \
bool use_saber = true;
use_saber = use_saber && (_param_conv_relu.conv_param.weight()->height()==3);
use_saber = use_saber && (_param_conv_relu.conv_param.weight()->width()==3);
use_saber = use_saber && (_param_conv_relu.conv_param.dilation_h == 1);
use_saber = use_saber && (_param_conv_relu.conv_param.dilation_w == 1);
if (((_param_conv_relu.conv_param.group == 1) && use_saber)|| (_param_conv_relu.conv_param.group == ins[0]->channel() && \
_param_conv_relu.conv_param.group == outs[0]->channel())) {
_funcs_conv_relu.init(ins, outs, _param_conv_relu, SPECIFY, SABER_IMPL, ctx);
} else {
Expand Down

0 comments on commit d4f37cf

Please sign in to comment.