Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions lib/api/blas/clBLAS.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ extern "C"
cl_uint numCommandQueues, cl_command_queue *commandQueues,\
cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events)\
{\
if(incx < 0) return clblasSuccess;\
sc::array x((sc::int_t)N, TYPE_ISAAC, sc::driver::Buffer(mx,false), (sc::int_t)offx, incx);\
execute(sc::assign(x, alpha*x), x.context(), numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
return clblasSuccess;\
Expand Down Expand Up @@ -143,6 +144,11 @@ extern "C"
cl_mem /*scratchBuff*/, cl_uint numCommandQueues, cl_command_queue *commandQueues,\
cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events)\
{\
if(incx <= 0) {\
sc::array sum((sc::int_t)offAsum + 1, TYPE_ISAAC, sc::driver::Buffer(asum, false), (sc::int_t)offAsum, 1);\
sum[0] = 0;\
return clblasSuccess;\
}\
sc::array x((sc::int_t)N, TYPE_ISAAC, sc::driver::Buffer(mx, false), (sc::int_t)offx, incx);\
sc::scalar s(TYPE_ISAAC, sc::driver::Buffer(asum, false), (sc::int_t)offAsum);\
execute(sc::assign(s, sum(abs(x))), s.context(), numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
Expand All @@ -167,7 +173,7 @@ extern "C"
{\
if(order==clblasRowMajor){\
std::swap(M, N);\
transA = (transA==clblasTrans)?clblasNoTrans:clblasTrans;\
transA = (transA==clblasTrans || transA==clblasConjTrans)?clblasNoTrans:clblasTrans;\
}\
sc::array A((sc::int_t)M, (sc::int_t)N, TYPE_ISAAC, sc::driver::Buffer(mA, false), (sc::int_t)offA, (sc::int_t)lda);\
\
Expand All @@ -177,7 +183,7 @@ extern "C"
sc::array y(sy, TYPE_ISAAC, sc::driver::Buffer(my, false), (sc::int_t)offy, incy);\
\
sc::driver::Context const & context = A.context();\
if(transA==clblasTrans)\
if(transA==clblasTrans || transA==clblasConjTrans)\
execute(sc::assign(y, alpha*dot(A.T, x) + beta*y), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
else\
execute(sc::assign(y, alpha*dot(A, x) + beta*y), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
Expand Down Expand Up @@ -226,11 +232,11 @@ extern "C"
sc::array C((sc::int_t)M, (sc::int_t)N, TYPE_ISAAC, sc::driver::Buffer(mC, false), (sc::int_t)offC, (sc::int_t)ldc);\
sc::driver::Context const & context = C.context();\
/*Operation*/\
if((transA==clblasTrans) && (transB==clblasTrans))\
if((transA==clblasTrans || transA==clblasConjTrans) && (transB==clblasTrans || transB==clblasConjTrans))\
execute(sc::assign(C, alpha*dot(A.T, B.T) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
else if((transA==clblasTrans) && (transB==clblasNoTrans))\
else if((transA==clblasTrans || transA==clblasConjTrans) && (transB==clblasNoTrans))\
execute(sc::assign(C, alpha*dot(A.T, B) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
else if((transA==clblasNoTrans) && (transB==clblasTrans))\
else if((transA==clblasNoTrans) && (transB==clblasTrans || transB==clblasConjTrans))\
execute(sc::assign(C, alpha*dot(A, B.T) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
else\
execute(sc::assign(C, alpha*dot(A, B) + beta*C), context, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
Expand Down
11 changes: 5 additions & 6 deletions lib/jit/generation/elementwise_1d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "tools/vector_types.hpp"
#include "tools/arguments.hpp"


#include <string>

namespace isaac
Expand Down Expand Up @@ -71,7 +72,7 @@ std::string elementwise_1d::generate_impl(std::string const & suffix, expression
stream << "{" << std::endl;
stream.inc_tab();
}

stream << tools::join(negative_inc_process(device, symbols, tree), " ") << std::endl;
element_wise_loop_1D(stream, vwidth_, "i", "N", "$GLOBAL_IDX_0", "$GLOBAL_SIZE_0", [&](unsigned int vwidth)
{
std::string dtype = append_width("#scalartype",vwidth);
Expand All @@ -83,27 +84,25 @@ std::string elementwise_1d::generate_impl(std::string const & suffix, expression
//Load to registers
for(symbolic::leaf* sym: symbolic::extract<symbolic::leaf>(tree, symbols, assignments_rhs, false))
stream << sym->process(dtype + " #name = " + append_width("loadv", vwidth) + "(i);") << std::endl;

//Compute
for(size_t idx: assignments)
for(unsigned int s = 0 ; s < vwidth ; ++s)
stream << symbols.at(idx)->evaluate({{"leaf", access_vector_type("#name", s, vwidth)}}) << ";" << std::endl;
stream << symbols.at(idx)->evaluate({{"leaf", access_vector_type("#name", s, vwidth)}}) << ";" << std::endl;

//Writes back
for(symbolic::leaf* sym: symbolic::extract<symbolic::leaf>(tree, symbols, assignments_lhs, false))
for(unsigned int s = 0 ; s < vwidth ; ++s)
stream << sym->process("at(i+" + tools::to_string(s)+") = " + access_vector_type("#name", s, vwidth) + ";") << std::endl;
stream << sym->process("at(i+" + tools::to_string(s)+") = " + access_vector_type("#name", s, vwidth) + ";") << std::endl;
});
//Close user-provided for-loops
if(sfors.size()){
stream.dec_tab();
stream << "}" << std::endl;
}

stream.dec_tab();
stream << "}" << std::endl;

// std::cout << stream.str() << std::endl;
// std::cout << stream.str() << std::endl;
return stream.str();
}

Expand Down
1 change: 1 addition & 0 deletions lib/jit/generation/elementwise_2d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ std::string elementwise_2d::generate_impl(std::string const & suffix, expression
stream << "{" << std::endl;
stream.inc_tab();

stream << tools::join(negative_inc_process(device, symbols, tree), " ") << std::endl;
element_wise_loop_1D(stream, 1, "i", "M", "$GLOBAL_IDX_0", "$GLOBAL_SIZE_0", [&](unsigned int){
element_wise_loop_1D(stream, 1, "j", "N", "$GLOBAL_IDX_1", "$GLOBAL_SIZE_1", [&](unsigned int){
//Declares register to store results
Expand Down
4 changes: 3 additions & 1 deletion lib/jit/generation/reduce_1d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ std::string reduce_1d::generate_impl(std::string const & suffix, expression_tree
stream << rd->process("#scalartype #name_acc = " + neutral_element(rd->op(), backend, "#scalartype") + ";") << std::endl;
}
}
stream << tools::join(negative_inc_process(device, symbols, tree), " ") << std::endl;
element_wise_loop_1D(stream, vwidth_, "i", "N", "$GLOBAL_IDX_0", "$GLOBAL_SIZE_0", [&](unsigned int vwidth)
{
std::string dtype = append_width("#scalartype",vwidth);
Expand Down Expand Up @@ -195,6 +196,7 @@ std::string reduce_1d::generate_impl(std::string const & suffix, expression_tree
stream << "{" << std::endl;
stream.inc_tab();
unroll_tmp();
stream << tools::join(negative_inc_process(device, symbols, tree), " ") << std::endl;
//Declarations
stream << "unsigned int lid = $LOCAL_IDX_0;" << std::endl;
stream << "unsigned int lsize = $LOCAL_SIZE_0;" << std::endl;
Expand All @@ -210,7 +212,7 @@ std::string reduce_1d::generate_impl(std::string const & suffix, expression_tree
else
{
stream << rd->process("$LOCAL #scalartype #name_buf[" + tools::to_string(ls0_) + "];") << std::endl;
stream << rd->process("#scalartype #name_acc = " + neutral_element(rd->op(), backend, "#scalartype") + ";");
stream << rd->process("#scalartype #name_acc = " + neutral_element(rd->op(), backend, "#scalartype") + ";");
}
}
//Private reduction
Expand Down
4 changes: 3 additions & 1 deletion lib/jit/generation/reduce_2d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ std::string reduce_2d::generate_impl(std::string const & suffix, expression_tree
std::ostringstream upper;
upper << "(M +" << ls1_ - 1 << ")/" << ls1_ << "*" << ls1_;

stream << tools::join(reduce_2d_negative_inc_process(device, symbols, tree), " ") << std::endl;
element_wise_loop_1D(stream, (reduction_type_==REDUCE_ROWS)?1:1, "r", upper.str(), "$GLOBAL_IDX_1", "$GLOBAL_SIZE_1", [&](unsigned int cwidth)
{
//Declare Buffers
Expand Down Expand Up @@ -212,6 +213,7 @@ std::string reduce_2d::generate_impl(std::string const & suffix, expression_tree
stream << "{" << std::endl;
stream.inc_tab();
unroll_tmp();
stream << tools::join(reduce_2d_negative_inc_process(device, symbols, tree), " ") << std::endl;
for (symbolic::reduce_2d* rd : reductions)
stream << rd->process("$LOCAL #scalartype #name_buf[" + to_string(ls1_*ldls) + "];") << std::endl;
stream << "for($SIZE_T r = $GLOBAL_IDX_1; r < (M +" << ls1_ - 1 << ")/" << ls1_ << "*" << ls1_ << "; r += " << GlobalSize1(backend) << "){" << std::endl;
Expand Down Expand Up @@ -265,7 +267,7 @@ std::string reduce_2d::generate_impl(std::string const & suffix, expression_tree
stream << "}" << std::endl;
}

// std::cout << stream.str() << std::endl;
// std::cout << stream.str() << std::endl;
return stream.str();
}

Expand Down
60 changes: 59 additions & 1 deletion lib/jit/generation/tools/arguments.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,11 @@ inline std::vector<std::string> kernel_arguments(driver::Device const &, symboli
result.push_back(sym->process("#scalartype #name_value"));
if(symbolic::buffer* sym = dynamic_cast<symbolic::buffer*>(obj))
{
result.push_back("$GLOBAL " + sym->process("#scalartype* #pointer"));
std::string pointer_name = sym->process("#scalartype* #pointer");
if(sym->hasattr("inc0") && !sym->hasattr("inc1"))
result.push_back("$GLOBAL " + pointer_name+"_bk");
else
result.push_back("$GLOBAL " + pointer_name);
if(sym->hasattr("off")) result.push_back("$SIZE_T " + sym->process("#off"));
if(sym->hasattr("inc0")) result.push_back("$SIZE_T " + sym->process("#inc0"));
if(sym->hasattr("inc1")) result.push_back("$SIZE_T " + sym->process("#inc1"));
Expand All @@ -59,5 +63,59 @@ inline std::vector<std::string> kernel_arguments(driver::Device const &, symboli
}


/**
 * Builds the kernel-prologue statements that make negative increments usable
 * for every buffer of the expression that has an "inc0" attribute but no
 * "inc1" attribute.
 *
 * kernel_arguments() declares such a buffer under the name "<pointer>_bk".
 * For each of them this function emits:
 *
 *     $GLOBAL <scalartype>* <pointer> = <pointer>_bk;
 *      if(<inc0> < 0)
 *      <pointer> += (1-N) * <inc0>;
 *
 * NOTE(review): the emitted code refers to a variable named N; it is assumed
 * to be in scope of the generated kernel - confirm for every caller.
 *
 * @param symbols      symbol table used to resolve the objects of the expression
 * @param expressions  expression tree whose buffers are inspected
 * @return one string (declaration + conditional offset) per matching buffer
 */
inline std::vector<std::string> negative_inc_process(driver::Device const &, symbolic::symbols_table const & symbols, expression_tree const & expressions)
{
  std::vector<std::string> result;
  for(symbolic::object* obj: symbolic::extract<symbolic::object>(expressions, symbols))
  {
    symbolic::buffer* sym = dynamic_cast<symbolic::buffer*>(obj);
    //Only buffers with "inc0" and without "inc1" get a "_bk" argument
    if(!sym || !sym->hasattr("inc0") || sym->hasattr("inc1"))
      continue;

    //"<scalartype>* <pointer>": the first blank separates the type from the name
    std::string const pointer = sym->process("#scalartype* #pointer");
    //Kept as size_type (no narrowing to int). If no blank is found, npos + 1
    //wraps to 0, so both substrings below are the whole string.
    std::string::size_type const pointer_pos = pointer.find_first_of(" ");
    std::string const pointer_name = pointer.substr(pointer_pos + 1, pointer.length());
    std::string const type = pointer.substr(0, pointer_pos);
    std::string const inc0 = sym->process("#inc0");

    //$GLOBAL (rather than a hard-coded "__global") keeps the declaration
    //consistent with the qualifier kernel_arguments() uses for the argument.
    std::string const pointer_def = "$GLOBAL " + type + " " + pointer_name + " = " + pointer_name + "_bk;";
    std::string const judge = " if(" + inc0 + " < 0)";
    result.push_back(pointer_def + "\n" + judge + "\n" + " " + pointer_name + " += (1-N) * " + inc0 + ";\n");
  }
  return result;
}

/**
 * reduce_2d counterpart of the negative-increment prologue: for every buffer
 * of the expression that has an "inc0" attribute but no "inc1" attribute,
 * emits
 *
 *     $GLOBAL <scalartype>* <pointer> = <pointer>_bk;
 *      if(<inc0> < 0)
 *      <pointer> += (1-M) * <inc0>;     (or (1-N), see below)
 *
 * The offset uses N when the processed pointer text contains "obj3", and M
 * otherwise.
 * NOTE(review): matching on the literal "obj3" presumably singles out one
 * specific operand of the reduction - confirm, this depends on symbol naming.
 * NOTE(review): M and N are assumed to be in scope of the generated kernel.
 *
 * @param symbols      symbol table used to resolve the objects of the expression
 * @param expressions  expression tree whose buffers are inspected
 * @return one string (declaration + conditional offset) per matching buffer
 */
inline std::vector<std::string> reduce_2d_negative_inc_process(driver::Device const &, symbolic::symbols_table const & symbols, expression_tree const & expressions)
{
  std::vector<std::string> result;
  for(symbolic::object* obj: symbolic::extract<symbolic::object>(expressions, symbols))
  {
    symbolic::buffer* sym = dynamic_cast<symbolic::buffer*>(obj);
    //Only buffers with "inc0" and without "inc1" get a "_bk" argument
    if(!sym || !sym->hasattr("inc0") || sym->hasattr("inc1"))
      continue;

    //"<scalartype>* <pointer>": the first blank separates the type from the name
    std::string const pointer = sym->process("#scalartype* #pointer");
    //Kept as size_type (no narrowing to int). If no blank is found, npos + 1
    //wraps to 0, so both substrings below are the whole string.
    std::string::size_type const pointer_pos = pointer.find_first_of(" ");
    std::string const pointer_name = pointer.substr(pointer_pos + 1, pointer.length());
    std::string const type = pointer.substr(0, pointer_pos);
    std::string const inc0 = sym->process("#inc0");

    //$GLOBAL (rather than a hard-coded "__global") keeps the declaration
    //consistent with the qualifier kernel_arguments() uses for the argument.
    std::string const pointer_def = "$GLOBAL " + type + " " + pointer_name + " = " + pointer_name + "_bk;";
    std::string const judge = " if(" + inc0 + " < 0)";

    //Extent used to rewind the pointer: N for the "obj3" operand, M otherwise
    std::string const extent = (pointer.find("obj3") == std::string::npos) ? "M" : "N";
    result.push_back(pointer_def + "\n" + judge + "\n" + " " + pointer_name + " += (1-" + extent + ") * " + inc0 + ";\n");
  }
  return result;
}

}
}
4 changes: 2 additions & 2 deletions lib/jit/generation/tools/loop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ inline void element_wise_loop_1D(kernel_generation_stream & stream, unsigned int
std::string init = domain_id + "*" + svwidth;
std::string lbound = bound + "/" + svwidth + "*" + svwidth;
std::string inc = domain_size + "*" + svwidth;
stream << "for(unsigned int " << i << " = " << init << "; " << i << " < " << lbound << "; " << i << " += " << inc << ")" << std::endl;
stream << "for(int " << i << " = " << init << "; " << i << " < " << lbound << "; " << i << " += " << inc << ")" << std::endl;
stream << "{" << std::endl;
stream.inc_tab();
generate_body(vwidth);
Expand All @@ -47,7 +47,7 @@ inline void element_wise_loop_1D(kernel_generation_stream & stream, unsigned int

if (vwidth>1)
{
stream << "for(unsigned int " << i << " = " << lbound << " + " << domain_id << "; " << i << " < " << bound << "; " << i << " += " + domain_size + ")" << std::endl;
stream << "for(int " << i << " = " << lbound << " + " << domain_id << "; " << i << " < " << bound << "; " << i << " += " + domain_size + ")" << std::endl;
stream << "{" << std::endl;
stream.inc_tab();
generate_body(1);
Expand Down
5 changes: 4 additions & 1 deletion lib/jit/syntax/engine/object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,10 @@ buffer::buffer(driver::Context const & context, std::string const & scalartype,
macros_.insert(make_broadcast(shape));

add_base("buffer");
add_load(strides[0]==1 && shape[0]>1);
// add_load(strides[0]==1 && shape[0]>1);
// stride==1 would result in "vloadn"'s use in kernel,if a kernel is generated with stride==1,
// it can't run samples whose stride<0,so vloadn can't be used in kernels.
add_load(false);
}

//
Expand Down