Skip to content

Commit

Permalink
test vectorizability of blas-callables with vector inputs
Browse files Browse the repository at this point in the history
  • Loading branch information
kaushikcfd committed Apr 1, 2022
1 parent 7fe71a2 commit 6ffe97a
Showing 1 changed file with 75 additions and 19 deletions.
94 changes: 75 additions & 19 deletions examples/python/call-external.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def with_types(self, arg_id_to_dtype, callables_table):

if vec_dtype.numpy_dtype == np.float32:
name_in_target = "cblas_sgemv"
elif vec_dtype. numpy_dtype == np.float64:
elif vec_dtype.numpy_dtype == np.float64:
name_in_target = "cblas_dgemv"
else:
raise LoopyError("GEMV is only supported for float32 and float64 "
Expand All @@ -47,30 +47,37 @@ def with_descrs(self, arg_id_to_descr, callables_table):
assert mat_descr.shape[0] == res_descr.shape[0]
assert len(vec_descr.shape) == len(res_descr.shape) == 1
# handling only the easy case when stride == 1
assert vec_descr.dim_tags[0].stride == 1
assert mat_descr.dim_tags[1].stride == 1
assert res_descr.dim_tags[0].stride == 1

return self.copy(arg_id_to_descr=arg_id_to_descr), callables_table

def emit_call_insn(self, insn, target, expression_to_code_mapper):
from pymbolic import var
from loopy.codegen import UnvectorizableError
mat_descr = self.arg_id_to_descr[0]
vec_descr = self.arg_id_to_descr[1]
res_descr = self.arg_id_to_descr[-1]
m, n = mat_descr.shape
ecm = expression_to_code_mapper

if ecm.codegen_state.vectorization_info is not None:
raise UnvectorizableError("cannot vectorize BLAS-gemv.")

mat, vec = insn.expression.parameters
result, = insn.assignees

c_parameters = [var("CblasRowMajor"),
var("CblasNoTrans"),
m, n,
1,
1, # alpha
ecm(mat).expr,
1,
1, # LDA
ecm(vec).expr,
1,
vec_descr.dim_tags[0].stride, # INCX
1, # beta
ecm(result).expr,
1]
res_descr.dim_tags[0].stride # INCY
]
return (var(self.name_in_target)(*c_parameters),
False # cblas_gemv does not return anything
)
Expand All @@ -83,17 +90,66 @@ def generate_preambles(self, target):
# }}}


n = 10
def transform_1(knl):
return knl


def transform_2(knl):
# A similar transformation is applied to kernels containing
# SLATE <https://www.firedrakeproject.org/firedrake.slate.html>
# callables.
knl = lp.split_iname(knl, "e", 4, inner_iname="e_inner", slabs=(0, 1))
knl = lp.privatize_temporaries_with_inames(knl, "e_inner")
knl = lp.tag_inames(knl, {"e_inner": "vec"})
if 0:
# Easy codegen exercise, but misses vectorizing certain instructions.
knl = lp.tag_array_axes(knl, "tmp3", "c,vec")
else:
knl = lp.tag_array_axes(knl, "tmp3,tmp2", "c,vec")
return knl


def main():

knl = lp.make_kernel(
"{:}",
knl = lp.make_kernel(
"{[e,i1,i2]: 0<=e<n and 0<=i1,i2<4}",
"""
y[:] = gemv(A[:, :], x[:])
""", [
lp.GlobalArg("A", dtype=np.float64, shape=(n, n)),
lp.GlobalArg("x", dtype=np.float64, shape=(n, )),
lp.GlobalArg("y", shape=(n, )), ...],
target=CTarget())

knl = lp.register_callable(knl, "gemv", CBLASGEMV(name="gemv"))
print(lp.generate_code_v2(knl).device_code())
for e
for i1
tmp1[i1] = 3*x[e, i1]
end
tmp2[:] = matvec(A[:, :], tmp1[:])
for i2
<> tmp3[i2] = 2 * tmp2[i2]
out[e, i2] = tmp3[i2]
end
end
""",
kernel_data=[
lp.TemporaryVariable("tmp1",
shape=(4, ),
dtype=None),
lp.TemporaryVariable("tmp2",
shape=(4, ),
dtype=None),
lp.GlobalArg("A",
shape=(4, 4),
dtype="float64"),
lp.GlobalArg("x",
shape=lp.auto,
dtype="float64"),
...],
target=lp.CVectorExtensionsTarget(),
lang_version=(2018, 2))

knl = lp.register_callable(knl, "matvec", CBLASGEMV("matvec"))

for transform_func in [transform_1, transform_2]:
knl = transform_func(knl)
print("Generated code from '{transform_func.__name__} -----'")
print(lp.generate_code_v2(knl).device_code())
print(75 * "-")


if __name__ == "__main__":
main()

0 comments on commit 6ffe97a

Please sign in to comment.