We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent acf84de commit bd01434Copy full SHA for bd01434
csrc/nv_internal/tensorrt_llm/thop/fp4Quantize.h
@@ -33,6 +33,7 @@ void fp4_quantize(Tensor self, Optional<Tensor> const& globalScale, Tensor value
33
34
void fp4_batched_quantize(Tensor self, Optional<Tensor> const& mask, Tensor globalScale,
35
Tensor valueE2M1, Tensor scaleFP8SF, int64_t sfVecSize, bool sfUseUE8M0);
36
+
37
void silu_and_mul_fp4_batched_quantize(Tensor const& self, Tensor const& mask,
38
Tensor const& globalScale, Tensor valueE2M1,
39
Tensor scaleFP8SF, int64_t sfVecSize);
0 commit comments