-
Notifications
You must be signed in to change notification settings - Fork 35
/
functions.h
1958 lines (1767 loc) · 123 KB
/
functions.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/**
* @file
* @author OpenComputeLab
* @copyright (c) 2023, OpenComputeLab.
*/
#ifndef _PROJECT_DIOPERATOR_INTERFACE_FUNCTIONS_H_
#define _PROJECT_DIOPERATOR_INTERFACE_FUNCTIONS_H_
#include <diopi/diopirt.h>
#if defined(__cplusplus)
extern "C" {
#endif // __cplusplus
/**
 * @brief Reduction selector taken by the loss functions declared in this header
 *        (for example diopiMSELoss()).
 *
 * The numeric values are the ones C assigns implicitly; they are spelled out here.
 */
typedef enum {
    ReductionNone = 0, /**< no reduction (per the enumerator name) */
    ReductionMean = 1, /**< mean reduction (per the enumerator name) */
    ReductionSum  = 2, /**< sum reduction (per the enumerator name) */
    ReductionEND  = 3  /**< trailing marker; equals the number of modes listed above */
} diopiReduction_t;
/**
 * @brief Rounding-mode selector (its consumers are outside this section of the header).
 *
 * The numeric values are the ones C assigns implicitly; they are spelled out here.
 */
typedef enum {
    RoundModeNone  = 0, /**< no rounding mode (per the enumerator name) */
    RoundModeTrunc = 1, /**< truncation (per the enumerator name) */
    RoundModeFloor = 2, /**< floor (per the enumerator name) */
    RoundModeEND   = 3  /**< trailing marker; equals the number of modes listed above */
} diopiRoundMode_t;
/**
 * @brief Scalar argument passed by const pointer to operators in this header
 *        (for example min_val / max_val of diopiHardtanh()).
 *
 * The value lives in an anonymous union, so fval and ival share storage.
 */
typedef struct {
diopiDtype_t stype; /**< dtype tag; presumably tells which union member is meaningful -- TODO confirm */
union {
double fval; /**< floating-point payload */
int64_t ival; /**< integer payload */
};
} diopiScalar_t;
/**
 * @brief Get the name of the vendor who implements the functions.
 * @return pointer to a constant C string; ownership and lifetime are not specified in this header.
 *
 * Declared with (void): in C (before C23) an empty parameter list declares a
 * function without a prototype, so calls passing arguments would not be diagnosed.
 */
DIOPI_RT_API const char* diopiGetVendorName(void);
/**
 * @brief Get the implementation version string (per the function name).
 * @return pointer to a constant C string; ownership and lifetime are not specified in this header.
 *
 * Declared with (void): in C (before C23) an empty parameter list declares a
 * function without a prototype, so calls passing arguments would not be diagnosed.
 */
DIOPI_RT_API const char* diopiGetImplVersion(void);
/**
 * @brief Get a string describing the last error (per the function name).
 * @return pointer to a constant C string; ownership and lifetime are not specified in this header.
 *
 * Declared with (void): in C (before C23) an empty parameter list declares a
 * function without a prototype, so calls passing arguments would not be diagnosed.
 */
DIOPI_RT_API const char* diopiGetLastErrorString(void);
/**
 * @brief Applies a 2D convolution over an input image composed of several input planes.
 * @param[in] ctx Context environment.
 * @param[in] input the input tensor. type = [float32, float16, float64].
 * @param[in] weight the weight tensor; dimension of kernel_size must match the number of input spatial dimensions.
 * type = [float32, float16, float64].
 * @param[in] bias bias tensor. type = [float32, float16, float64].
 * @param[in] stride an array with dimension matching the number of input spatial dimensions. type = [int32, int64].
 * @param[in] padding an array with dimension matching the number of input spatial dimensions. type = [int32, int64].
 * @param[in] dilation an array with dimension matching the number of input spatial dimensions. type = [int32, int64].
 * @param[in] groups number of groups for grouped convolution. type = [int32, int64].
 * @param[out] out the result tensor. type = [float32, float16, float64].
 * @return a diopiError_t status code.
 */
DIOPI_API diopiError_t diopiConvolution2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input,
diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, diopiSize_t stride,
diopiSize_t padding, diopiSize_t dilation, int64_t groups);
/**
 * @brief Backward pass for convolution2d. Computes gradients for input, weight, and bias.
 * @param[in] ctx Context environment.
 * @param[in] grad_output the grad tensor of output. type = [float32, float16, float64].
 * @param[in] bias_sizes an array, indicates that a bias was used in the forward pass and contains the shape of the bias. type = [int32, int64].
 * Passed by (non-const) pointer, unlike the other diopiSize_t arguments.
 * @param[in] transposed indicating whether the convolution is transposed. type = [bool].
 * @param[in] output_padding an array, dimension == number of input spatial dimensions; only supported when transposed is true. type = [int32, int64].
 * @param[out] grad_input the grad of input. type = [float32, float16, float64].
 * @param[out] grad_weight the grad of weight. type = [float32, float16, float64].
 * @param[out] grad_bias the grad of bias. type = [float32, float16, float64].
 * @return a diopiError_t status code.
 * @sa Other parameters refer to diopiConvolution2d().
 */
DIOPI_API diopiError_t diopiConvolution2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight,
diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input,
diopiConstTensorHandle_t weight, diopiSize_t *bias_sizes, diopiSize_t stride, diopiSize_t padding,
diopiSize_t dilation, bool transposed, diopiSize_t output_padding, int64_t groups);
/**
 * @brief Applies Batch Normalization for each channel across a batch of data.
 * @param[in] ctx Context environment.
 * @param[in] input input tensor. type = [float32, float16, float64].
 * @param[in] weight weight tensor. type = [float32, float16, float64].
 * @param[in] bias bias tensor. type = [float32, float16, float64].
 * @param running_mean weighted average tensor. type = [float32, float16, float64].
 * Passed as a mutable handle; presumably updated in place when training -- TODO confirm.
 * @param running_var weighted variance tensor. type = [float32, float16, float64].
 * Passed as a mutable handle; presumably updated in place when training -- TODO confirm.
 * @param[in] training check if in training mode. type = [bool].
 * @param[in] momentum Used to calculate the running mean and variance during runtime. type = [float32, float64].
 * @param[in] eps The value added to the denominator during batch normalization to ensure numerical stability. type = [float32, float64].
 * @param[out] out normalized result. type = [float32, float16, float64].
 * @param[out] save_mean Mean tensor, the mean value for each feature channel of the input tensor. type = [float32, float16, float64].
 * @param[out] save_invstd Backup of inverse standard deviation computed during training. type = [float32, float16, float64].
 * @return a diopiError_t status code.
 */
DIOPI_API diopiError_t diopiBatchNorm(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t save_mean,
diopiTensorHandle_t save_invstd, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight,
diopiConstTensorHandle_t bias, diopiTensorHandle_t running_mean,
diopiTensorHandle_t running_var, bool training, double momentum, double eps);
/**
 * @brief Compute the backward pass of batch normalization.
 * @param[in] ctx Context environment.
 * @param[in] grad_output Gradient of normalized layer output, with the same shape as the forward pass output. type = [float32, float16, float64].
 * @param[out] grad_input Gradient of the input data, with the same shape as the input data. type = [float32, float16, float64].
 * @param[out] grad_weight Gradient of the weight parameter, with the same shape as the weight parameter. type = [float32, float16, float64].
 * @param[out] grad_bias Gradient of the bias parameter, with the same shape as the bias parameter. type = [float32, float16, float64].
 * @return a diopiError_t status code.
 * @sa Other parameters refer to diopiBatchNorm(). Here running_mean, running_var, save_mean and
 * save_invstd are all taken as const (read-only) handles.
 */
DIOPI_API diopiError_t diopiBatchNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight,
diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight,
diopiConstTensorHandle_t running_mean, diopiConstTensorHandle_t running_var, diopiConstTensorHandle_t save_mean,
diopiConstTensorHandle_t save_invstd, bool training, double eps);
/**
 * @brief Applies the rectified linear unit function element-wise.
 * @param[in] ctx Context environment.
 * @param[in] input the input tensor. type = [float32, float64].
 * @param[out] out the result tensor. type = [float32, float64].
 * @return a diopiError_t status code.
 */
DIOPI_API diopiError_t diopiRelu(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
/**
 * @brief The in-place version of diopiRelu().
 * @param[in] ctx Context environment.
 * @param[in,out] input the input tensor; the result is stored back into it. type = [float32, float64].
 * @return a diopiError_t status code.
 */
DIOPI_API diopiError_t diopiReluInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
/**
 * @brief Clips the tensor values within a range defined by the lower and upper bounds.
 * Any values below the lower bound are set to the lower bound, and any values above the upper bound are set to the upper bound.
 * @param[in] ctx Context environment.
 * @param[in] input the input tensor. type = [float32, float64].
 * @param[in] min_val scalar, the lower bound. type = [float32, float64].
 * @param[in] max_val scalar, the upper bound. type = [float32, float64].
 * @param[out] out the output tensor. type = [float32, float64].
 * @return a diopiError_t status code.
 */
DIOPI_API diopiError_t diopiHardtanh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input,
const diopiScalar_t* min_val, const diopiScalar_t* max_val);
/**
 * @brief The in-place version of diopiHardtanh().
 * @param[in,out] input the input tensor; the result is stored back into it. type = [float32, float64].
 * @return a diopiError_t status code.
 * @sa Other parameters refer to diopiHardtanh().
 */
DIOPI_API diopiError_t diopiHardtanhInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* min_val, const diopiScalar_t* max_val);
/**
 * @brief Compute the backward pass of diopiHardtanh().
 * @param[in] grad_output the grad of output. type = [float32, float64].
 * @param[out] grad_input the grad of input. type = [float32, float64].
 * @return a diopiError_t status code.
 * @sa Other parameters refer to diopiHardtanh().
 */
DIOPI_API diopiError_t diopiHardtanhBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
diopiConstTensorHandle_t input, const diopiScalar_t* min_val, const diopiScalar_t* max_val);
/**
 * @brief Hardswish activation (per the function name; the formula is not stated in this header).
 * @param[in] ctx Context environment.
 * @param[in] input the input tensor. NOTE(review): supported dtypes are not documented here -- confirm.
 * @param[out] out the output tensor.
 * @return a diopiError_t status code.
 */
DIOPI_API diopiError_t diopiHardswish(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
/**
 * @brief Presumably the in-place version of diopiHardswish(), following the *Inp naming used in this header -- confirm.
 * @param[in] ctx Context environment.
 * @param[in,out] input mutable tensor handle; presumably holds the input and receives the result.
 * @return a diopiError_t status code.
 */
DIOPI_API diopiError_t diopiHardswishInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
/**
 * @brief Presumably the backward pass of diopiHardswish(), following the *Backward naming used in this header -- confirm.
 * @param[in] ctx Context environment.
 * @param[out] grad_input mutable tensor handle; presumably receives the grad of input.
 * @param[in] grad_output the grad of output (const handle).
 * @param[in] input the forward input tensor (const handle).
 * @return a diopiError_t status code.
 */
DIOPI_API diopiError_t diopiHardswishBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input);
/**
 * @brief Threshold operator (per the function name).
 * NOTE(review): the exact rule is not stated in this header; presumably elements of input that do
 * not exceed `threshold` are replaced by `value` -- confirm against the implementation.
 * @param[in] ctx Context environment.
 * @param[in] input the input tensor.
 * @param[in] threshold scalar threshold.
 * @param[in] value scalar replacement value.
 * @param[out] out the output tensor.
 * @return a diopiError_t status code.
 */
DIOPI_API diopiError_t diopiThreshold(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input,
const diopiScalar_t* threshold, const diopiScalar_t* value);
/**
 * @brief Presumably the in-place version of diopiThreshold(), following the *Inp naming used in this header -- confirm.
 * @param[in] ctx Context environment.
 * @param[in,out] input mutable tensor handle; presumably holds the input and receives the result.
 * @param[in] threshold scalar threshold.
 * @param[in] value scalar replacement value.
 * @return a diopiError_t status code.
 */
DIOPI_API diopiError_t diopiThresholdInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* threshold, const diopiScalar_t* value);
/**
 * @brief Presumably the backward pass of diopiThreshold(), following the *Backward naming used in this header -- confirm.
 * @param[in] ctx Context environment.
 * @param[out] grad_input mutable tensor handle; presumably receives the grad of input.
 * @param[in] grad_output the grad of output (const handle).
 * @param[in] input the forward input tensor (const handle).
 * @param[in] threshold scalar threshold. Note that no `value` argument is taken here.
 * @return a diopiError_t status code.
 */
DIOPI_API diopiError_t diopiThresholdBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
diopiConstTensorHandle_t input, const diopiScalar_t* threshold);
/**
 * @brief Applies the Gaussian error linear unit function element-wise.
 * @param[in] ctx Context environment.
 * @param[in] input the input tensor. type = [float32, float64].
 * @param[in] approximate Whether to use an approximate estimation. If it equals "tanh", an approximate estimation is used.
 * @param[out] out the output tensor. type = [float32, float64].
 * @return a diopiError_t status code.
 */
DIOPI_API diopiError_t diopiGelu(diopiContextHandle_t ctx, diopiTensorHandle_t out,
diopiConstTensorHandle_t input, const char* approximate);
/**
 * @brief Compute the backward pass of diopiGelu().
 * @param[in] grad_output the grad of output. type = [float32, float64].
 * @param[out] grad_input the grad of input. type = [float32, float64].
 * @return a diopiError_t status code.
 * @sa Other parameters refer to diopiGelu().
 */
DIOPI_API diopiError_t diopiGeluBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
diopiConstTensorHandle_t input, const char* approximate);
/**
 * @brief Applies element-wise, LeakyReLU(x) = max(0,x) + negative_slope*min(0,x).
 * @param[in] ctx Context environment.
 * @param[in] input the input tensor. type = [float32, float64].
 * @param[in] negative_slope scalar, controls the angle of the negative slope.
 * @param[out] out the output tensor. type = [float32, float64].
 * @return a diopiError_t status code.
 */
DIOPI_API diopiError_t diopiLeakyRelu(diopiContextHandle_t ctx, diopiTensorHandle_t out,
diopiConstTensorHandle_t input, const diopiScalar_t* negative_slope);
/**
 * @brief The in-place version of diopiLeakyRelu().
 * @param[in,out] input the input tensor; the result is stored back into it. type = [float32, float64].
 * @return a diopiError_t status code.
 * @sa Other parameters refer to diopiLeakyRelu().
 */
DIOPI_API diopiError_t diopiLeakyReluInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* negative_slope);
/**
 * @brief Compute the backward pass of diopiLeakyRelu().
 * @param[in] grad_output the grad of output. type = [float32, float64].
 * @param[in] input_is_result boolean, type = [bool]. NOTE(review): presumably true when `input` holds the
 * forward result rather than the forward input (e.g. after the in-place variant) -- confirm.
 * @param[out] grad_input the grad of input. type = [float32, float64].
 * @return a diopiError_t status code.
 * @sa Other parameters refer to diopiLeakyRelu().
 */
DIOPI_API diopiError_t diopiLeakyReluBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
diopiConstTensorHandle_t input, const diopiScalar_t* negative_slope, bool input_is_result);
/**
 * @brief Applies 2D average-pooling operation in kH×kW regions by step size sH×sW steps.
 * @param[in] ctx Context environment.
 * @param[in] input input tensor. type = [float32, float64].
 * @param[in] kernel_size an array, the size of the pooling region. type = [int32, int64].
 * @param[in] stride an array, the stride of the pooling operation. type = [int32, int64].
 * @param[in] padding an array. type = [int32, int64].
 * @param[in] ceil_mode boolean, when set to True, uses ceil instead of floor in the formula to compute the output shape. type = [bool].
 * @param[in] count_include_pad boolean, when True, zero-padding will be included in the mean calculation. type = [bool].
 * @param[in] divisor_override If specified, it will be used as the divisor when computing the average pooling,
 * otherwise the default is to divide by the total number of pooling elements.
 * Passed by pointer; presumably a null pointer means "not specified" -- TODO confirm.
 * @param[out] out the output tensor. type = [float32, float64].
 * @return a diopiError_t status code.
 */
DIOPI_API diopiError_t diopiAvgPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input,
diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode,
bool count_include_pad, const int64_t* divisor_override);
/**
 * @brief Compute the backward pass of diopiAvgPool2d().
 * @param[in] grad_output the grad of output. type = [float32, float64].
 * @param[out] grad_input the grad of input. type = [float32, float64].
 * @return a diopiError_t status code.
 * @sa Other parameters refer to diopiAvgPool2d().
 */
DIOPI_API diopiError_t diopiAvgPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input,
diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input,
diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode,
bool count_include_pad, const int64_t* divisor_override);
/**
 * @brief Applies a 2D max pooling over an input signal composed of several input planes.
 * @param[in] ctx Context environment.
 * @param[in] input the input tensor. type = [float16, float32].
 * @param[in] kernel_size an array, size of the pooling region. type = [int32, int64].
 * @param[in] stride an array, stride of the pooling operation. type = [int32, int64].
 * @param[in] padding an array, implicit negative infinity padding on both sides of the input tensor, its value should be >= 0 and <= kernel_size / 2. type = [int32, int64].
 * @param[in] dilation an array, spacing between the elements within the sliding window, its value should be greater than 0. type = [int32, int64].
 * @param[in] ceil_mode boolean, if True, use ceil instead of the default floor operation when computing the output shape.
 * This ensures that every element in the input tensor is covered by a sliding window. type = [bool].
 * @param[out] out the output tensor. type = [float16, float32].
 * @return a diopiError_t status code.
 */
DIOPI_API diopiError_t diopiMaxPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input,
diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode);
/**
 * @brief With indices, applies a 2D max pooling over an input signal composed of several input planes.
 * @param[in] ctx Context environment.
 * @param[out] indices It contains the flattened index positions of each maximum value in the max pooling operation. type = [int32, int64].
 * @return a diopiError_t status code.
 * @sa Other parameters refer to diopiMaxPool2d().
 */
DIOPI_API diopiError_t diopiMaxPool2dWithIndices(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t indices,
diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride,
diopiSize_t padding, diopiSize_t dilation, bool ceil_mode);
/**
 * @brief Compute the backward pass of diopiMaxPool2d().
 * @param[in] grad_output the grad of output. type = [float16, float32].
 * @param[in] indices index tensor (const handle), passed as the last argument; presumably the indices produced by
 * diopiMaxPool2dWithIndices() -- TODO confirm.
 * @param[out] grad_input the grad of input. type = [float16, float32].
 * @return a diopiError_t status code.
 * @sa Other parameters refer to diopiMaxPool2d().
 */
DIOPI_API diopiError_t diopiMaxPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding,
diopiSize_t dilation, bool ceil_mode, diopiConstTensorHandle_t indices);
/**
 * @brief Applies a 2D adaptive average pooling over an input signal composed of several input planes.
 * @param[in] ctx Context environment.
 * @param[in] input the input tensor. type = [float16, float32, float64].
 * @param[in] output_size an array, the size of the output tensor. type = [int32, int64].
 * @param[out] out the output tensor. type = [float16, float32, float64].
 * @return a diopiError_t status code.
 */
DIOPI_API diopiError_t diopiAdaptiveAvgPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out,
diopiConstTensorHandle_t input, diopiSize_t output_size);
/**
 * @brief Compute the backward pass of diopiAdaptiveAvgPool2d().
 * @param[in] grad_output the grad of output. type = [float16, float32, float64].
 * @param[out] grad_input the grad of input. type = [float16, float32, float64].
 * @return a diopiError_t status code.
 * @sa Other parameters refer to diopiAdaptiveAvgPool2d(). No output_size argument is taken here.
 */
DIOPI_API diopiError_t diopiAdaptiveAvgPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input,
diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input);
/**
 * @brief Applies a 2D adaptive max pooling over an input signal composed of several input planes.
 * @param[in] ctx Context environment.
 * @param[in] input the input tensor. type = [float32, float16, float64].
 * @param[in] output_size an array, the size of the output tensor. type = [int32, int64].
 * @param[out] out the output tensor. type = [float32, float16, float64].
 * @return a diopiError_t status code.
 */
DIOPI_API diopiError_t diopiAdaptiveMaxPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out,
diopiConstTensorHandle_t input, diopiSize_t output_size);
/**
 * @brief Variant of diopiAdaptiveMaxPool2d() that additionally takes an `indices` tensor handle.
 * @param[in] ctx Context environment.
 * @param[out] out the output tensor.
 * @param[out] indices mutable tensor handle; presumably receives the positions of the maxima, as in
 * diopiMaxPool2dWithIndices() -- TODO confirm.
 * @param[in] input the input tensor.
 * @param[in] output_size an array, the size of the output tensor.
 * @return a diopiError_t status code.
 */
DIOPI_API diopiError_t diopiAdaptiveMaxPool2dWithIndices(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t indices,
diopiConstTensorHandle_t input, diopiSize_t output_size);
/**
 * @brief Compute the backward pass of diopiAdaptiveMaxPool2d().
 * @param[in] grad_output the grad of output. type = [float32, float16, float64].
 * @param[in] indices index tensor (const handle); presumably the indices produced by
 * diopiAdaptiveMaxPool2dWithIndices() -- TODO confirm.
 * @param[out] grad_input the grad of input. type = [float32, float16, float64].
 * @return a diopiError_t status code.
 * @sa Other parameters refer to diopiAdaptiveMaxPool2d().
 */
DIOPI_API diopiError_t diopiAdaptiveMaxPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
diopiConstTensorHandle_t input, diopiConstTensorHandle_t indices);
/**
 * @brief Randomly zeroes some of the elements of the input tensor with probability p using samples from a Bernoulli distribution.
 * @param[in] ctx Context environment.
 * @param[in] input the input tensor. type = [float32, float64].
 * @param[in] p the probability of an element in the input tensor being zeroed out. type = [float32, float64].
 * @param[in] train boolean, whether the module is in training mode. When set to False, the dropout operation will not be performed. type = [bool].
 * @param[out] out the output tensor. type = [float32, float64].
 * @param[out] mask A binary mask tensor of the same shape as the input tensor, where each element's value is either 0 or 1,
 * indicating whether the corresponding neuron at that position is dropped or not. type = [int32].
 * @return a diopiError_t status code.
 */
DIOPI_API diopiError_t diopiDropout(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t mask,
diopiConstTensorHandle_t input, double p, bool train);
/**
 * @brief The in-place version of diopiDropout().
 * @param[in,out] input the input tensor; the result is stored back into it. type = [float32, float64].
 * @return a diopiError_t status code.
 * @sa Other parameters refer to diopiDropout().
 */
DIOPI_API diopiError_t diopiDropoutInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiTensorHandle_t mask,
double p, bool train);
/**
* @brief Measures the element-wise mean squared error between input and target.
* @param[in] ctx Context environment.
* @param[in] input the input tensor. type = [float32, float64].
* @param[in] target the target tensor. type = [float32, float64].
* @param[in] reduction Specifies the reduction to apply to the output.
* @param[out] out the result tensor. type = [float32, float64].
*/
DIOPI_API diopiError_t diopiMSELoss(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input,
diopiConstTensorHandle_t target, diopiReduction_t reduction);
/**
* @brief Compute the backward pass of diopiMSELoss().
* @param[in] ctx Context environment.
* @param[in] grad_output the grad tensor of output. type = [float32, float64].
* @param[in] input the input tensor. type = [float32, float64].
* @param[in] target the target tensor. type = [float32, float64].
* @param[in] reduction Specifies the reduction that was applied to the output.
* @param[out] grad_input the grad of input. type = [float32, float64].
*/
DIOPI_API diopiError_t diopiMSELossBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
diopiConstTensorHandle_t input, diopiConstTensorHandle_t target, diopiReduction_t reduction);
/**
* @brief Computes the sigmoid focal loss between inputs and targets.
* @param[in] ctx Context environment.
* @param[in] inputs the input tensor. NOTE(review): presumably unnormalized scores (logits) to which a sigmoid is applied - confirm.
* @param[in] targets the target tensor. NOTE(review): presumably binary labels with the same shape as inputs - confirm.
* @param[in] alpha focal-loss weighting factor, passed as a C float. NOTE(review): exact role and valid range to be confirmed.
* @param[in] gamma focal-loss focusing exponent, passed as a C float. NOTE(review): exact role and valid range to be confirmed.
* @param[in] reduction Specifies the reduction to apply to the output.
* @param[out] out the output tensor.
*/
DIOPI_API diopiError_t diopiSigmoidFocalLoss(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t inputs,
diopiConstTensorHandle_t targets, float alpha, float gamma, diopiReduction_t reduction);
/**
* @brief Compute the backward pass of diopiSigmoidFocalLoss().
* @param[in] ctx Context environment.
* @param[in] grad_output the grad of output. It is declared as a mutable diopiTensorHandle_t here.
* NOTE(review): presumably still read-only despite the non-const handle type - confirm.
* @param[in] input the input tensor of the forward pass.
* @param[in] target the target tensor of the forward pass.
* @param[out] grad_input the grad of input.
* @param[in] gamma focal-loss focusing exponent, passed as a C float.
* @param[in] alpha focal-loss weighting factor, passed as a C float.
* @param[in] reduction Specifies the reduction that was applied to the output.
* @note The argument order differs from diopiSigmoidFocalLoss(): gamma precedes alpha here, and grad_input follows target
* instead of directly following ctx. Both scalars are floats, so swapping them is not diagnosed by the compiler.
*/
DIOPI_API diopiError_t diopiSigmoidFocalLossBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_output,
diopiConstTensorHandle_t input, diopiConstTensorHandle_t target,
diopiTensorHandle_t grad_input, float gamma, float alpha, diopiReduction_t reduction);
/**
* @brief Measures the Cross Entropy between the target and the input.
* @param[in] ctx Context environment.
* @param[in] input Input tensor representing the unnormalized scores, often referred to as logits. type = [float32, float64].
* @param[in] target Target tensor representing the true class index or class probabilities. type = [float32, float64].
* NOTE(review): class-index targets are presumably integer typed (cf. diopiNLLLoss(), int64) - confirm the supported types.
* @param[in] weight Manual rescaling weight for each class. type = [float32, float64].
* @param[in] reduction Specifies the reduction to apply to the output.
* @param[in] ignore_index Specifies a target value that is to be ignored and does not contribute to the input gradient.
* Only used when targets are class indices. Passed as int64_t.
* @param[in] label_smoothing Value in [0.0, 1.0]. Specifies the amount of smoothing to be applied while computing the loss. Passed as a C double.
* @param[out] out the output tensor. type = [float32, float64].
*/
DIOPI_API diopiError_t diopiCrossEntropyLoss(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input,
diopiConstTensorHandle_t target, diopiConstTensorHandle_t weight, diopiReduction_t reduction,
int64_t ignore_index, double label_smoothing);
/**
* @brief Compute the backward pass of diopiCrossEntropyLoss().
* @param[in] ctx Context environment.
* @param[in] grad_output the grad of output. type = [float32, float64].
* @param[out] grad_input the grad of input. type = [float32, float64].
* @sa Other parameters (input, target, weight, reduction, ignore_index, label_smoothing) refer to diopiCrossEntropyLoss().
*/
DIOPI_API diopiError_t diopiCrossEntropyLossBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
diopiConstTensorHandle_t input, diopiConstTensorHandle_t target, diopiConstTensorHandle_t weight,
diopiReduction_t reduction, int64_t ignore_index, double label_smoothing);
/**
* @brief Measures the negative log likelihood (NLL) loss between the target and the input.
* @param[in] ctx Context environment.
* @param[in] input Input tensor, usually representing log probabilities. type = [float32, float64].
* @param[in] target Target tensor representing class indices, with values in the range of [0, C). type = [int64].
* @param[in] weight weights manually assigned to each class. type = [float32, float64].
* @param[in] reduction Loss reduction mode, which can be none, sum, or mean.
* @param[in] ignore_index Specifies a target value to be ignored and does not contribute to the input gradient.
* This parameter can only be used when the target contains class indices. Passed as int64_t.
* @param[out] out the output tensor. type = [float32, float64].
*/
DIOPI_API diopiError_t diopiNLLLoss(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input,
diopiConstTensorHandle_t target, diopiConstTensorHandle_t weight, diopiReduction_t reduction,
int64_t ignore_index);
/**
* @brief Compute the backward pass of diopiNLLLoss().
* @param[in] ctx Context environment.
* @param[in] grad_output the grad of output. type = [float32, float64].
* @param[out] grad_input the grad of input. type = [float32, float64].
* @sa Other parameters (input, target, weight, reduction, ignore_index) refer to diopiNLLLoss().
*/
DIOPI_API diopiError_t diopiNLLLossBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
diopiConstTensorHandle_t input, diopiConstTensorHandle_t target, diopiConstTensorHandle_t weight,
diopiReduction_t reduction, int64_t ignore_index);
/**
* @brief Measures the Binary Cross Entropy between the target and the input logits.
* @param[in] ctx Context environment.
* @param[in] input Tensor of arbitrary shape as unnormalized scores (often referred to as logits). type = [float32, float64].
* @param[in] target Tensor of the same shape as input with values between 0 and 1. type = [float32, float64].
* @param[in] weight a manual rescaling weight given to the loss of each batch element. If given, has to be a Tensor of size nbatch. type = [float32, float64].
* @param[in] pos_weight a weight of positive examples. Must be a vector with length equal to the number of classes. type = [int64].
* NOTE(review): an integer-only weight looks inconsistent with the floating-point input/weight types - confirm the supported types.
* @param[in] reduction Specifies the reduction to apply to the output.
* @param[out] out the output tensor. type = [float32, float64].
*/
DIOPI_API diopiError_t diopiBCEWithLogits(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input,
diopiConstTensorHandle_t target, diopiConstTensorHandle_t weight,
diopiConstTensorHandle_t pos_weight, diopiReduction_t reduction);
/**
* @brief Compute the backward pass of diopiBCEWithLogits().
* @param[in] ctx Context environment.
* @param[in] grad_output the grad of output. type = [float32, float64].
* @param[out] grad_input the grad of input. type = [float32, float64].
* @sa Other parameters (input, target, weight, pos_weight, reduction) refer to diopiBCEWithLogits().
*/
DIOPI_API diopiError_t diopiBCEWithLogitsBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
diopiConstTensorHandle_t input, diopiConstTensorHandle_t target, diopiConstTensorHandle_t weight,
diopiConstTensorHandle_t pos_weight, diopiReduction_t reduction);
/**
* @brief Measures the Binary Cross Entropy between the target and the input.
* @param[in] ctx Context environment.
* @param[in] input the input tensor. NOTE(review): unlike diopiBCEWithLogits(), presumably already probabilities in [0, 1] - confirm.
* @param[in] target the target tensor. NOTE(review): presumably the same shape as input with values between 0 and 1 - confirm.
* @param[in] weight a manual rescaling weight tensor. NOTE(review): whether a null handle is accepted is not visible here - confirm.
* @param[in] reduction Specifies the reduction to apply to the output.
* @param[out] out the output tensor.
*/
DIOPI_API diopiError_t diopiBCELoss(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t target,
diopiConstTensorHandle_t weight, diopiReduction_t reduction);
/**
* @brief Compute the backward pass of diopiBCELoss().
* @param[in] ctx Context environment.
* @param[in] grad_output the grad of output.
* @param[in] input the input tensor of the forward pass.
* @param[in] target the target tensor of the forward pass.
* @param[in] weight the rescaling weight tensor of the forward pass.
* @param[in] reduction Specifies the reduction that was applied to the output.
* @param[out] grad_input the grad of input.
*/
DIOPI_API diopiError_t diopiBCELossBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
diopiConstTensorHandle_t input, diopiConstTensorHandle_t target, diopiConstTensorHandle_t weight, diopiReduction_t reduction);
/*
* Element-wise math functions
*/
/**
* @brief Computes the sign of each element of the input tensor.
* NOTE(review): presumably yields -1, 0 or 1 per element; supported types are not documented here - confirm.
* @param[in] ctx Context environment.
* @param[in] input the input tensor.
* @param[out] out the output tensor.
*/
DIOPI_API diopiError_t diopiSign(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
/**
* @brief The in-place version of diopiAbs().
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; the result is stored back into it.
*/
DIOPI_API diopiError_t diopiAbsInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
/**
* @brief Computes the element-wise absolute value of the input tensor.
* @param[in] ctx Context environment.
* @param[in] input the input tensor. type = [float16, float32, float64, int16, int32, int64, uint8, int8].
* @param[out] out the output tensor. type = [float16, float32, float64, int16, int32, int64, uint8, int8].
*/
DIOPI_API diopiError_t diopiAbs(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
/**
* @brief The in-place version of diopiNeg().
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; the result is stored back into it. type = [float16, float32, float64, int16, int32, int64, uint8, int8].
*/
DIOPI_API diopiError_t diopiNegInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
/**
* @brief Computes the element-wise negative of the input tensor and writes it to out.
* @param[in] ctx Context environment.
* @param[in] input the input tensor. type = [float16, float32, float64, int16, int32, int64, uint8, int8].
* @param[out] out the output tensor. type = [float16, float32, float64, int16, int32, int64, uint8, int8].
*/
DIOPI_API diopiError_t diopiNeg(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
/**
* @brief The in-place version of diopiFloor().
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; the result is stored back into it. type = [float16, float32, float64].
*/
DIOPI_API diopiError_t diopiFloorInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
/**
* @brief Computes the element-wise floor of the input tensor, i.e. the largest integer less than or equal to each element.
* @param[in] ctx Context environment.
* @param[in] input the input tensor. type = [float16, float32, float64].
* @param[out] out the output tensor. type = [float16, float32, float64].
*/
DIOPI_API diopiError_t diopiFloor(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
/**
* @brief The in-place version of diopiSqrt().
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; the result is stored back into it. type = [float16, float32].
*/
DIOPI_API diopiError_t diopiSqrtInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
/**
* @brief Computes the element-wise square root of the input tensor.
* @param[in] ctx Context environment.
* @param[in] input the input tensor. type = [float16, float32].
* @param[out] out the output tensor. type = [float16, float32].
*/
DIOPI_API diopiError_t diopiSqrt(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
/**
* @brief The in-place version of diopiRsqrt().
* NOTE(review): presumably replaces each element with the reciprocal of its square root - confirm.
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; the result is stored back into it.
*/
DIOPI_API diopiError_t diopiRsqrtInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
/**
* @brief Element-wise reciprocal square root of the input tensor.
* NOTE(review): presumably out = 1 / sqrt(input); supported types are not documented here - confirm.
* @param[in] ctx Context environment.
* @param[in] input the input tensor.
* @param[out] out the output tensor.
*/
DIOPI_API diopiError_t diopiRsqrt(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
/**
* @brief The in-place version of diopiSin().
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; the result is stored back into it.
* type = [float16, float32, float64, int16, int32, int64, uint8, int8].
*/
DIOPI_API diopiError_t diopiSinInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
/**
* @brief Computes the element-wise sine of the input tensor.
* @param[in] ctx Context environment.
* @param[in] input the input tensor. type = [float16, float32, float64, int16, int32, int64, uint8, int8].
* @param[out] out the output tensor. type = [float16, float32, float64].
*/
DIOPI_API diopiError_t diopiSin(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
/**
* @brief The in-place version of diopiCos().
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; the result is stored back into it.
* type = [float16, float32, float64, int16, int32, int64, uint8, int8].
*/
DIOPI_API diopiError_t diopiCosInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
/**
* @brief Computes the element-wise cosine of the input tensor.
* @param[in] ctx Context environment.
* @param[in] input the input tensor. type = [float16, float32, float64, int16, int32, int64, uint8, int8].
* @param[out] out the output tensor. type = [float16, float32, float64].
*/
DIOPI_API diopiError_t diopiCos(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
/**
* @brief The in-place version of diopiTanh().
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; the result is stored back into it. type = [float16, float32, float64].
*/
DIOPI_API diopiError_t diopiTanhInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
/**
* @brief Computes the element-wise hyperbolic tangent of the input tensor.
* @param[in] ctx Context environment.
* @param[in] input the input tensor. type = [float16, float32, float64].
* @param[out] out the output tensor. type = [float16, float32, float64].
*/
DIOPI_API diopiError_t diopiTanh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
/**
* @brief Compute the backward pass of diopiTanh().
* @param[in] ctx Context environment.
* @param[in] grad_output the grad tensor of output.
* @param[in] output the output tensor. type = [float16, float32, float64].
* NOTE(review): presumably the forward result of diopiTanh() rather than its input - confirm.
* @param[out] grad_input the grad tensor of input. type = [float16, float32, float64].
*/
DIOPI_API diopiError_t diopiTanhBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input,
diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t output);
/**
* @brief The in-place version of diopiSigmoid().
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; the result is stored back into it. type = [float16, float32].
*/
DIOPI_API diopiError_t diopiSigmoidInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
/**
* @brief Applies the sigmoid function element-wise to the input tensor.
* @param[in] ctx Context environment.
* @param[in] input the input tensor. type = [float16, float32].
* @param[out] out the output tensor. type = [float16, float32].
*/
DIOPI_API diopiError_t diopiSigmoid(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
/**
* @brief Compute the backward pass of diopiSigmoid().
* @param[in] ctx Context environment.
* @param[in] grad_output the grad of output. type = [float16, float32].
* @param[in] output the output tensor of diopiSigmoid(). type = [float16, float32].
* @param[out] grad_input the grad of input. type = [float16, float32].
* @sa Other parameters refer to diopiSigmoid().
*/
DIOPI_API diopiError_t diopiSigmoidBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input,
diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t output);
/**
* @brief The in-place version of diopiSilu().
* NOTE(review): presumably applies SiLU, x * sigmoid(x), element-wise - confirm.
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; the result is stored back into it.
*/
DIOPI_API diopiError_t diopiSiluInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
/**
* @brief Applies the SiLU activation element-wise to the input tensor.
* NOTE(review): presumably out = input * sigmoid(input); supported types are not documented here - confirm.
* @param[in] ctx Context environment.
* @param[in] input the input tensor.
* @param[out] out the output tensor.
*/
DIOPI_API diopiError_t diopiSilu(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
/**
* @brief Compute the backward pass of diopiSilu().
* @param[in] ctx Context environment.
* @param[in] grad_output the grad of output.
* @param[in] input the input tensor of the forward pass. Note that this function takes the forward input,
* whereas diopiSigmoidBackward() and diopiTanhBackward() take the forward output.
* @param[out] grad_input the grad of input.
*/
DIOPI_API diopiError_t diopiSiluBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
diopiConstTensorHandle_t input);
/**
* @brief The in-place version of diopiExp().
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; the result is stored back into it.
*/
DIOPI_API diopiError_t diopiExpInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
/**
* @brief Computes the element-wise exponential of the input tensor.
* @param[in] ctx Context environment.
* @param[in] input the input tensor. type = [float16, float32, float64, int16, int32,
* int64, uint8, int8, bool].
* @param[out] out the output tensor. type = [float16, float32, float64].
*/
DIOPI_API diopiError_t diopiExp(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
/**
* @brief The in-place version of diopiLog().
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; the result is stored back into it. type = [float16, float32, float64, int16, int32, int64, uint8, int8].
*/
DIOPI_API diopiError_t diopiLogInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
/**
* @brief Computes the element-wise natural logarithm of the input tensor.
* @param[in] ctx Context environment.
* @param[in] input the input tensor. type = [float16, float32, float64, int16, int32, int64, uint8, int8].
* @param[out] out the output tensor. type = [float16, float32, float64].
*/
DIOPI_API diopiError_t diopiLog(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
/**
* @brief The in-place version of diopiLog2().
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; the result is stored back into it. type = [float16, float32, float64, int16, int32, int64, uint8, int8].
*/
DIOPI_API diopiError_t diopiLog2Inp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
/**
* @brief Computes the element-wise base-2 logarithm of the input tensor.
* @param[in] ctx Context environment.
* @param[in] input the input tensor. type = [float16, float32, float64, int16, int32, int64, uint8, int8].
* @param[out] out the output tensor. type = [float16, float32, float64].
*/
DIOPI_API diopiError_t diopiLog2(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
/**
* @brief The in-place version of diopiLog10().
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; the result is stored back into it.
* NOTE(review): supported types are presumably the same as diopiLog2Inp() - confirm.
*/
DIOPI_API diopiError_t diopiLog10Inp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
/**
* @brief Computes the element-wise base-10 logarithm of the input tensor.
* @param[in] ctx Context environment.
* @param[in] input the input tensor. NOTE(review): supported types are presumably the same as diopiLog2() - confirm.
* @param[out] out the output tensor.
*/
DIOPI_API diopiError_t diopiLog10(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
/**
* @brief The in-place version of diopiErf().
* NOTE(review): presumably replaces each element with its Gauss error function value - confirm.
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; the result is stored back into it.
*/
DIOPI_API diopiError_t diopiErfInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
/**
* @brief Computes the element-wise error function of the input tensor.
* NOTE(review): presumably the Gauss error function erf(x); supported types are not documented here - confirm.
* @param[in] ctx Context environment.
* @param[in] input the input tensor.
* @param[out] out the output tensor.
*/
DIOPI_API diopiError_t diopiErf(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
/**
* @brief Power with a scalar base and a tensor exponent.
* NOTE(review): presumably out = input ^ exponent computed element-wise over exponent - confirm.
* @param[in] ctx Context environment.
* @param[in] input the scalar base. Unlike diopiPow(), the base is a scalar here.
* @param[in] exponent the exponent tensor.
* @param[out] out the output tensor.
*/
DIOPI_API diopiError_t diopiPowScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, const diopiScalar_t* input, diopiConstTensorHandle_t exponent);
/**
* @brief Raise each element of the input tensor to the power of a scalar exponent.
* @param[in] ctx Context environment.
* @param[in] input the input tensor. type = [int32, int64, uint8, int8, int16, float32, float64, float16].
* @param[in] exponent the scalar exponent. type = [int32, int64, uint8, int8, int16, float32, float64, float16, bool].
* @param[out] out the output tensor. type = [int32, int64, uint8, int8, int16, float32, float64, float16].
*/
DIOPI_API diopiError_t diopiPow(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* exponent);
/**
* @brief The in-place version of diopiPow().
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; the result is stored back into it. type = [int32, int64, uint8, int8, int16, float32, float64, float16].
* @sa Other parameters refer to diopiPow().
*/
DIOPI_API diopiError_t diopiPowInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* exponent);
/**
* @brief Raise each element of the input tensor to the power of the corresponding element of the exponent tensor.
* @param[in] ctx Context environment.
* @param[in] input the input tensor. type = [int32, int64, uint8, int8, int16, float32, float64, float16].
* @param[in] exponent the exponent tensor. type = [int32, int64, uint8, int8, int16, float32, float64, float16, bool].
* @param[out] out the output tensor. type = [int32, int64, uint8, int8, int16, float32, float64, float16].
*/
DIOPI_API diopiError_t diopiPowTensor(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t exponent);
/**
* @brief The in-place version of diopiPowTensor().
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; the result is stored back into it.
* @sa Other parameters refer to diopiPowTensor().
*/
DIOPI_API diopiError_t diopiPowInpTensor(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t exponent);
/**
* @brief Adds the second tensor, scaled by alpha, to the first tensor: out = input + alpha * other.
* @param[in] ctx Context environment.
* @param[in] input the first input tensor. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @param[in] other the second input tensor. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @param[in] alpha Scaling factor of the second tensor. type = [float32, float64, int32, int64].
* @param[out] out Output tensor for storing the result of the addition operation. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
*/
DIOPI_API diopiError_t diopiAdd(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input,
diopiConstTensorHandle_t other, const diopiScalar_t* alpha);
/**
* @brief The in-place version of diopiAdd().
* @param[in] ctx Context environment.
* @param[in,out] input the first input tensor; the result is stored back into it. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @sa Other parameters refer to diopiAdd().
*/
DIOPI_API diopiError_t diopiAddInp(diopiContextHandle_t ctx, diopiTensorHandle_t input,
diopiConstTensorHandle_t other, const diopiScalar_t* alpha);
/**
* @brief Add a scalar to a tensor.
* @param[in] ctx Context environment.
* @param[in] other The scalar value to be added. type = [float64, float32, float16, int64, int32, int16, int8, uint8].
* @sa Other parameters (input, alpha, out) refer to diopiAdd().
*/
DIOPI_API diopiError_t diopiAddScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input,
const diopiScalar_t* other, const diopiScalar_t* alpha);
/**
* @brief The in-place version of diopiAddScalar().
* @param[in] ctx Context environment.
* @param[in,out] input the first input tensor; the result is stored back into it. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @sa Other parameters refer to diopiAddScalar().
*/
DIOPI_API diopiError_t diopiAddInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input,
const diopiScalar_t* other, const diopiScalar_t* alpha);
/**
* @brief Subtracts the second tensor, scaled by alpha, from the first tensor: out = input - alpha * other.
* @param[in] ctx Context environment.
* @param[in] input the first input tensor. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @param[in] other the second input tensor. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @param[in] alpha Scaling factor of the second tensor. type = [float32, float64, int32, int64].
* @param[out] out the output tensor. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
*/
DIOPI_API diopiError_t diopiSub(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input,
diopiConstTensorHandle_t other, const diopiScalar_t* alpha);
/**
* @brief The in-place version of diopiSub().
* @param[in] ctx Context environment.
* @param[in,out] input the first input tensor; the result is stored back into it. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @sa Other parameters refer to diopiSub().
*/
DIOPI_API diopiError_t diopiSubInp(diopiContextHandle_t ctx, diopiTensorHandle_t input,
diopiConstTensorHandle_t other, const diopiScalar_t* alpha);
/**
* @brief Subtract a scalar from a tensor.
* @param[in] ctx Context environment.
* @param[in] other The scalar value to be subtracted. type = [float64, float32, float16, int64, int32, int16, int8, uint8].
* @sa Other parameters (input, alpha, out) refer to diopiSub().
*/
DIOPI_API diopiError_t diopiSubScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input,
const diopiScalar_t* other, const diopiScalar_t* alpha);
/**
* @brief The in-place version of diopiSubScalar().
* @param[in] ctx Context environment.
* @param[in,out] input the first input tensor; the result is stored back into it. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @sa Other parameters refer to diopiSubScalar().
*/
DIOPI_API diopiError_t diopiSubInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input,
const diopiScalar_t* other, const diopiScalar_t* alpha);
/**
* @brief Multiplies the input tensor by the other tensor element-wise.
* NOTE(review): this comment previously said "matrix multiplication"; the scalar and in-place siblings and the separate
* diopiBmm() entry point indicate an element-wise product - confirm against the implementation.
* @param[in] ctx Context environment.
* @param[in] input the input tensor. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @param[in] other the second tensor. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @param[out] out the output tensor.
*/
DIOPI_API diopiError_t diopiMul(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other);
/**
* @brief The in-place version of diopiMul().
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; the result is stored back into it.
* @param[in] other the second tensor.
*/
DIOPI_API diopiError_t diopiMulInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t other);
/**
* @brief Multiplies each element of the input tensor by a scalar.
* @param[in] ctx Context environment.
* @param[in] input the input tensor.
* @param[in] other the scalar multiplier.
* @param[out] out the output tensor.
*/
DIOPI_API diopiError_t diopiMulScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* other);
/**
* @brief The in-place version of diopiMulScalar().
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; the result is stored back into it.
* @param[in] other the scalar multiplier.
*/
DIOPI_API diopiError_t diopiMulInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* other);
/**
* @brief Divides each element of the input tensor by the corresponding element of the other tensor.
* @param[in] ctx Context environment.
* @param[in] input the input tensor, dividend. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @param[in] other the second tensor, divisor. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @param[in] rounding_mode Rounding mode applied to the result. None: no rounding is performed, and if both input and other are
* integer types the inputs are promoted to the default scalar type; trunc: truncate towards zero; floor: round the result of the
* division down towards negative infinity.
* @param[out] out the output tensor. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
*/
DIOPI_API diopiError_t diopiDiv(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input,
diopiConstTensorHandle_t other, diopiRoundMode_t rounding_mode);
/**
* @brief The in-place version of diopiDiv().
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor (dividend); the result is stored back into it. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @sa Other parameters refer to diopiDiv().
*/
DIOPI_API diopiError_t diopiDivInp(diopiContextHandle_t ctx, diopiTensorHandle_t input,
diopiConstTensorHandle_t other, diopiRoundMode_t rounding_mode);
/**
* @brief Divides each element of the input tensor by a scalar.
* @param[in] ctx Context environment.
* @param[in] other the scalar divisor. type = [int32, int64, float32, float64].
* @sa Other parameters (input, rounding_mode, out) refer to diopiDiv().
*/
DIOPI_API diopiError_t diopiDivScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input,
const diopiScalar_t* other, diopiRoundMode_t rounding_mode);
/**
* @brief The in-place version of diopiDivScalar().
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor (dividend); the result is stored back into it. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @sa Other parameters refer to diopiDivScalar().
*/
DIOPI_API diopiError_t diopiDivInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input,
const diopiScalar_t* other, diopiRoundMode_t rounding_mode);
/**
* @brief Batched matrix-matrix product (BLAS-style): multiplies the batch of matrices in input by the batch of matrices in mat2.
* @param[in] ctx Context environment.
* @param[out] out the output tensor. type = [float16, float32, float64].
* @param[in] input the first batch of matrices to be multiplied. type = [float16, float32, float64].
* @param[in] mat2 the second batch of matrices to be multiplied. type = [float16, float32, float64].
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiBmm(diopiContextHandle_t ctx, diopiTensorHandle_t out,
diopiConstTensorHandle_t input, diopiConstTensorHandle_t mat2);
/**
* @brief Batched "add + matrix-matrix product" operation.
* NOTE(review): presumably out = beta * input + alpha * (batch1 x batch2), as the name and the
* beta/alpha parameters suggest - confirm against the implementation.
* @param[in] ctx Context environment.
* @param[out] out the output tensor.
* @param[in] input presumably the tensor to be added - TODO confirm.
* @param[in] batch1 presumably the first batch of matrices to be multiplied - TODO confirm.
* @param[in] batch2 presumably the second batch of matrices to be multiplied - TODO confirm.
* @param[in] beta presumably the scale factor of input - TODO confirm.
* @param[in] alpha presumably the scale factor of the matrix product - TODO confirm.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiBaddbmm(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input,
diopiConstTensorHandle_t batch1, diopiConstTensorHandle_t batch2, double beta, double alpha);
/**
* @brief Presumably the in-place version of diopiBaddbmm() (by the file's "Inp" naming convention) - TODO confirm.
* @param[in] ctx Context environment.
* @param[in,out] input non-const tensor handle; presumably both the tensor to be added and the destination of the result - TODO confirm.
* @param[in] batch1 presumably the first batch of matrices to be multiplied - TODO confirm.
* @param[in] batch2 presumably the second batch of matrices to be multiplied - TODO confirm.
* @param[in] beta presumably the scale factor of input - TODO confirm.
* @param[in] alpha presumably the scale factor of the matrix product - TODO confirm.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiBaddbmmInp(diopiContextHandle_t ctx, diopiTensorHandle_t input,
diopiConstTensorHandle_t batch1, diopiConstTensorHandle_t batch2, double beta, double alpha);
/**
* @brief Performs the element-wise multiplication of tensor1 by tensor2, multiplies the result by value and adds it to input.
* @param[in] ctx Context environment.
* @param[out] out the output tensor. type = [float16, float32, float64].
* @param[in] input the input tensor to be added. type = [float16, float32, float64].
* @param[in] tensor1 the tensor to be multiplied. type = [float16, float32, float64].
* @param[in] tensor2 the tensor to be multiplied. type = [float16, float32, float64].
* @param[in] value multiplier for tensor1 * tensor2. type = [float16, float32, float64].
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiAddcmul(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input,
diopiConstTensorHandle_t tensor1, diopiConstTensorHandle_t tensor2, const diopiScalar_t* value);
/**
* @brief The in-place version of diopiAddcmul().
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor to be added; it also receives the result. type = [float16, float32, float64].
* @param[in] tensor1 the tensor to be multiplied. type = [float16, float32, float64].
* @param[in] tensor2 the tensor to be multiplied. type = [float16, float32, float64].
* @param[in] value multiplier for tensor1 * tensor2. type = [float16, float32, float64].
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiAddcmulInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t tensor1, diopiConstTensorHandle_t tensor2, const diopiScalar_t* value);
/**
* @brief Matrix multiplication of input by other. The multiplication rules depend on the dimensions of the input tensors.
* @param[in] ctx Context environment.
* @param[out] out the output tensor. type = [float32, float64].
* @param[in] input the first tensor. type = [float32, float64].
* @param[in] other the second tensor. type = [float32, float64].
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiMatmul(diopiContextHandle_t ctx, diopiTensorHandle_t out,
diopiConstTensorHandle_t input, diopiConstTensorHandle_t other);
/**
* @brief Performs the element-wise division of tensor1 by tensor2, multiplies the result by value and adds it to input.
* @param[in] ctx Context environment.
* @param[out] out the output tensor. type = [float16, float32, float64].
* @param[in] input the input tensor to be added. type = [float16, float32, float64].
* @param[in] tensor1 the numerator tensor. type = [float16, float32, float64].
* @param[in] tensor2 the denominator tensor. type = [float16, float32, float64].
* @param[in] value multiplier for tensor1 / tensor2. type = [float16, float32, float64].
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiAddcdiv(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input,
diopiConstTensorHandle_t tensor1, diopiConstTensorHandle_t tensor2, const diopiScalar_t* value);
/**
* @brief The in-place version of diopiAddcdiv().
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor to be added; it also receives the result. type = [float16, float32, float64].
* @param[in] tensor1 the numerator tensor. type = [float16, float32, float64].
* @param[in] tensor2 the denominator tensor. type = [float16, float32, float64].
* @param[in] value multiplier for tensor1 / tensor2. type = [float16, float32, float64].
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiAddcdivInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t tensor1, diopiConstTensorHandle_t tensor2, const diopiScalar_t* value);
/**
* @brief Performs matrix multiplication between mat1 and mat2, multiplies the result by the scalar alpha,
* and adds it to the input tensor scaled by beta: out = beta * input + alpha * (mat1 x mat2).
* @param[in] ctx Context environment.
* @param[out] out the output tensor. type = [float32, float64, float16].
* @param[in] input the input tensor. type = [float32, float64, float16].
* @param[in] mat1 the first matrix. type = [float32, float64, float16].
* @param[in] mat2 the second matrix. type = [float32, float64, float16].
* @param[in] beta scale factor of input. type = [int32, int64, float32, float64].
* @param[in] alpha the scaling factor for the multiplication result of the tensors. type = [int32, int64, float32, float64].
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiAddmm(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input,
diopiConstTensorHandle_t mat1, diopiConstTensorHandle_t mat2, const diopiScalar_t* beta, const diopiScalar_t* alpha);
/**
* @brief Presumably computes the Cholesky decomposition of mat - TODO confirm against the implementation.
* @param[in] ctx Context environment.
* @param[out] out non-const tensor handle; presumably receives the triangular factor - TODO confirm.
* @param[out] info non-const tensor handle; presumably receives per-matrix status information - TODO confirm.
* @param[in] mat the matrix (or matrices) to decompose.
* @param[in] upper presumably selects an upper-triangular (true) or lower-triangular (false) factor - TODO confirm.
* @param[in] checkerror presumably enables error checking of the result - TODO confirm.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiCholesky(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t info, diopiConstTensorHandle_t mat, bool upper, bool checkerror);
/**
* @brief Presumably the backward pass of diopiCholesky(), producing the gradient of its input matrix - TODO confirm.
* @param[in] ctx Context environment.
* @param[out] grad_mat non-const tensor handle; presumably receives the gradient with respect to mat - TODO confirm.
* @param[in] grad_output presumably the gradient with respect to the forward output - TODO confirm.
* @param[in] L presumably the factor produced by the forward pass - TODO confirm.
* @param[in] upper presumably the same flag that was passed to the forward pass - TODO confirm.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiCholeskyBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_mat, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t L, bool upper);
/**
* @brief Presumably solves a linear system whose coefficient matrix mat is triangular and whose right-hand side is b - TODO confirm.
* @param[in] ctx Context environment.
* @param[out] out non-const tensor handle; presumably receives the solution - TODO confirm.
* @param[out] cloned_mat non-const tensor handle; presumably receives a copy of mat - TODO confirm.
* @param[in] b presumably the right-hand side - TODO confirm.
* @param[in] mat presumably the triangular coefficient matrix - TODO confirm.
* @param[in] upper presumably selects the upper (true) or lower (false) triangle of mat - TODO confirm.
* @param[in] transpose presumably requests that mat be transposed before solving - TODO confirm.
* @param[in] unitriangular presumably indicates that the diagonal of mat is assumed to be all ones - TODO confirm.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiTriangularSolve(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t cloned_mat, diopiConstTensorHandle_t b,
diopiConstTensorHandle_t mat, bool upper, bool transpose, bool unitriangular);
/**
* @brief Presumably the backward pass of diopiTriangularSolve() - TODO confirm against the implementation.
* @param[in] ctx Context environment.
* @param[out] grad_b non-const tensor handle; presumably receives the gradient with respect to b - TODO confirm.
* @param[out] grad_mat non-const tensor handle; presumably receives the gradient with respect to mat - TODO confirm.
* @param[in] grad_x presumably the gradient with respect to the forward solution - TODO confirm.
* @param[in] grad_cloned_mat presumably the gradient with respect to the forward cloned_mat output - TODO confirm.
* @param[in] x presumably the solution produced by the forward pass - TODO confirm.
* @param[in] b presumably the forward right-hand side - TODO confirm.
* @param[in] mat presumably the forward coefficient matrix - TODO confirm.
* @param[in] upper presumably the same flag that was passed to the forward pass - TODO confirm.
* @param[in] transpose presumably the same flag that was passed to the forward pass - TODO confirm.
* @param[in] unitriangular presumably the same flag that was passed to the forward pass - TODO confirm.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiTriangularSolveBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_b, diopiTensorHandle_t grad_mat, diopiConstTensorHandle_t grad_x, diopiConstTensorHandle_t grad_cloned_mat,
diopiConstTensorHandle_t x, diopiConstTensorHandle_t b, diopiConstTensorHandle_t mat, bool upper, bool transpose, bool unitriangular);
/**
* @brief The in-place version of diopiClampScalar(): the clamped values are written back into input.
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; it also receives the result. type = [float32, float64, float16, int16, int32, int64, int8].
* @param[in] min scalar, the lower-bound value.
* @param[in] max scalar, the upper-bound value.
* @sa Other parameters refer to diopiClampScalar().
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiClampInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* min, const diopiScalar_t* max);
/**
* @brief The in-place version of diopiClamp(): the clamped values are written back into input.
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; it also receives the result. type = [float32, float64, float16, int16, int32, int64, int8].
* @param[in] min the lower-bound value tensor.
* @param[in] max the upper-bound value tensor.
* @sa Other parameters refer to diopiClamp().
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiClampInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t min, diopiConstTensorHandle_t max);
/**
* @brief Clamps all elements in input into the range [min, max], where both bounds are scalars.
* @param[in] ctx Context environment.
* @param[out] out the output tensor. type = [float32, float64, float16].
* NOTE(review): the out type list omits the integer types listed for input - confirm which is intended.
* @param[in] input the input tensor. type = [float32, float64, float16, int16, int32, int64, int8].
* @param[in] min scalar, the lower-bound value. type = [float32, float64].
* @param[in] max scalar, the upper-bound value. type = [float32, float64].
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiClampScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* min, const diopiScalar_t* max);
/**
* @brief Clamps all elements in input into the range [min, max], where both bounds are given as tensors.
* @param[in] ctx Context environment.
* @param[out] out the output tensor. type = [float32, float64, float16].
* NOTE(review): the out type list omits the integer types listed for input - confirm which is intended.
* @param[in] input the input tensor. type = [float32, float64, float16, int16, int32, int64, int8, uint8].
* @param[in] min the lower-bound value tensor. type = [float32, float64].
* @param[in] max the upper-bound value tensor. type = [float32, float64].
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiClamp(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input,
diopiConstTensorHandle_t min, diopiConstTensorHandle_t max);
/**
* @brief Presumably the in-place version of diopiClampMaxScalar(): limits the elements of input to at most max - TODO confirm.
* @param[in] ctx Context environment.
* @param[in,out] input non-const tensor handle; presumably both the source and the destination of the result - TODO confirm.
* @param[in] max scalar, presumably the upper-bound value - TODO confirm.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiClampMaxInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* max);
/**
* @brief Presumably the in-place version of diopiClampMax(): limits the elements of input to at most max - TODO confirm.
* @param[in] ctx Context environment.
* @param[in,out] input non-const tensor handle; presumably both the source and the destination of the result - TODO confirm.
* @param[in] max tensor, presumably the upper-bound values - TODO confirm.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiClampMaxInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t max);
/**
* @brief Presumably clamps the elements of input to at most the scalar max (upper bound only) - TODO confirm.
* @param[in] ctx Context environment.
* @param[out] out the output tensor.
* @param[in] input the input tensor.
* @param[in] max scalar, presumably the upper-bound value - TODO confirm.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiClampMaxScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* max);
/**
* @brief Presumably clamps the elements of input to at most the values in the tensor max (upper bound only) - TODO confirm.
* @param[in] ctx Context environment.
* @param[out] out the output tensor.
* @param[in] input the input tensor.
* @param[in] max tensor, presumably the upper-bound values - TODO confirm.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiClampMax(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t max);
/**
* @brief Presumably the in-place version of diopiClampMinScalar(): limits the elements of input to at least min - TODO confirm.
* @param[in] ctx Context environment.
* @param[in,out] input non-const tensor handle; presumably both the source and the destination of the result - TODO confirm.
* @param[in] min scalar, presumably the lower-bound value - TODO confirm.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiClampMinInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* min);
/**
* @brief Presumably the in-place version of diopiClampMin(): limits the elements of input to at least min - TODO confirm.
* @param[in] ctx Context environment.
* @param[in,out] input non-const tensor handle; presumably both the source and the destination of the result - TODO confirm.
* @param[in] min tensor, presumably the lower-bound values - TODO confirm.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiClampMinInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t min);
/**
* @brief Presumably clamps the elements of input to at least the scalar min (lower bound only) - TODO confirm.
* @param[in] ctx Context environment.
* @param[out] out the output tensor.
* @param[in] input the input tensor.
* @param[in] min scalar, presumably the lower-bound value - TODO confirm.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiClampMinScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* min);
/**
* @brief Presumably clamps the elements of input to at least the values in the tensor min (lower bound only) - TODO confirm.
* @param[in] ctx Context environment.
* @param[out] out the output tensor.
* @param[in] input the input tensor.
* @param[in] min tensor, presumably the lower-bound values - TODO confirm.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiClampMin(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t min);
/**
* @brief Fills the elements of the input tensor with value.
* @param[in] ctx Context environment.
* @param[in,out] input the tensor to be filled; it is modified in place.
* @param[in] value the scalar written to the elements of input.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiFill(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* value);
/**
* @brief Computes the element-wise logical AND of the given input tensors.
* @param[in] ctx Context environment.
* @param[out] out the output tensor. type = [bool].
* @param[in] input the first tensor. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @param[in] other the second tensor. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiLogicalAnd(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other);
/**
* @brief The in-place version of diopiLogicalAnd(): the result is written back into input.
* @param[in] ctx Context environment.
* @param[in,out] input the first tensor; it also receives the result.
* @param[in] other the second tensor.
* @sa Other parameters refer to diopiLogicalAnd().
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiLogicalAndInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t other);
/**
* @brief Computes the element-wise logical OR of the given input tensors.
* @param[in] ctx Context environment.
* @param[out] out the output tensor. type = [bool].
* @param[in] input the first tensor. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @param[in] other the second tensor. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiLogicalOr(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other);
/**
* @brief The in-place version of diopiLogicalOr(): the result is written back into input.
* @param[in] ctx Context environment.
* @param[in,out] input the first tensor; it also receives the result.
* @param[in] other the second tensor.
* @sa Other parameters refer to diopiLogicalOr().
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiLogicalOrInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t other);
/**
* @brief Presumably computes the element-wise logical NOT of the input tensor, by analogy with diopiLogicalAnd() and diopiLogicalOr() - TODO confirm.
* @param[in] ctx Context environment.
* @param[out] out the output tensor.
* @param[in] input the input tensor.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiLogicalNot(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
/**
* @brief Presumably the in-place version of diopiLogicalNot() (by the file's "Inp" naming convention) - TODO confirm.
* @param[in] ctx Context environment.
* @param[in,out] input non-const tensor handle; presumably both the source and the destination of the result - TODO confirm.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiLogicalNotInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
/**
* @brief Computes the bitwise AND of the given input tensors.
* The diopiBitwiseOr* and diopiBitwiseNot* declarations in this header are the OR / NOT counterparts.
* @param[in] ctx Context environment.
* @param[out] out the output tensor.
* @param[in] input the first tensor.
* @param[in] other the second tensor.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiBitwiseAnd(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other);
/**
* @brief Presumably the in-place version of diopiBitwiseAnd() (by the file's "Inp" naming convention) - TODO confirm.
* @param[in] ctx Context environment.
* @param[in,out] input non-const tensor handle; presumably the first operand and the destination of the result - TODO confirm.
* @param[in] other the second tensor.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiBitwiseAndInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t other);
/**
* @brief Presumably computes the bitwise AND of the input tensor and a scalar - TODO confirm.
* @param[in] ctx Context environment.
* @param[out] out the output tensor.
* @param[in] input the input tensor.
* @param[in] other the scalar operand.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiBitwiseAndScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* other);
/**
* @brief Presumably the in-place version of diopiBitwiseAndScalar() (by the file's "Inp" naming convention) - TODO confirm.
* @param[in] ctx Context environment.
* @param[in,out] input non-const tensor handle; presumably the tensor operand and the destination of the result - TODO confirm.
* @param[in] other the scalar operand.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiBitwiseAndInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* other);
/**
* @brief Computes the bitwise OR of the given input tensors.
* @param[in] ctx Context environment.
* @param[out] out the output tensor.
* @param[in] input the first tensor.
* @param[in] other the second tensor.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiBitwiseOr(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other);
/**
* @brief Presumably the in-place version of diopiBitwiseOr() (by the file's "Inp" naming convention) - TODO confirm.
* @param[in] ctx Context environment.
* @param[in,out] input non-const tensor handle; presumably the first operand and the destination of the result - TODO confirm.
* @param[in] other the second tensor.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiBitwiseOrInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t other);
/**
* @brief Presumably computes the bitwise OR of the input tensor and a scalar - TODO confirm.
* @param[in] ctx Context environment.
* @param[out] out the output tensor.
* @param[in] input the input tensor.
* @param[in] other the scalar operand.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiBitwiseOrScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* other);
/**
* @brief Presumably the in-place version of diopiBitwiseOrScalar() (by the file's "Inp" naming convention) - TODO confirm.
* @param[in] ctx Context environment.
* @param[in,out] input non-const tensor handle; presumably the tensor operand and the destination of the result - TODO confirm.
* @param[in] other the scalar operand.
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiBitwiseOrInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* other);
/**
* @brief Computes the bitwise NOT of the given input tensor. The input tensor must be of integral or Boolean type.
* For bool tensors, it computes the logical NOT.
* @param[in] ctx Context environment.
* @param[out] out the result tensor. type = [int16, int32, int64, uint8, int8, bool].
* @param[in] input the input tensor. type = [int16, int32, int64, uint8, int8, bool].
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiBitwiseNot(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
/**
* @brief The in-place version of diopiBitwiseNot(): the result is written back into input.
* @param[in] ctx Context environment.
* @param[in,out] input the input tensor; it also receives the result. type = [int16, int32, int64, uint8, int8, bool].
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiBitwiseNotInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
/**
* @brief Computes the element-wise equality comparison of a tensor with a scalar, "==".
* @param[in] ctx Context environment.
* @param[out] out the output tensor. Each element has a boolean value, i.e. either false or true. type = [bool].
* @param[in] input the first tensor. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @param[in] other the scalar to be compared. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiEqScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* other);
/**
* @brief The in-place version of diopiEqScalar(): the comparison result is written back into input.
* @param[in] ctx Context environment.
* @param[in,out] input the tensor to be compared; it also receives the result.
* @param[in] other the scalar to be compared.
* @sa Other parameters refer to diopiEqScalar().
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiEqInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* other);
/**
* @brief Computes the element-wise equality comparison of two tensors, "==".
* @param[in] ctx Context environment.
* @param[out] out the output tensor. Each element has a boolean value, i.e. either false or true. type = [bool].
* @param[in] input the first tensor. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @param[in] other the second tensor. The dimension should be the same as that of the input tensor. type = [float64, float32, float16, int64, int32, int16, int8, uint8, bool].
* @return diopiError_t error code.
*/
DIOPI_API diopiError_t diopiEq(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other);