-
Notifications
You must be signed in to change notification settings - Fork 37
/
Copy pathunion16_amd64.s
74 lines (67 loc) · 1.39 KB
/
union16_amd64.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
// Code generated by command: go run union16_asm.go -pkg sortedset -out ../sortedset/union16_amd64.s -stubs ../sortedset/union16_amd64.go. DO NOT EDIT.
//go:build !purego
#include "textflag.h"
// func union16(dst []byte, a []byte, b []byte) (i int, j int, k int)
// Requires: AVX
//
// union16 walks a and b in 16-byte items and merges them into dst: at every
// step the heads of a and b are compared, the smaller one is stored to dst,
// and when both heads are identical a single copy is stored and both inputs
// advance. The routine stops as soon as either input is exhausted; it does
// not copy the remaining tail of the other input.
//
// Items are ordered byte-wise, unsigned, with the byte at the lowest address
// being the most significant (the first differing byte decides).
//
// Results (all in bytes):
//   i = bytes consumed from a
//   j = bytes consumed from b
//   k = bytes written to dst
//
// Register roles:
//   AX = dst write cursor      CX = a read cursor      DX = b read cursor
//   BX = one past the end of a                         SI = one past the end of b
//   X0 = all-ones constant     X1 = current item of a  X2 = current item of b
//   X3, X4, DI, R8, R9 = comparison scratch
//
// NOTE(review): the code performs no bounds checks. The first loads happen
// before any length test, end-of-input is detected with an equality compare
// only, and dst is written without checking its size. This presumably relies
// on the caller guaranteeing that a and b each hold at least one item, that
// both lengths are multiples of 16, and that dst is large enough - confirm
// against the Go wrapper.
// NOTE(review): the merge only yields a meaningful union if a and b are
// already sorted in the order described above - presumably a caller
// invariant; verify.
//
// Frame: $0-96 = no local stack frame, 96 bytes of arguments and results
// (three 24-byte slice headers followed by three 8-byte ints).
TEXT ·union16(SB), NOSPLIT, $0-96
// Load the base pointers of the three slices into the cursors.
MOVQ dst_base+0(FP), AX
MOVQ a_base+24(FP), CX
MOVQ b_base+48(FP), DX
// BX = a_base + a_len: address one past the last byte of a.
MOVQ a_len+32(FP), BX
ADDQ CX, BX
// SI = b_base + b_len: address one past the last byte of b.
MOVQ b_len+56(FP), SI
ADDQ DX, SI
// Comparing a register with itself sets every byte to 0xFF; X0 is used below
// as the XOR operand that inverts a comparison mask.
VPCMPEQB X0, X0, X0
// Preload the first item of each input (unaligned 16-byte loads).
// No length check precedes these loads - see the NOTE(review) in the header.
VMOVUPS (CX), X1
VMOVUPS (DX), X2
loop:
// X3 = per-byte "a != b" mask: equality compare, then invert via XOR with X0.
VPCMPEQB X1, X2, X3
VPXOR X3, X0, X3
// X4 = per-byte "a <= b" mask: the unsigned minimum equals a's byte exactly
// when a's byte is not greater than b's byte.
VPMINUB X1, X2, X4
VPCMPEQB X1, X4, X4
// X4 = "a <= b" AND "a != b" = per-byte "a < b" mask.
VPAND X4, X3, X4
// Collapse both masks to one bit per byte: DI = "differs", R8 = "a < b".
// Bit n corresponds to byte n of the item.
VPMOVMSKB X3, DI
VPMOVMSKB X4, R8
// No differing byte at all: the two items are identical.
TESTL DI, DI
JZ equal
// R9 = index of the first (lowest-addressed) differing byte.
BSFL DI, R9
// BTSL copies bit R9 of R8 into CF before setting it; only CF matters here,
// the modified R8 is not read again. CF=1 means a's byte is the smaller one
// at the deciding position, i.e. a's item sorts first.
BTSL R9, R8
JCS less
// Fall-through: b's item sorts first. Emit it and advance dst and b.
VMOVUPS X2, (AX)
ADDQ $0x10, AX
ADDQ $0x10, DX
// Stop when b is exhausted, otherwise fetch b's next item.
CMPQ DX, SI
JE done
VMOVUPS (DX), X2
JMP loop
less:
// a's item sorts first. Emit it and advance dst and a.
VMOVUPS X1, (AX)
ADDQ $0x10, AX
ADDQ $0x10, CX
// Stop when a is exhausted, otherwise fetch a's next item.
CMPQ CX, BX
JE done
VMOVUPS (CX), X1
JMP loop
equal:
// Both items are identical: emit one copy and advance dst, a and b together.
VMOVUPS X1, (AX)
ADDQ $0x10, AX
ADDQ $0x10, CX
ADDQ $0x10, DX
// Stop if either input is exhausted; both cursors have already moved past
// the shared item, so i and j account for it.
CMPQ CX, BX
JE done
CMPQ DX, SI
JE done
VMOVUPS (CX), X1
VMOVUPS (DX), X2
JMP loop
done:
// Turn the cursors back into byte offsets by subtracting the base pointers,
// which are reloaded from the argument frame.
// i = CX - a_base
MOVQ a_base+24(FP), BX
SUBQ BX, CX
MOVQ CX, i+72(FP)
// j = DX - b_base
MOVQ b_base+48(FP), CX
SUBQ CX, DX
MOVQ DX, j+80(FP)
// k = AX - dst_base
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ AX, k+88(FP)
RET