strchr-mte.S source code [libc/AOR_v20.02/string/aarch64/strchr-mte.S]

1	/*
2	* strchr - find a character in a string
3	*
4	* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5	* See https://llvm.org/LICENSE.txt for license information.
6	* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7	*/
8
/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */
14
#include "../asmdefs.h"

/* Arguments and results.
   srcin  - incoming pointer to the string to search.
   chrin  - incoming character to look for; only its low byte is used,
	    because it is replicated into byte lanes with DUP .16b.
   result - return value; shares x0 with srcin, so srcin is dead once
	    result has been written.  */
#define srcin		x0
#define chrin		w1

#define result		x0

/* General-purpose scratch registers.
   src   - chunk cursor; always a multiple of 16 (derived from srcin with
	   the low four bits cleared and only ever adjusted by 16 or 32).
   tmp1  - misalignment of srcin, then syndrome, then bit index of the
	   first hit.
   wtmp2 - staging register for the 0x10011001 lane-mask constant.
   tmp3  - all-ones seed for the padding mask, then a copy of the
	   syndrome.  */
#define src		x2
#define tmp1		x3
#define wtmp2		w4
#define tmp3		x5

/* Vector registers.
   vrepchr    - the search character replicated into all 16 byte lanes.
   qdata      - the current 16-byte chunk, named as a Q register for LDR.
   vdata      - the same register (v1) named as a vector for CMEQ.
   vhas_nul   - per-byte result of comparing the chunk with zero.
   vhas_chr   - per-byte result of comparing the chunk with vrepchr.
   vrepmask_0 - 0x20022002 in every 32-bit lane (NUL marker bits).
   vrepmask_c - 0x10011001 in every 32-bit lane (character marker bits).
   vend       - combined NUL/character hits, narrowed to the syndrome.  */
#define vrepchr		v0
#define qdata		q1
#define vdata		v1
#define vhas_nul	v2
#define vhas_chr	v3
#define vrepmask_0	v4
#define vrepmask_c	v5
#define vend		v6

/* Build a file-local label: L(loop) expands to .Lloop.  */
#define L(l) .L ## l
38
/* Core algorithm.

   For each 16-byte chunk we calculate a 64-bit syndrome value, with
   four bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each four-bit tuple, bit 0 is set if
   the relevant byte matched the requested character; bit 1 is set
   if the relevant byte matched the NUL end of string (we trigger
   off bit0 for the special case of looking for NUL) and bits 2 and 3
   are not used.
   Since the bits in the syndrome reflect exactly the order in which
   things occur in the original string, a count_trailing_zeros()
   operation will identify exactly which byte is causing the termination,
   and why.  */
52
/* Locals and temporaries.  */
54
/* __strchr_aarch64_mte - find the first occurrence of a character.

   Presumably called as the standard  char *strchr (const char *s, int c);
   the prototype itself is not declared in this file.

   In:	 x0 (srcin) = s, pointer to a NUL-terminated string.
	 w1 (chrin) = c; only the low 8 bits take part in the comparison.
   Out:	 x0 (result) = address of the first byte equal to c, or 0 if the
	 terminating NUL comes first.  When c is 0 the address of the
	 terminator itself is returned.
   Uses: x2, x3, x4, x5, v0-v6 and the condition flags.  No stack access,
	 no calls.

   Memory is read only with 16-byte loads from 16-byte aligned addresses,
   and a chunk is loaded only after every earlier chunk has been found
   free of both NUL and c, so no load goes beyond the aligned chunk that
   holds the terminator.  */
ENTRY(__strchr_aarch64_mte)
	/* Magic constant 0x10011001 to allow us to identify which lane
	   matches the requested byte.  Magic constant 0x20022002 used
	   similarly for NUL termination.  */
	mov	wtmp2, #0x1001
	movk	wtmp2, #0x1001, lsl #16
	dup	vrepchr.16b, chrin
	bic	src, srcin, #15		/* Work with aligned 16-byte chunks.  */
	dup	vrepmask_c.4s, wtmp2
	ands	tmp1, srcin, #15	/* tmp1 = misalignment; Z set if none.  */
	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
	b.eq	L(loop)			/* Flags still from the ANDS above.  */

	/* Input string is not 16-byte aligned.  Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding.  */
	ldr	qdata, [src], #16
	cmeq	vhas_nul.16b, vdata.16b, #0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* Even byte lanes keep bit 0 (chr) / bit 1 (NUL), odd byte lanes keep
	   bit 4 / bit 5, so the pairwise add below packs two bytes into one
	   without the markers colliding.  */
	and	vhas_nul.16b, vhas_nul.16b, vrepmask_0.16b
	and	vhas_chr.16b, vhas_chr.16b, vrepmask_c.16b
	lsl	tmp1, tmp1, #2		/* Four syndrome bits per byte.  */
	orr	vend.16b, vhas_nul.16b, vhas_chr.16b
	mov	tmp3, #~0
	addp	vend.16b, vend.16b, vend.16b		/* 128->64 */
	lsl	tmp1, tmp3, tmp1	/* Ones from the first real byte up.  */

	mov	tmp3, vend.d[0]
	ands	tmp1, tmp3, tmp1	/* Mask padding bits.  */
	b.ne	L(tail)			/* src is one chunk ahead, as L(tail)
					   expects.  */

	/* Main loop: two 16-byte chunks per iteration.  src is advanced by
	   32 up front; the second chunk is then addressed at src - 16.  */
L(loop):
	ldr	qdata, [src], #32
	cmeq	vhas_nul.16b, vdata.16b, #0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* Use a fast check for the termination condition.  The compare
	   results are 0x00 or 0xff per byte, so each wrapped pairwise sum
	   is non-zero exactly when one of its two bytes was a hit.  */
	orr	vend.16b, vhas_nul.16b, vhas_chr.16b
	addp	vend.16b, vend.16b, vend.16b		/* 128->64 */
	mov	tmp1, vend.d[0]
	cbnz	tmp1, L(end)

	ldr	qdata, [src, #-16]
	cmeq	vhas_nul.16b, vdata.16b, #0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* Use a fast check for the termination condition.  */
	orr	vend.16b, vhas_nul.16b, vhas_chr.16b
	addp	vend.16b, vend.16b, vend.16b		/* 128->64 */
	mov	tmp1, vend.d[0]
	cbz	tmp1, L(loop)

	/* Adjust src for next two subtractions.  */
	add	src, src, #16
L(end):
	/* Termination condition found.  Now need to establish exactly why
	   we terminated.  vhas_nul and vhas_chr still hold the raw compare
	   results of the chunk that hit.  */
	and	vhas_nul.16b, vhas_nul.16b, vrepmask_0.16b
	and	vhas_chr.16b, vhas_chr.16b, vrepmask_c.16b
	sub	src, src, #16
	orr	vend.16b, vhas_nul.16b, vhas_chr.16b
	addp	vend.16b, vend.16b, vend.16b		/* 128->64 */

	mov	tmp1, vend.d[0]
	/* On entry to L(tail): tmp1 = non-zero syndrome of the chunk that
	   hit, src = address of that chunk + 16.  */
L(tail):
	/* Count the trailing zeros, by bit reversing...  */
	rbit	tmp1, tmp1
	/* Re-bias source.  */
	sub	src, src, #16
	clz	tmp1, tmp1	/* And counting the leading zeros.  */
	/* Tmp1 is even if the target character was found first.  Otherwise
	   we've found the end of string and we weren't looking for NUL.  */
	tst	tmp1, #1
	add	result, src, tmp1, lsr #2	/* Bit index / 4 = byte offset.  */
	csel	result, result, xzr, eq		/* Odd index: NUL first, return 0.  */
	ret

END(__strchr_aarch64_mte)
132

source code of libc/AOR_v20.02/string/aarch64/strchr-mte.S