| 1 | /* |
| 2 | * memchr - scan memory for a character |
| 3 | * |
| 4 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 5 | * See https://llvm.org/LICENSE.txt for license information. |
| 6 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 7 | */ |
| 8 | |
| 9 | /* |
| 10 | Written by Dave Gilbert <david.gilbert@linaro.org> |
| 11 | |
| 12 | This __memchr_arm routine is optimised on a Cortex-A9 and should work on |
| 13 | all ARMv7 processors. It has a fast past for short sizes, and has |
| 14 | an optimised path for large data sets; the worst case is finding the |
| 15 | match early in a large data set. |
| 16 | |
| 17 | */ |
| 18 | |
| 19 | @ 2011-02-07 david.gilbert@linaro.org |
| 20 | @ Extracted from local git a5b438d861 |
| 21 | @ 2011-07-14 david.gilbert@linaro.org |
| 22 | @ Import endianness fix from local git ea786f1b |
| 23 | @ 2011-12-07 david.gilbert@linaro.org |
| 24 | @ Removed unneeded cbz from align loop |
| 25 | |
| 26 | .syntax unified |
| 27 | .arch armv7-a |
| 28 | |
| 29 | @ this lets us check a flag in a 00/ff byte easily in either endianness |
| 30 | #ifdef __ARMEB__ |
| 31 | #define CHARTSTMASK(c) 1<<(31-(c*8)) |
| 32 | #else |
| 33 | #define CHARTSTMASK(c) 1<<(c*8) |
| 34 | #endif |
| 35 | .text |
| 36 | .thumb |
| 37 | |
| 38 | @ --------------------------------------------------------------------------- |
| 39 | .thumb_func |
| 40 | .align 2 |
| 41 | .p2align 4,,15 |
| 42 | .global __memchr_arm |
| 43 | .type __memchr_arm,%function |
| 44 | __memchr_arm: |
| 45 | @ r0 = start of memory to scan |
| 46 | @ r1 = character to look for |
| 47 | @ r2 = length |
| 48 | @ returns r0 = pointer to character or NULL if not found |
| 49 | and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char |
| 50 | |
| 51 | cmp r2,#16 @ If it's short don't bother with anything clever |
| 52 | blt 20f |
| 53 | |
| 54 | tst r0, #7 @ If it's already aligned skip the next bit |
| 55 | beq 10f |
| 56 | |
| 57 | @ Work up to an aligned point |
| 58 | 5: |
| 59 | ldrb r3, [r0],#1 |
| 60 | subs r2, r2, #1 |
| 61 | cmp r3, r1 |
| 62 | beq 50f @ If it matches exit found |
| 63 | tst r0, #7 |
| 64 | bne 5b @ If not aligned yet then do next byte |
| 65 | |
| 66 | 10: |
| 67 | @ At this point, we are aligned, we know we have at least 8 bytes to work with |
| 68 | push {r4,r5,r6,r7} |
| 69 | orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes |
| 70 | orr r1, r1, r1, lsl #16 |
| 71 | bic r4, r2, #7 @ Number of double words to work with |
| 72 | mvns r7, #0 @ all F's |
| 73 | movs r3, #0 |
| 74 | |
| 75 | 15: |
| 76 | ldmia r0!,{r5,r6} |
| 77 | subs r4, r4, #8 |
| 78 | eor r5,r5, r1 @ Get it so that r5,r6 have 00's where the bytes match the target |
| 79 | eor r6,r6, r1 |
| 80 | uadd8 r5, r5, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 |
| 81 | sel r5, r3, r7 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION |
| 82 | uadd8 r6, r6, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 |
| 83 | sel r6, r5, r7 @ chained....bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION |
| 84 | cbnz r6, 60f |
| 85 | bne 15b @ (Flags from the subs above) If not run out of bytes then go around again |
| 86 | |
| 87 | pop {r4,r5,r6,r7} |
| 88 | and r1,r1,#0xff @ Get r1 back to a single character from the expansion above |
| 89 | and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done |
| 90 | |
| 91 | 20: |
| 92 | cbz r2, 40f @ 0 length or hit the end already then not found |
| 93 | |
| 94 | 21: @ Post aligned section, or just a short call |
| 95 | ldrb r3,[r0],#1 |
| 96 | subs r2,r2,#1 |
| 97 | eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub |
| 98 | cbz r3, 50f |
| 99 | bne 21b @ on r2 flags |
| 100 | |
| 101 | 40: |
| 102 | movs r0,#0 @ not found |
| 103 | bx lr |
| 104 | |
| 105 | 50: |
| 106 | subs r0,r0,#1 @ found |
| 107 | bx lr |
| 108 | |
| 109 | 60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was |
| 110 | @ r0 points to the start of the double word after the one that was tested |
| 111 | @ r5 has the 00/ff pattern for the first word, r6 has the chained value |
| 112 | cmp r5, #0 |
| 113 | itte eq |
| 114 | moveq r5, r6 @ the end is in the 2nd word |
| 115 | subeq r0,r0,#3 @ Points to 2nd byte of 2nd word |
| 116 | subne r0,r0,#7 @ or 2nd byte of 1st word |
| 117 | |
| 118 | @ r0 currently points to the 3rd byte of the word containing the hit |
| 119 | tst r5, # CHARTSTMASK(0) @ 1st character |
| 120 | bne 61f |
| 121 | adds r0,r0,#1 |
| 122 | tst r5, # CHARTSTMASK(1) @ 2nd character |
| 123 | ittt eq |
| 124 | addeq r0,r0,#1 |
| 125 | tsteq r5, # (3<<15) @ 2nd & 3rd character |
| 126 | @ If not the 3rd must be the last one |
| 127 | addeq r0,r0,#1 |
| 128 | |
| 129 | 61: |
| 130 | pop {r4,r5,r6,r7} |
| 131 | subs r0,r0,#1 |
| 132 | bx lr |
| 133 | |
| 134 | .size __memchr_arm, . - __memchr_arm |
| 135 | |