/*
 * memset - fill memory with a constant
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

/*
   Written by Dave Gilbert <david.gilbert@linaro.org>

   This memset routine is optimised on a Cortex-A9 and should work on
   all ARMv7 processors.
*/
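
/*
   A rough C sketch of the strategy used below (illustrative only: the
   write16xN/write8 helpers are stand-ins for the STMIA block stores and do
   not exist anywhere in this code; uintptr_t would come from <stdint.h>):

     void *memset(void *s, int c, size_t n)
     {
         unsigned char *p = s;
         while (n != 0 && ((uintptr_t)p & 7) != 0) {   // bytes until 8-byte aligned
             *p++ = (unsigned char)c;
             n--;
         }
         if (n >= 16) {
             size_t chunks = n & ~(size_t)15;          // whole 16-byte blocks
             write16xN(p, (unsigned char)c, chunks);   // stand-in for the STMIA loop
             p += chunks;
             n &= 15;
             if (n & 8) {                              // one conditional 8-byte store
                 write8(p, (unsigned char)c);          // stand-in for STMIANE {r1,r5}
                 p += 8;
                 n -= 8;
             }
         }
         while (n--)                                   // trailing bytes
             *p++ = (unsigned char)c;
         return s;
     }
*/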

        .syntax unified
        .arch armv7-a

@ 2011-08-30 david.gilbert@linaro.org
@ Extracted from local git 2f11b436

@ This lets us check a flag in a 00/ff byte easily in either endianness.
#ifdef __ARMEB__
#define CHARTSTMASK(c) 1<<(31-(c*8))
#else
#define CHARTSTMASK(c) 1<<(c*8)
#endif
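@ For example, CHARTSTMASK(0) expands to 1<<0 on little-endian and to 1<<31 on
@ big-endian, i.e. a bit inside byte 0 of a 32-bit word; since the byte being
@ tested is either 0x00 or 0xff, testing any single bit of it is enough.
@ (The macro is not actually referenced by this routine.)
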
        .text
        .thumb

@ ---------------------------------------------------------------------------
        .thumb_func
        .align 2
        .p2align 4,,15
        .global __memset_arm
        .type   __memset_arm, %function
__memset_arm:
        @ r0 = address
        @ r1 = character
        @ r2 = count
        @ Returns the original address in r0.
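        @ e.g. an AAPCS call __memset_arm(buf, 0x41, 100) arrives with
        @ r0 = buf, r1 = 0x41, r2 = 100, and must return buf in r0.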

        mov     r3, r0                  @ Leave r0 alone so it can be returned unchanged
        cbz     r2, 10f                 @ Exit if length is 0

        tst     r0, #7
        beq     2f                      @ Already 8-byte aligned

        @ OK, so we're misaligned here: store single bytes until r3 is 8-byte aligned.
1:
        strb    r1, [r3], #1
        subs    r2, r2, #1
        tst     r3, #7
        cbz     r2, 10f                 @ Exit if we hit the end
        bne     1b                      @ Go round again if still misaligned
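
        @ Note that the bne above branches on the flags set by the TST (the
        @ alignment check), not the SUBS: CBZ never alters the flags, so the
        @ two exit conditions stay independent.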

2:
        @ OK, so we're aligned.
        push    {r4, r5, r6, r7}
        bics    r4, r2, #15             @ r4 = count rounded down to a multiple of 16
        beq     5f                      @ fewer than 16 bytes left, so just finish off the tail

3:
        @ POSIX says that ch is cast to an unsigned char.  A uxtb is a
        @ two-byte instruction and takes two cycles, whereas an AND is four
        @ bytes but one cycle.
        and     r1, #0xFF
        orr     r1, r1, r1, lsl #8      @ Same character into all four bytes
        orr     r1, r1, r1, lsl #16
        mov     r5, r1
        mov     r6, r1
        mov     r7, r1
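
        @ e.g. r1 = 0x000000A3 becomes 0xA3A3A3A3, and r5-r7 hold the same
        @ value, so each STMIA below writes 16 identical bytes at once.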

4:
        subs    r4, r4, #16
        stmia   r3!, {r1, r5, r6, r7}   @ 16 bytes per iteration
        bne     4b
        and     r2, r2, #15             @ r2 = leftover bytes (0-15)

        @ At this point we're still aligned and have up to 15 bytes left to
        @ write; we can avoid some of the byte-at-a-time work by testing for
        @ one big chunk first.
        tst     r2, #8
        itt     ne
        subne   r2, r2, #8
        stmiane r3!, {r1, r5}
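        @ The Thumb-2 IT block above makes both the SUB and the STMIA
        @ conditional on NE, so the 8-byte store happens only when bit 3 of
        @ the remaining count is set.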

5:
        pop     {r4, r5, r6, r7}
        cbz     r2, 10f

        @ Store any last few (< 16) bytes one at a time.
6:
        subs    r2, r2, #1
        strb    r1, [r3], #1
        bne     6b

10:
        bx      lr                      @ Goodbye
        .size   __memset_arm, . - __memset_arm