aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Wilco Dijkstra <wdijkstr@arm.com> 2020-08-28 17:51:40 +0100
committer Andreas K. Hüttel <dilfridge@gentoo.org> 2020-10-21 20:24:58 +0300
commit 538fe433fbc658ccc4f3cf52e83e0f1633066a7d (patch)
tree 8fe7a861aa9685802ae43c16ad4b179f3c7e2304
parent Set version.h RELEASE to "stable" (Bug 26700) (diff)
download glibc-538fe433fbc658ccc4f3cf52e83e0f1633066a7d.tar.gz
glibc-538fe433fbc658ccc4f3cf52e83e0f1633066a7d.tar.bz2
glibc-538fe433fbc658ccc4f3cf52e83e0f1633066a7d.zip
AArch64: Improve backwards memmove performance
On some microarchitectures, performance of the backwards memmove improves if the stores use STR with decreasing addresses. So change the memmove loop in memcpy_advsimd.S to use 2x STR rather than STP.

Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
(cherry picked from commit bd394d131c10c9ec22c6424197b79410042eed99)
(cherry picked from commit 0f8f0ed25c196cfb93edf461aefdad15314ae05c)
-rw-r--r-- sysdeps/aarch64/multiarch/memcpy_advsimd.S 7
1 files changed, 4 insertions, 3 deletions
diff --git a/sysdeps/aarch64/multiarch/memcpy_advsimd.S b/sysdeps/aarch64/multiarch/memcpy_advsimd.S
index d4ba747777..48bb6d7ca4 100644
--- a/sysdeps/aarch64/multiarch/memcpy_advsimd.S
+++ b/sysdeps/aarch64/multiarch/memcpy_advsimd.S
@@ -223,12 +223,13 @@ L(copy_long_backwards):
b.ls L(copy64_from_start)
L(loop64_backwards):
- stp A_q, B_q, [dstend, -32]
+ str B_q, [dstend, -16]
+ str A_q, [dstend, -32]
ldp A_q, B_q, [srcend, -96]
- stp C_q, D_q, [dstend, -64]
+ str D_q, [dstend, -48]
+ str C_q, [dstend, -64]!
ldp C_q, D_q, [srcend, -128]
sub srcend, srcend, 64
- sub dstend, dstend, 64
subs count, count, 64
b.hi L(loop64_backwards)