
[Xen-changelog] [linux-2.6.18-xen] x86-64: provide a memset() that can deal with 4Gb or above at a time



# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1332754669 -7200
# Node ID 2748f5d7597de35e59318f4bad8ed762e8ce32aa
# Parent  b7fea0d6bf234c86e65ddf7781fdf63bb046cba6
x86-64: provide a memset() that can deal with 4Gb or above at a time

Now that a corresponding change has been accepted into Linux 3.4, let's
fix this in our code too. It is particularly required by the memset()
invoked from __alloc_bootmem_core(), which can be called with sizes
beyond 4Gb out of alloc_node_mem_map() when CONFIG_FLAT_NODE_MEM_MAP is
defined (the sizes cross 4Gb at around 300Gb of memory).
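
The root cause is that the implementation being replaced derived its
loop counts with 32-bit operations ("movl %edx,%ecx" followed by
"shrl $6,%ecx"), silently discarding the upper 32 bits of any count of
4Gb or above. A minimal user-space sketch of that failure mode follows;
the helper names are hypothetical and not part of the patch:

/*
 * Sketch of the pre-fix failure mode: the 64-byte-block count is
 * derived from a length truncated to 32 bits, as the old
 * "movl %edx,%ecx; shrl $6,%ecx" sequence did.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t blocks_32bit(uint64_t count)
{
	uint32_t ecx = (uint32_t)count;	/* movl %edx,%ecx: high bits lost */
	return ecx >> 6;		/* shrl $6,%ecx */
}

static uint64_t blocks_64bit(uint64_t count)
{
	return count >> 6;		/* shrq $6,%rcx (this patch) */
}

int main(void)
{
	uint64_t count = 5ULL << 30;	/* a 5Gb memset() request */

	/* prints 16777216 -- only 1Gb worth of 64-byte blocks */
	printf("32-bit: %llu\n", (unsigned long long)blocks_32bit(count));
	/* prints 83886080 -- the full 5Gb */
	printf("64-bit: %llu\n", (unsigned long long)blocks_64bit(count));
	return 0;
}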

In order not to affect the native kernel, introduce a Xen-specific
clone of the original file. The native kernel is unlikely to be
affected anyway: as long as NUMA is defined it usually sets up a
separate map for each node, so the problem would only become visible
with said amount of memory in a single node and with SPARSEMEM not in
use; besides, in this tree we are not really concerned about fixing
native problems.
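
As a back-of-the-envelope check on the ~300Gb figure (an estimate,
assuming sizeof(struct page) is in the 56-64 byte range in this tree):
with CONFIG_FLAT_NODE_MEM_MAP, alloc_node_mem_map() allocates one
struct page per 4096-byte page in a single flat array, so that array
crosses the 4Gb mark at roughly (4Gb / 56) * 4096 ≈ 292Gb of memory.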

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---


diff -r b7fea0d6bf23 -r 2748f5d7597d arch/x86_64/lib/memset-xen.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/x86_64/lib/memset-xen.S      Mon Mar 26 11:37:49 2012 +0200
@@ -0,0 +1,122 @@
+/* Copyright 2002 Andi Kleen, SuSE Labs */
+/*
+ * ISO C memset - set a memory block to a byte value.
+ *
+ * rdi   destination
+ * rsi   value (char)
+ * rdx   count (bytes)
+ *
+ * rax   original destination
+ */
+       .globl __memset
+       .globl memset
+       .p2align 4
+memset:
+__memset:
+       movq %rdi,%r10
+
+       /* expand byte value  */
+       movzbl %sil,%ecx
+       movabs $0x0101010101010101,%rax
+       imulq  %rcx,%rax
+
+       /* align dst */
+       movl  %edi,%r9d
+       andl  $7,%r9d
+       jnz  .Lbad_alignment
+.Lafter_bad_alignment:
+
+       movq  %rdx,%rcx
+       shrq  $6,%rcx
+       jz       .Lhandle_tail
+
+       .p2align 4
+.Lloop_64:
+       decq  %rcx
+       movq  %rax,(%rdi)
+       movq  %rax,8(%rdi)
+       movq  %rax,16(%rdi)
+       movq  %rax,24(%rdi)
+       movq  %rax,32(%rdi)
+       movq  %rax,40(%rdi)
+       movq  %rax,48(%rdi)
+       movq  %rax,56(%rdi)
+       leaq  64(%rdi),%rdi
+       jnz    .Lloop_64
+
+       /* Handle tail in loops. The loops should be faster than hard
+          to predict jump tables. */
+       .p2align 4
+.Lhandle_tail:
+       movl    %edx,%ecx
+       andl    $63&(~7),%ecx
+       jz              .Lhandle_7
+       shrl    $3,%ecx
+       .p2align 4
+.Lloop_8:
+       decl   %ecx
+       movq  %rax,(%rdi)
+       leaq  8(%rdi),%rdi
+       jnz    .Lloop_8
+
+.Lhandle_7:
+       andl    $7,%edx
+       jz      .Lende
+       .p2align 4
+.Lloop_1:
+       decl    %edx
+       movb    %al,(%rdi)
+       leaq    1(%rdi),%rdi
+       jnz     .Lloop_1
+
+.Lende:
+       movq    %r10,%rax
+       ret
+
+.Lbad_alignment:
+       cmpq $7,%rdx
+       jbe     .Lhandle_7
+       movq %rax,(%rdi)        /* unaligned store */
+       movq $8,%r8
+       subq %r9,%r8
+       addq %r8,%rdi
+       subq %r8,%rdx
+       jmp .Lafter_bad_alignment
+
+       /* Some CPUs run faster using the string instructions.
+          It is also a lot simpler. Use this when possible */
+
+#include <asm/cpufeature.h>
+
+       .section .altinstructions,"a"
+       .align 8
+       .quad  memset
+       .quad  memset_c
+       .byte  X86_FEATURE_REP_GOOD
+       .byte  memset_c_end-memset_c
+       .byte  memset_c_end-memset_c
+       .previous
+
+       .section .altinstr_replacement,"ax"
+ /* rdi        destination
+  * rsi value
+  * rdx count
+  */
+memset_c:
+       movq %rdi,%r9
+       movq %rdx,%rcx
+       andl $7,%edx
+       shrq $3,%rcx
+       /* expand byte value  */
+       movzbl %sil,%esi
+       movabs $0x0101010101010101,%rax
+       imulq %rsi,%rax
+       rep
+       stosq
+       movl %edx,%ecx
+       rep
+       stosb
+       movq %r9,%rax
+       ret
+memset_c_end:
+       .previous
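
For readers unfamiliar with the .altinstructions machinery above: at
boot, on CPUs advertising X86_FEATURE_REP_GOOD, the kernel's
alternatives patching replaces the start of memset with the memset_c
sequence, which hands the bulk of the work to rep stosq/rep stosb. A
rough C rendering of what memset_c does (an illustrative sketch, not
the actual code path):

/*
 * Illustrative C rendering of the memset_c replacement sequence.
 * The real routine is the assembly above; this only mirrors its logic.
 */
#include <stddef.h>
#include <stdint.h>

static void *memset_c_sketch(void *dst, int val, size_t count)
{
	/* expand the byte value into all eight lanes of a qword */
	uint64_t pattern = (uint8_t)val * 0x0101010101010101ULL;
	uint64_t *q = dst;
	uint8_t *b;
	size_t qwords = count >> 3;	/* shrq $3,%rcx */
	size_t tail = count & 7;	/* andl $7,%edx */

	while (qwords--)		/* rep stosq */
		*q++ = pattern;
	b = (uint8_t *)q;
	while (tail--)			/* rep stosb */
		*b++ = (uint8_t)val;
	return dst;			/* original destination (%r9 -> %rax) */
}

Note that both here and in the open-coded loop above, the counts are
now derived with full 64-bit arithmetic (movq %rdx,%rcx; shrq), which
is exactly what allows sizes of 4Gb and above.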

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 

