[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 3/12] Provide basic Xen PM infrastructure


  • To: <xen-devel@xxxxxxxxxxxxxxxxxxx>
  • From: "Tian, Kevin" <kevin.tian@xxxxxxxxx>
  • Date: Tue, 15 May 2007 22:16:30 +0800
  • Delivery-date: Tue, 15 May 2007 07:15:37 -0700
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>
  • Thread-index: AceW+6GWqRDl7Ly2S/+Xy1Y62QxDUA==
  • Thread-topic: [PATCH 3/12] Provide basic Xen PM infrastructure

Add basic infrastructure for Xen power management. Currently
only S3 (suspend to RAM) is supported.

Signed-off-by: Ke Yu <ke.yu@xxxxxxxxx>
Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx>

diff -r 84c103f8881a xen/arch/x86/acpi/Makefile
--- a/xen/arch/x86/acpi/Makefile        Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/acpi/Makefile        Mon May 14 16:34:31 2007 -0400
@@ -1,1 +1,2 @@ obj-y += boot.o
 obj-y += boot.o
+obj-y += power.o
diff -r 84c103f8881a xen/arch/x86/acpi/power.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/acpi/power.c Mon May 14 20:49:36 2007 -0400
@@ -0,0 +1,209 @@
+/* drivers/acpi/sleep/power.c - PM core functionality for Xen
+ *
+ * Copyrights from Linux side:
+ * Copyright (c) 2000-2003 Patrick Mochel
+ * Copyright (C) 2001-2003 Pavel Machek <pavel@xxxxxxx>
+ * Copyright (c) 2003 Open Source Development Lab
+ * Copyright (c) 2004 David Shaohua Li <shaohua.li@xxxxxxxxx>
+ * Copyright (c) 2005 Alexey Starikovskiy
<alexey.y.starikovskiy@xxxxxxxxx>
+ *
+ * Slimmed with Xen specific support.
+ */
+
+#include <asm/io.h>
+#define CONFIG_ACPI_SLEEP
+#include <asm/acpi.h>
+#include <xen/acpi.h>
+#include <xen/errno.h>
+#include <xen/iocap.h>
+#include <xen/sched.h>
+#include <asm/acpi.h>
+#include <asm/irq.h>
+#include <asm/init.h>
+#include <xen/spinlock.h>
+#include <xen/sched.h>
+#include <xen/domain.h>
+#include <xen/console.h>
+
+u8 sleep_states[ACPI_S_STATE_COUNT];
+DEFINE_SPINLOCK(pm_lock);
+
+extern void do_suspend_lowlevel(void);
+
+static char *acpi_states[ACPI_S_STATE_COUNT] =
+{
+    [ACPI_STATE_S1] = "standby",
+    [ACPI_STATE_S3] = "mem",
+    [ACPI_STATE_S4] = "disk",
+};
+
+/* address in low memory of the wakeup routine. */
+unsigned long acpi_wakeup_address;
+unsigned long acpi_video_flags;
+extern char wakeup_start, wakeup_end;
+unsigned long saved_videomode;
+extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
+
+/**
+ * acpi_save_state_mem - save kernel state
+ *
+ * Create an identity mapped page table and copy the wakeup routine to
+ * low memory.
+ */
+int acpi_save_state_mem(void)
+{
+    if (!acpi_wakeup_address)
+        return 1;
+
+    init_low_mappings();
+    memcpy((void *)acpi_wakeup_address, &wakeup_start,
+           &wakeup_end - &wakeup_start);
+    acpi_copy_wakeup_routine(acpi_wakeup_address);
+    return 0;
+}
+
+/*
+ * acpi_restore_state - undo effects of acpi_save_state_mem
+ */
+void acpi_restore_state_mem(void)
+{
+#ifdef CONFIG_X86_64
+    zap_low_mappings();
+#else
+    zap_low_mappings(idle_pg_table_l2);
+#endif
+}
+
+/**
+ * acpi_reserve_bootmem - do _very_ early ACPI initialisation
+ *
+ * We allocate a page from the first 1MB of memory for the wakeup
+ * routine for when we come back from a sleep state. The
+ * runtime allocator allows specification of <16MB pages, but not
+ * <1MB pages.
+ */
+void __init acpi_reserve_bootmem(void)
+{
+    if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) {
+       pmprintk(XENLOG_ERR, "ACPI: Wakeup code way too big, S3
disabled.\n");
+       return;
+    }
+    
+    /*  0~640K is not used by anyone, except 0x9000 is used by smp
+     *  trampoline code, so choose 0x7000 for XEN acpi wake up code
+     */
+    acpi_wakeup_address = (unsigned long)__va(0x7000);
+}
+
+/* Add suspend failure recover later */
+static int device_power_down(void)
+{
+    console_suspend();
+
+    time_suspend();
+
+    i8259A_suspend();
+    
+    ioapic_suspend();
+    
+    lapic_suspend();
+
+    return 0;
+}
+
+static void device_power_up(void)
+{
+    lapic_resume();
+    
+    ioapic_resume();
+
+    i8259A_resume();
+    
+    time_resume();
+
+    console_resume();
+}
+
+int enter_state(u32 state)
+{
+    struct domain *d;
+    unsigned long flags;
+    int error;
+
+    if (state <= ACPI_STATE_S0 || state > ACPI_S_STATES_MAX)
+        return -EINVAL;
+
+    if (!spin_trylock(&pm_lock))
+        return -EBUSY;
+    
+    for_each_domain(d)
+        if (d->domain_id != 0)
+            domain_pause(d);
+
+    pmprintk(XENLOG_INFO, "PM: Preparing system for %s sleep\n",
+        acpi_states[state]);
+
+    local_irq_save(flags);
+
+    if ((error = device_power_down())) {
+        printk(XENLOG_ERR "Some devices failed to power down\n");
+        goto Done;
+    }
+
+    ACPI_FLUSH_CPU_CACHE();
+
+    /* Do arch specific saving of state. */
+    if (state > ACPI_STATE_S1) {
+        error = acpi_save_state_mem();
+        if (error)
+            goto Powerup;
+    }
+
+    switch (state) {
+        case ACPI_STATE_S3:
+            do_suspend_lowlevel();
+            break;
+        default:
+            error = -EINVAL;
+            goto Powerup;
+    }
+
+    pmprintk(XENLOG_INFO, "Back to C!\n");
+    if (state > ACPI_STATE_S1)
+        acpi_restore_state_mem();
+
+ Powerup:
+    device_power_up();
+
+    pmprintk(XENLOG_INFO, "PM: Finishing wakeup.\n");
+    for_each_domain(d)
+       if (d->domain_id!=0)
+           domain_unpause(d);
+
+ Done:
+    local_irq_restore(flags);
+    spin_unlock(&pm_lock);
+    return error;
+
+}
+
+static int __init acpi_sleep_init(void)
+{
+    int i = 0; 
+
+    pmprintk(XENLOG_INFO, "ACPI (supports");
+    for (i = 0; i < ACPI_S_STATE_COUNT; i++) {
+        if (i == ACPI_STATE_S3){
+            sleep_states[i] = 1;
+            printk(" S%d", i);
+        }
+        else{
+            sleep_states[i] = 0;
+        }
+    }
+    printk(")\n");
+
+    acpi_reserve_bootmem();
+    return 0;
+}
+__initcall(acpi_sleep_init);
diff -r 84c103f8881a xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S        Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/boot/x86_32.S        Mon May 14 16:34:31 2007 -0400
@@ -146,6 +146,8 @@ start_paging:
         rdmsr
         bts     $_EFER_NX,%eax
         wrmsr
+        mov     $1,%eax
+        mov     %eax, nx_enabled-__PAGE_OFFSET
 no_execute_disable:
         pop     %ebx
 #endif
diff -r 84c103f8881a xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S        Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/boot/x86_64.S        Mon May 14 16:34:31 2007 -0400
@@ -198,6 +198,7 @@ multiboot_ptr:
         .long   0
 
         .word   0
+        .global nopaging_gdt_descr
 nopaging_gdt_descr:
         .word   LAST_RESERVED_GDT_BYTE
         .quad   gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
@@ -207,6 +208,7 @@ cpuid_ext_features:
         
         .word   0
 gdt_descr:
+        .global gdt_descr
         .word   LAST_RESERVED_GDT_BYTE
         .quad   gdt_table - FIRST_RESERVED_GDT_BYTE
 
diff -r 84c103f8881a xen/arch/x86/x86_32/Makefile
--- a/xen/arch/x86/x86_32/Makefile      Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/x86_32/Makefile      Mon May 14 16:34:31 2007 -0400
@@ -6,3 +6,5 @@ obj-y += traps.o
 obj-y += traps.o
 
 obj-$(supervisor_mode_kernel) += supervisor_mode_kernel.o
+subdir-y += acpi
+subdir-y += power
diff -r 84c103f8881a xen/arch/x86/x86_32/acpi/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/x86_32/acpi/Makefile Mon May 14 16:34:31 2007 -0400
@@ -0,0 +1,1 @@
+obj-y += wakeup.o
diff -r 84c103f8881a xen/arch/x86/x86_32/acpi/wakeup.S
--- a/xen/arch/x86/x86_32/acpi/wakeup.S Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/x86_32/acpi/wakeup.S Mon May 14 16:34:31 2007 -0400
@@ -1,6 +1,11 @@
 .text
+#ifndef __XEN__
 #include <linux/linkage.h>
 #include <asm/segment.h>
+#else
+#include <xen/config.h>
+#include <asm/asm_defns.h>
+#endif
 #include <asm/page.h>
 
 #
@@ -56,7 +61,11 @@ 1:
 1:
 
        # set up page table
+#ifndef __XEN__
        movl    $swsusp_pg_dir-__PAGE_OFFSET, %eax
+#else
+       movl    $idle_pg_table-__PAGE_OFFSET, %eax
+#endif
        movl    %eax, %cr3
 
        testl   $1, real_efer_save_restore - wakeup_code
@@ -88,7 +97,11 @@ 1:
        cmpl    $0x12345678, %eax
        jne     bogus_real_magic
 
+#ifndef __XEN__
        ljmpl   $__KERNEL_CS,$wakeup_pmode_return
+#else
+       ljmpl   $(__HYPERVISOR_CS),$wakeup_pmode_return
+#endif
 
 real_save_gdt: .word 0
                .long 0
@@ -184,7 +197,11 @@ ENTRY(wakeup_end)
 .org   0x1000
 
 wakeup_pmode_return:
+#ifndef __XEN__
        movw    $__KERNEL_DS, %ax
+#else
+       movw    $__HYPERVISOR_DS, %ax
+#endif
        movw    %ax, %ss
        movw    %ax, %ds
        movw    %ax, %es
@@ -196,7 +213,11 @@ wakeup_pmode_return:
        lgdt    saved_gdt
        lidt    saved_idt
        lldt    saved_ldt
+#ifndef __XEN__
        ljmp    $(__KERNEL_CS),$1f
+#else
+       ljmp    $(__HYPERVISOR_CS),$1f
+#endif
 1:
        movl    %cr3, %eax
        movl    %eax, %cr3
diff -r 84c103f8881a xen/arch/x86/x86_32/mm.c
--- a/xen/arch/x86/x86_32/mm.c  Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/x86_32/mm.c  Mon May 14 16:34:31 2007 -0400
@@ -34,6 +34,7 @@ unsigned int PAGE_HYPERVISOR_NOCACHE = _
 unsigned int PAGE_HYPERVISOR_NOCACHE = __PAGE_HYPERVISOR_NOCACHE;
 
 static unsigned long mpt_size;
+int nx_enabled = 0;
 
 void *alloc_xen_pagetable(void)
 {
@@ -133,7 +134,7 @@ void __init setup_idle_pagetable(void)
                                 __PAGE_HYPERVISOR));
 }
 
-void __init zap_low_mappings(l2_pgentry_t *base)
+void zap_low_mappings(l2_pgentry_t *base)
 {
     int i;
     u32 addr;
@@ -147,6 +148,18 @@ void __init zap_low_mappings(l2_pgentry_
             continue;
         l2e_write(&base[i], l2e_empty());
     }
+
+    flush_tlb_all_pge();
+}
+
+void init_low_mappings(void)
+{
+    int sz = ((DIRECTMAP_MBYTES << 20) >> L2_PAGETABLE_SHIFT) *
+             sizeof(l2_pgentry_t);
+
+    memcpy(idle_pg_table_l2,
+           idle_pg_table_l2 + (DIRECTMAP_VIRT_START >>
L2_PAGETABLE_SHIFT),
+           sz);
 
     flush_tlb_all_pge();
 }
diff -r 84c103f8881a xen/arch/x86/x86_32/power/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/x86_32/power/Makefile        Mon May 14 16:34:31 2007
-0400
@@ -0,0 +1,1 @@
+obj-y += cpu.o
diff -r 84c103f8881a xen/arch/x86/x86_32/power/cpu.c
--- a/xen/arch/x86/x86_32/power/cpu.c   Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/x86_32/power/cpu.c   Mon May 14 20:49:34 2007 -0400
@@ -7,10 +7,91 @@
  * Copyright (c) 2001 Patrick Mochel <mochel@xxxxxxxx>
  */
 
+#ifndef __XEN__
 #include <linux/module.h>
 #include <linux/suspend.h>
 #include <asm/mtrr.h>
 #include <asm/mce.h>
+#else
+#include <xen/config.h>
+#include <xen/acpi.h>
+#include <xen/smp.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/flushtlb.h>
+
+/* image of the saved processor state */
+struct saved_context {
+       u16 es, fs, gs, ss;
+       unsigned long cr0, cr2, cr3, cr4;
+       u16 gdt_pad;
+       u16 gdt_limit;
+       unsigned long gdt_base;
+       u16 idt_pad;
+       u16 idt_limit;
+       unsigned long idt_base;
+       u16 ldt;
+       u16 tss;
+       unsigned long tr;
+       unsigned long safety;
+       unsigned long return_address;
+} __attribute__((packed));
+
+#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q"
(GDT_ENTRY_TSS*8))
+#define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q"
(GDT_ENTRY_LDT*8))
+
+#define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
+#define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
+#define load_tr(tr) __asm__ __volatile("ltr %0"::"mr" (tr))
+#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"mr" (ldt))
+
+#define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr))
+#define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
+#define store_tr(tr) __asm__ ("str %0":"=mr" (tr))
+#define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt))
+
+/*
+ * Load a segment. Fall back on loading the zero
+ * segment if something goes wrong..
+ */
+#define loadsegment(seg,value)          \
+    asm volatile("\n"           \
+        "1:\t"              \
+        "mov %0,%%" #seg "\n"       \
+        "2:\n"              \
+        ".section .fixup,\"ax\"\n"  \
+        "3:\t"              \
+        "pushl $0\n\t"          \
+        "popl %%" #seg "\n\t"       \
+        "jmp 2b\n"          \
+        ".previous\n"           \
+        ".section __ex_table,\"a\"\n\t" \
+        ".align 4\n\t"          \
+        ".long 1b,3b\n"         \
+        ".previous"         \
+        : :"rm" (value))
+
+/*
+ * Save a segment register away
+ */
+#define savesegment(seg, value) \
+       asm volatile("mov %%" #seg ",%0":"=rm" (value))
+
+#define set_debugreg(value, register)           \
+        __asm__("movl %0,%%db" #register        \
+            : /* no output */           \
+            :"r" (value))
+
+void kernel_fpu_begin(void)
+{
+       clts();
+}
+
+void kernel_fpu_end(void)
+{
+       stts();
+}
+#endif
 
 static struct saved_context saved_context;
 
@@ -34,8 +115,10 @@ void __save_processor_state(struct saved
         * segment registers
         */
        savesegment(es, ctxt->es);
+#ifndef __XEN__
        savesegment(fs, ctxt->fs);
        savesegment(gs, ctxt->gs);
+#endif
        savesegment(ss, ctxt->ss);
 
        /*
@@ -60,6 +143,7 @@ static void do_fpu_end(void)
        kernel_fpu_end();
 }
 
+#ifndef __XEN__
 static void fix_processor_context(void)
 {
        int cpu = smp_processor_id();
@@ -84,6 +168,32 @@ static void fix_processor_context(void)
        }
 
 }
+#else
+static void fix_processor_context(void)
+{
+       int cpu = smp_processor_id();
+       struct tss_struct * t = &init_tss[cpu];;
+
+       if ( supervisor_mode_kernel && cpu_has_sep )
+               wrmsr(MSR_IA32_SYSENTER_ESP, &t->esp1, 0);
+
+       set_tss_desc(cpu,t);    /* This just modifies memory; should not
be necessary. But... This is necessary, because 386 hardware has concept
of busy TSS or some similar stupidity. */
+
+       load_TR(cpu);          /* This does ltr */
+       __asm__ __volatile__ ( "lldt %%ax" : : "a" (0) );/* This does
lldt */
+
+       /*
+        * Now maybe reset the debug registers
+        */
+       set_debugreg(0UL, 0);
+       set_debugreg(0UL, 1);
+       set_debugreg(0UL, 2);
+       set_debugreg(0UL, 3);
+       /* no 4 and 5 */
+       set_debugreg(0UL, 6);
+       set_debugreg(0UL, 7);
+}
+#endif
 
 void __restore_processor_state(struct saved_context *ctxt)
 {
@@ -106,15 +216,19 @@ void __restore_processor_state(struct sa
         * segment registers
         */
        loadsegment(es, ctxt->es);
+#ifndef __XEN__
        loadsegment(fs, ctxt->fs);
        loadsegment(gs, ctxt->gs);
+#endif
        loadsegment(ss, ctxt->ss);
 
+#ifndef __XEN__
        /*
         * sysenter MSRs
         */
        if (boot_cpu_has(X86_FEATURE_SEP))
                enable_sep_cpu();
+#endif
 
        fix_processor_context();
        do_fpu_end();
@@ -127,6 +241,8 @@ void restore_processor_state(void)
        __restore_processor_state(&saved_context);
 }
 
+#ifndef __XEN__
 /* Needed by apm.c */
 EXPORT_SYMBOL(save_processor_state);
 EXPORT_SYMBOL(restore_processor_state);
+#endif
diff -r 84c103f8881a xen/arch/x86/x86_64/Makefile
--- a/xen/arch/x86/x86_64/Makefile      Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/x86_64/Makefile      Mon May 14 16:34:31 2007 -0400
@@ -5,6 +5,8 @@ obj-y += gpr_switch.o
 obj-y += gpr_switch.o
 obj-y += mm.o
 obj-y += traps.o
+
+subdir-y += power
 
 obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_COMPAT) += domain.o
diff -r 84c103f8881a xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c  Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/x86_64/mm.c  Mon May 14 16:34:31 2007 -0400
@@ -191,9 +191,16 @@ void __init setup_idle_pagetable(void)
                   __PAGE_HYPERVISOR));
 }
 
-void __init zap_low_mappings(void)
+void zap_low_mappings(void)
 {
     l4e_write(&idle_pg_table[0], l4e_empty());
+    flush_tlb_all_pge();
+}
+
+void init_low_mappings(void)
+{
+    l4e_write(&idle_pg_table[0],
+               l4e_from_paddr(__pa(idle_pg_table_l3),
__PAGE_HYPERVISOR));
     flush_tlb_all_pge();
 }
 
diff -r 84c103f8881a xen/arch/x86/x86_64/power/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/x86_64/power/Makefile        Mon May 14 16:34:31 2007
-0400
@@ -0,0 +1,2 @@
+obj-y += wakeup.o
+obj-y += suspend.o
diff -r 84c103f8881a xen/arch/x86/x86_64/power/suspend.c
--- a/xen/arch/x86/x86_64/power/suspend.c       Mon May 14 15:12:50 2007
-0400
+++ b/xen/arch/x86/x86_64/power/suspend.c       Mon May 14 21:03:02 2007
-0400
@@ -6,12 +6,17 @@
  * Copyright (c) 2002 Pavel Machek <pavel@xxxxxxx>
  * Copyright (c) 2001 Patrick Mochel <mochel@xxxxxxxx>
  */
-
-#include <linux/smp.h>
-#include <linux/suspend.h>
-#include <asm/proto.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
+#include <xen/config.h>
+#include <xen/acpi.h>
+#include <xen/smp.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/flushtlb.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/support.h>
+#include <asm/x86_64/suspend.h>
+#include <asm/x86_64/asm_defns.h>
+#include <asm/ldt.h>
 
 struct saved_context saved_context;
 
@@ -21,6 +26,44 @@ unsigned long saved_context_r12, saved_c
 unsigned long saved_context_r12, saved_context_r13, saved_context_r14,
saved_context_r15;
 unsigned long saved_context_eflags;
 
+#ifdef __XEN__
+unsigned long saved_context_msr_cstar, saved_context_msr_lstar;
+unsigned long saved_video_mode;
+
+#define MSR_KERNEL_GS_BASE MSR_SHADOW_GS_BASE
+
+static inline void kernel_fpu_begin(void){
+    clts();
+}
+
+static inline void kernel_fpu_end(void){
+    stts();
+}
+
+static inline void syscall_init(void){
+    wrmsrl(MSR_LSTAR, saved_context_msr_lstar);
+    wrmsrl(MSR_CSTAR, saved_context_msr_cstar);
+    wrmsr(MSR_STAR, 0, (FLAT_RING3_CS32<<16) | __HYPERVISOR_CS);
+    wrmsr(MSR_SYSCALL_MASK, EF_VM|EF_RF|EF_NT|EF_DF|EF_IE|EF_TF, 0U);
+}
+
+static inline void load_gs_index(unsigned base){
+        __asm__ __volatile__ (
+            "     swapgs              \n"
+            "1:   movl %k0,%%gs       \n"
+            "    "safe_swapgs"        \n"
+            ".section .fixup,\"ax\"   \n"
+            "2:   xorl %k0,%k0        \n"
+            "     jmp  1b             \n"
+            ".previous                \n"
+            ".section __ex_table,\"a\"\n"
+            "    .align 8             \n"
+            "    .quad 1b,2b          \n"
+            ".previous                  "
+            : : "r" (base&0xffff) );
+}
+#endif /* __XEN__*/
+
 void __save_processor_state(struct saved_context *ctxt)
 {
     kernel_fpu_begin();
@@ -55,6 +98,9 @@ void __save_processor_state(struct saved
     asm volatile ("movq %%cr3, %0" : "=r" (ctxt->cr3));
     asm volatile ("movq %%cr4, %0" : "=r" (ctxt->cr4));
     asm volatile ("movq %%cr8, %0" : "=r" (ctxt->cr8));
+
+    rdmsrl(MSR_CSTAR, saved_context_msr_cstar);
+    rdmsrl(MSR_LSTAR, saved_context_msr_lstar);
 }
 
 void save_processor_state(void)
@@ -91,10 +137,24 @@ void __restore_processor_state(struct sa
     /*
      * segment registers
      */
+#ifndef __XEN__
     asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
     asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
     asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
     load_gs_index(ctxt->gs);
+#else
+    /* Xen doesn't care these selectors. However if previous suspend
+     * happens on an idle context, we should avoid recover them since
+     * idle page table only maps xen portion of gdt table and that
+     * load may result page fault badly for guest portion.
+     */
+    if (!is_idle_vcpu(current)) {
+       asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
+       asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
+       asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
+       load_gs_index(ctxt->gs);
+    }
+#endif
     asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
 
     wrmsrl(MSR_FS_BASE, ctxt->fs_base);
@@ -114,6 +174,28 @@ void restore_processor_state(void)
 
 void fix_processor_context(void)
 {
+#ifdef __XEN__    
+    int cpu = smp_processor_id();
+
+    struct tss_struct *t = &init_tss[cpu];
+
+    set_tss_desc(cpu,t);    /* This just modifies memory; should not be
neccessary. But... This is neccessary, because 386 hardware has concept
of busy TSS or some similar stupidity. */
+
+    syscall_init();                         /* This sets MSR_*STAR and
related */
+    load_TR(cpu);             /* This does ltr */
+    load_LDT(current); /* This does lldt */
+
+    /*
+     * Now maybe reset the debug registers
+     */
+    set_debugreg(current, 0UL, 0);
+    set_debugreg(current, 0UL, 1);
+    set_debugreg(current, 0UL, 2);
+    set_debugreg(current, 0UL, 3);
+    /* no 4 and 5 */
+    set_debugreg(current, 0UL, 6);
+    set_debugreg(current, 0UL, 7);
+#else
     int cpu = smp_processor_id();
     struct tss_struct *t = &per_cpu(init_tss, cpu);
 
@@ -137,85 +219,6 @@ void fix_processor_context(void)
                 loaddebug(&current->thread, 6);
                 loaddebug(&current->thread, 7);
     }
-}
-
-#ifdef CONFIG_SOFTWARE_SUSPEND
-/* Defined in arch/x86_64/kernel/suspend_asm.S */
-extern int restore_image(void);
-
-pgd_t *temp_level4_pgt;
-
-static int res_phys_pud_init(pud_t *pud, unsigned long address,
unsigned long end)
-{
-    long i, j;
-
-    i = pud_index(address);
-    pud = pud + i;
-    for (; i < PTRS_PER_PUD; pud++, i++) {
-        unsigned long paddr;
-        pmd_t *pmd;
-
-        paddr = address + i*PUD_SIZE;
-        if (paddr >= end)
-            break;
-
-        pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
-        if (!pmd)
-            return -ENOMEM;
-        set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
-        for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
-            unsigned long pe;
-
-            if (paddr >= end)
-                break;
-            pe = _PAGE_NX | _PAGE_PSE | _KERNPG_TABLE | paddr;
-            pe &= __supported_pte_mask;
-            set_pmd(pmd, __pmd(pe));
-        }
-    }
-    return 0;
-}
-
-static int set_up_temporary_mappings(void)
-{
-    unsigned long start, end, next;
-    int error;
-
-    temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
-    if (!temp_level4_pgt)
-        return -ENOMEM;
-
-    /* It is safe to reuse the original kernel mapping */
-    set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
-        init_level4_pgt[pgd_index(__START_KERNEL_map)]);
-
-    /* Set up the direct mapping from scratch */
-    start = (unsigned long)pfn_to_kaddr(0);
-    end = (unsigned long)pfn_to_kaddr(end_pfn);
-
-    for (; start < end; start = next) {
-        pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
-        if (!pud)
-            return -ENOMEM;
-        next = start + PGDIR_SIZE;
-        if (next > end)
-            next = end;
-        if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
-            return error;
-        set_pgd(temp_level4_pgt + pgd_index(start),
-            mk_kernel_pgd(__pa(pud)));
-    }
-    return 0;
-}
-
-int swsusp_arch_resume(void)
-{
-    int error;
-
-    /* We have got enough memory and from now on we cannot recover */
-    if ((error = set_up_temporary_mappings()))
-        return error;
-    restore_image();
-    return 0;
-}
-#endif /* CONFIG_SOFTWARE_SUSPEND */
+#endif /* __XEN__ */
+}
+
diff -r 84c103f8881a xen/arch/x86/x86_64/power/wakeup.S
--- a/xen/arch/x86/x86_64/power/wakeup.S        Mon May 14 15:12:50 2007
-0400
+++ b/xen/arch/x86/x86_64/power/wakeup.S        Mon May 14 16:34:31 2007
-0400
@@ -1,8 +1,16 @@
 .text
+
+#ifdef __XEN__
+#include <asm/page.h>
+#include <asm/msr.h>
+#include <xen/config.h>
+#include <asm/config.h>
+#else
 #include <linux/linkage.h>
 #include <asm/segment.h>
 #include <asm/page.h>
 #include <asm/msr.h>
+#endif /* __XEN__ */
 
 # Copyright 2003 Pavel Machek <pavel@xxxxxxx>, distribute under GPLv2
 #
@@ -15,6 +23,13 @@
 # cs = 0x1234, eip = 0x05
 #
 
+#ifdef __XEN__
+/* A wakeup gdt is used before restoring cpu context. Clean it later */
+#define __KERNEL_CS   0x10
+#define __KERNEL_DS   0x18
+#define __START_KERNEL_map __PAGE_OFFSET
+#define SYM_PHYS(sym) (sym - __PAGE_OFFSET)
+#endif
 
 ALIGN
     .align  16
@@ -121,7 +136,13 @@ wakeup_32:
     movl    %eax, %cr4
 
     /* Setup early boot stage 4 level pagetables */
+    
+#ifdef __XEN__    
+     movl    $SYM_PHYS(idle_pg_table), %eax
+#else    
     movl    $(wakeup_level4_pgt - __START_KERNEL_map), %eax
+#endif /* __XEN__ */
+
     movl    %eax, %cr3
 
     /* Setup EFER (Extended Feature Enable Register) */
@@ -178,8 +199,12 @@ reach_compatibility_mode:
     movb    $0xa9, %al  ;  outb %al, $0x80
     
     /* Load new GDT with the 64bit segment using 32bit descriptor */
+#ifdef __XEN__    
+    lgdt    %cs:SYM_PHYS(nopaging_gdt_descr)
+#else
     movl    $(pGDT32 - __START_KERNEL_map), %eax
     lgdt    (%eax)
+#endif /* __XEN__ */
 
     movl    $(wakeup_jumpvector - __START_KERNEL_map), %eax
     /* Finally jump in 64bit mode */
@@ -187,7 +212,11 @@ reach_compatibility_mode:
 
 wakeup_jumpvector:
     .long   wakeup_long64 - __START_KERNEL_map
+#ifdef __XEN__    
+    .word   __HYPERVISOR_CS
+#else    
     .word   __KERNEL_CS
+#endif    
 
 .code64
 
@@ -199,20 +228,44 @@ wakeup_long64:
      * addresses where we're currently running on. We have to do that
here
      * because in 32bit we couldn't load a 64bit linear address.
      */
+#ifdef __XEN__
+    lgdt    SYM_PHYS(nopaging_gdt_descr)
+#else    
     lgdt    cpu_gdt_descr - __START_KERNEL_map
+#endif
 
     movw    $0x0e00 + 'u', %ds:(0xb8016)
     
     nop
     nop
+#ifdef __XEN__    
+    movw    $__HYPERVISOR_DS, %ax
+#else    
     movw    $__KERNEL_DS, %ax
+#endif    
     movw    %ax, %ss    
     movw    %ax, %ds
     movw    %ax, %es
     movw    %ax, %fs
     movw    %ax, %gs
+    
+#ifdef __XEN__    
+    /* Xen doesn't use large memory mode, and can we? */
+    movq    SYM_PHYS(saved_esp), %rsp
+    
+    movw    $0x0e00 + 'x', %ds:(0xb8018)
+    movq    SYM_PHYS(saved_ebx), %rbx
+    movq    SYM_PHYS(saved_edi), %rdi
+    movq    SYM_PHYS(saved_esi), %rsi
+    movq    SYM_PHYS(saved_ebp), %rbp
+
+    movw    $0x0e00 + '!', %ds:(0xb801a)
+    movq    SYM_PHYS(saved_eip), %rax
+
+#else    
+
     movq    saved_esp, %rsp
-
+    
     movw    $0x0e00 + 'x', %ds:(0xb8018)
     movq    saved_ebx, %rbx
     movq    saved_edi, %rdi
@@ -221,6 +274,8 @@ wakeup_long64:
 
     movw    $0x0e00 + '!', %ds:(0xb801a)
     movq    saved_eip, %rax
+    
+#endif /* __XEN__ */
     jmp *%rax
 
 .code32
@@ -355,7 +410,6 @@ bogus_magic2:
     movw    $0x0e00 + '2', %ds:(0xb8018)
     jmp bogus_magic2
     
-
 wakeup_stack_begin: # Stack grows down
 
 .org    0xff0
@@ -378,6 +432,35 @@ ENTRY(acpi_copy_wakeup_routine)
     pushq   %rcx
     pushq   %rdx
 
+#ifdef __XEN__
+    
+    sgdt    saved_gdt(%rip)
+    sidt    saved_idt(%rip)
+    sldt    saved_ldt(%rip)
+    str saved_tss(%rip)
+    
+    movq    %cr3, %rdx
+    movq    %rdx, saved_cr3(%rip)
+    movq    %cr4, %rdx
+    movq    %rdx, saved_cr4(%rip)
+    movq    %cr0, %rdx
+    movq    %rdx, saved_cr0(%rip)
+    sgdt    real_save_gdt - wakeup_start (,%rdi)
+    movl    $MSR_EFER, %ecx
+    rdmsr
+    movl    %eax, saved_efer(%rip)
+    movl    %edx, saved_efer2(%rip)
+
+    movl    saved_video_mode(%rip), %edx
+    movl    %edx, video_mode - wakeup_start (,%rdi)
+    movl    acpi_video_flags(%rip), %edx
+    movl    %edx, video_flags - wakeup_start (,%rdi)
+    movq    $0x12345678, real_magic - wakeup_start (,%rdi)
+    movq    $0x123456789abcdef0, %rdx
+    movq    %rdx, saved_magic(%rip)
+   
+#else
+    
     sgdt    saved_gdt
     sidt    saved_idt
     sldt    saved_ldt
@@ -415,6 +498,8 @@ ENTRY(acpi_copy_wakeup_routine)
     movq    %rax, %cr0
     jmp 1f      # Flush pipelines
 1:
+#endif /* __XEN__ */
+
     # restore the regs we used
     popq    %rdx
     popq    %rcx
@@ -450,6 +535,19 @@ do_suspend_lowlevel:
     movq %r15, saved_context_r15(%rip)
     pushfq ; popq saved_context_eflags(%rip)
 
+#ifdef __XEN__
+/* Xen did not use large memory mode, so change code to ip relative */
+
+    lea .L97(%rip), %rax
+    movq %rax, saved_eip(%rip)
+    
+    movq %rsp,saved_esp(%rip)
+    movq %rbp,saved_ebp(%rip)
+    movq %rbx,saved_ebx(%rip)
+    movq %rdi,saved_edi(%rip)
+    movq %rsi,saved_esi(%rip)
+
+#else    
     movq    $.L97, saved_eip(%rip)
 
     movq %rsp,saved_esp
@@ -458,6 +556,8 @@ do_suspend_lowlevel:
     movq %rdi,saved_edi
     movq %rsi,saved_esi
 
+#endif /* __XEN__ */
+    
     addq    $8, %rsp
     movl    $3, %edi
     xorl    %eax, %eax
@@ -466,7 +566,11 @@ do_suspend_lowlevel:
     .p2align 4,,7
 .L99:
     .align 4
+#ifdef __XEN__
+    movl $__HYPERVISOR_DS32, %eax
+#else    
     movl    $24, %eax
+#endif    
     movw %ax, %ds
     movq    saved_context+58(%rip), %rax
     movq %rax, %cr4
@@ -525,3 +629,4 @@ saved_cr4:  .quad 0
 saved_cr4:  .quad 0
 saved_efer: .quad 0
 saved_efer2:    .quad 0
+
diff -r 84c103f8881a xen/include/asm-x86/acpi.h
--- a/xen/include/asm-x86/acpi.h        Mon May 14 15:12:50 2007 -0400
+++ b/xen/include/asm-x86/acpi.h        Mon May 14 18:14:26 2007 -0400
@@ -178,4 +178,6 @@ extern u8 x86_acpiid_to_apicid[];
 extern u8 x86_acpiid_to_apicid[];
 #define MAX_LOCAL_APIC 256
 
+#define pmprintk(_l, _f, _a...)                      \
+    printk(_l "<PM>" _f, ## _a )
 #endif /*_ASM_ACPI_H*/
diff -r 84c103f8881a xen/include/asm-x86/config.h
--- a/xen/include/asm-x86/config.h      Mon May 14 15:12:50 2007 -0400
+++ b/xen/include/asm-x86/config.h      Mon May 14 20:49:27 2007 -0400
@@ -367,4 +367,6 @@ extern unsigned long xenheap_phys_end; /
 #define ELFSIZE 32
 #endif
 
+#define FASTCALL(x)    x __attribute__((regparm(3)))
+
 #endif /* __X86_CONFIG_H__ */
diff -r 84c103f8881a xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Mon May 14 15:12:50 2007 -0400
+++ b/xen/include/asm-x86/page.h        Mon May 14 16:34:31 2007 -0400
@@ -287,6 +287,9 @@ extern l2_pgentry_t   idle_pg_table_l2[R
 #else
 extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
 extern l2_pgentry_t   idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES];
+#if CONFIG_PAGING_LEVELS == 4
+extern l3_pgentry_t   idle_pg_table_l3[L3_PAGETABLE_ENTRIES];
+#endif
 #ifdef CONFIG_COMPAT
 extern l2_pgentry_t  *compat_idle_pg_table_l2;
 extern unsigned int   m2p_compat_vstart;
diff -r 84c103f8881a xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h   Mon May 14 15:12:50 2007 -0400
+++ b/xen/include/asm-x86/processor.h   Mon May 14 16:34:31 2007 -0400
@@ -297,6 +297,11 @@ static inline unsigned long read_cr2(voi
     return __cr2;
 }
 
+static inline void write_cr2(unsigned long val)
+{
+       __asm__("mov %0,%%cr2": :"r" ((unsigned long)val));
+}
+
 static inline unsigned long read_cr4(void)
 {
     unsigned long __cr4;
diff -r 84c103f8881a xen/include/asm-x86/smp.h
--- a/xen/include/asm-x86/smp.h Mon May 14 15:12:50 2007 -0400
+++ b/xen/include/asm-x86/smp.h Mon May 14 20:49:27 2007 -0400
@@ -45,6 +45,7 @@ extern void zap_low_mappings(l2_pgentry_
 extern void zap_low_mappings(l2_pgentry_t *base);
 #endif
 
+extern void init_low_mappings(void);
 #define MAX_APICID 256
 extern u8 x86_cpu_to_apicid[];
 
diff -r 84c103f8881a xen/include/asm-x86/x86_64/suspend.h
--- a/xen/include/asm-x86/x86_64/suspend.h      Mon May 14 15:12:50 2007
-0400
+++ b/xen/include/asm-x86/x86_64/suspend.h      Mon May 14 16:34:31 2007
-0400
@@ -39,11 +39,12 @@ extern unsigned long saved_context_eflag
 extern unsigned long saved_context_eflags;
 
 #define loaddebug(thread,register) \
-    set_debugreg((thread)->debugreg##register, register)
+               __asm__("movq %0,%%db" #register  \
+                       : /* no output */ \
+                       :"r" ((thread)->debugreg##register))
 
 extern void fix_processor_context(void);
 
-#ifdef CONFIG_ACPI_SLEEP
 extern unsigned long saved_eip;
 extern unsigned long saved_esp;
 extern unsigned long saved_ebp;
@@ -53,4 +54,3 @@ extern unsigned long saved_edi;
 
 /* routines for saving/restoring kernel state */
 extern int acpi_save_state_mem(void);
-#endif

Attachment: xen_pm_arch.patch
Description: xen_pm_arch.patch

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.