[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] Merge



# HG changeset patch
# User Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
# Date 1330620516 0
# Node ID 08392a9cde1e858644ecce6a4ee0dea0dd4a5968
# Parent  01d95561350bd249fa0fe67741b3eacea66ee153
# Parent  3ea51ace058283c2db577d85d05fd680e321f84d
Merge
---


diff -r 01d95561350b -r 08392a9cde1e .hgignore
--- a/.hgignore Thu Mar 01 16:41:56 2012 +0000
+++ b/.hgignore Thu Mar 01 16:48:36 2012 +0000
@@ -202,6 +202,7 @@
 ^tools/misc/xenperf$
 ^tools/misc/xenpm$
 ^tools/misc/xen-hvmctx$
+^tools/misc/xen-lowmemd$
 ^tools/misc/gtraceview$
 ^tools/misc/gtracestat$
 ^tools/misc/xenlockprof$
diff -r 01d95561350b -r 08392a9cde1e tools/libxc/xc_cpufeature.h
--- a/tools/libxc/xc_cpufeature.h       Thu Mar 01 16:41:56 2012 +0000
+++ b/tools/libxc/xc_cpufeature.h       Thu Mar 01 16:48:36 2012 +0000
@@ -129,10 +129,12 @@
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx) */
 #define X86_FEATURE_FSGSBASE     0 /* {RD,WR}{FS,GS}BASE instructions */
 #define X86_FEATURE_BMI1         3 /* 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE          4 /* Hardware Lock Elision */
 #define X86_FEATURE_AVX2         5 /* AVX2 instructions */
 #define X86_FEATURE_SMEP         7 /* Supervisor Mode Execution Protection */
 #define X86_FEATURE_BMI2         8 /* 2nd group bit manipulation extensions */
 #define X86_FEATURE_ERMS         9 /* Enhanced REP MOVSB/STOSB */
 #define X86_FEATURE_INVPCID     10 /* Invalidate Process Context ID */
+#define X86_FEATURE_RTM         11 /* Restricted Transactional Memory */
 
 #endif /* __LIBXC_CPUFEATURE_H */
diff -r 01d95561350b -r 08392a9cde1e tools/libxc/xc_cpuid_x86.c
--- a/tools/libxc/xc_cpuid_x86.c        Thu Mar 01 16:41:56 2012 +0000
+++ b/tools/libxc/xc_cpuid_x86.c        Thu Mar 01 16:48:36 2012 +0000
@@ -363,11 +363,13 @@
     case 0x00000007: /* Intel-defined CPU features */
         if ( input[1] == 0 ) {
             regs[1] &= (bitmaskof(X86_FEATURE_BMI1) |
+                        bitmaskof(X86_FEATURE_HLE)  |
                         bitmaskof(X86_FEATURE_AVX2) |
                         bitmaskof(X86_FEATURE_SMEP) |
                         bitmaskof(X86_FEATURE_BMI2) |
                         bitmaskof(X86_FEATURE_ERMS) |
                         bitmaskof(X86_FEATURE_INVPCID) |
+                        bitmaskof(X86_FEATURE_RTM)  |
                         bitmaskof(X86_FEATURE_FSGSBASE));
         } else
             regs[1] = 0;
@@ -496,9 +498,11 @@
     case 0x00000007:
         if ( input[1] == 0 )
             regs[1] &= (bitmaskof(X86_FEATURE_BMI1) |
+                        bitmaskof(X86_FEATURE_HLE)  |
                         bitmaskof(X86_FEATURE_AVX2) |
                         bitmaskof(X86_FEATURE_BMI2) |
                         bitmaskof(X86_FEATURE_ERMS) |
+                        bitmaskof(X86_FEATURE_RTM)  |
                         bitmaskof(X86_FEATURE_FSGSBASE));
         else
             regs[1] = 0;
diff -r 01d95561350b -r 08392a9cde1e tools/misc/Makefile
--- a/tools/misc/Makefile       Thu Mar 01 16:41:56 2012 +0000
+++ b/tools/misc/Makefile       Thu Mar 01 16:48:36 2012 +0000
@@ -5,11 +5,12 @@
 
 CFLAGS += $(CFLAGS_libxenctrl)
 CFLAGS += $(CFLAGS_xeninclude)
+CFLAGS += $(CFLAGS_libxenstore)
 
 HDRS     = $(wildcard *.h)
 
 TARGETS-y := xenperf xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xenwatchdogd
-TARGETS-$(CONFIG_X86) += xen-detect xen-hvmctx xen-hvmcrash
+TARGETS-$(CONFIG_X86) += xen-detect xen-hvmctx xen-hvmcrash xen-lowmemd
 TARGETS-$(CONFIG_MIGRATE) += xen-hptool
 TARGETS := $(TARGETS-y)
 
@@ -21,7 +22,7 @@
 INSTALL_BIN-$(CONFIG_X86) += xen-detect
 INSTALL_BIN := $(INSTALL_BIN-y)
 
-INSTALL_SBIN-y := xm xen-bugtool xen-python-path xend xenperf xsview xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xenwatchdogd xen-ringwatch
+INSTALL_SBIN-y := xm xen-bugtool xen-python-path xend xenperf xsview xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xenwatchdogd xen-ringwatch xen-lowmemd
 INSTALL_SBIN-$(CONFIG_X86) += xen-hvmctx xen-hvmcrash
 INSTALL_SBIN-$(CONFIG_MIGRATE) += xen-hptool
 INSTALL_SBIN := $(INSTALL_SBIN-y)
@@ -70,6 +71,9 @@
 xenwatchdogd: xenwatchdogd.o
        $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS_libxenctrl) $(APPEND_LDFLAGS)
 
+xen-lowmemd: xen-lowmemd.o
+       $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS_libxenctrl) $(LDLIBS_libxenstore) $(APPEND_LDFLAGS)
+
 gtraceview: gtraceview.o
        $(CC) $(LDFLAGS) -o $@ $< $(CURSES_LIBS) $(APPEND_LDFLAGS)
 
diff -r 01d95561350b -r 08392a9cde1e tools/misc/xen-lowmemd.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/misc/xen-lowmemd.c  Thu Mar 01 16:48:36 2012 +0000
@@ -0,0 +1,148 @@
+/*
+ * xen-lowmemd: demo VIRQ_ENOMEM
+ * Andres Lagar-Cavilla (GridCentric Inc.)
+ */
+
+#include <stdio.h>
+#include <xenctrl.h>
+#include <xs.h>
+#include <stdlib.h>
+#include <string.h>
+
+static evtchn_port_t virq_port      = -1;
+static xc_evtchn *xce_handle        = NULL;
+static xc_interface *xch            = NULL;
+static struct xs_handle *xs_handle  = NULL;
+
+/*
+ * atexit() handler: release the virq binding (if one was made) and close
+ * whichever of the event-channel, control-interface and xenstore handles
+ * have been opened.
+ */
+void cleanup(void)
+{
+    /*
+     * virq_port keeps its (evtchn_port_t) -1 initialiser until main() has
+     * bound the virq.  Test for that sentinel explicitly: evtchn_port_t is
+     * an unsigned type in Xen's public headers, so "virq_port > -1" can
+     * never be true and the port would never be unbound.
+     */
+    if (virq_port != (evtchn_port_t) -1)
+        xc_evtchn_unbind(xce_handle, virq_port);
+    if (xce_handle)
+        xc_evtchn_close(xce_handle);
+    if (xch)
+        xc_interface_close(xch);
+    if (xs_handle)
+        xs_daemon_close(xs_handle);
+}
+
+/* Never shrink dom0 below 1 GiB */
+#define DOM0_FLOOR  (1 << 30)
+#define DOM0_FLOOR_PG   ((DOM0_FLOOR) >> 12)
+
+/* Act if free memory is less than 92 MiB */
+#define THRESHOLD   (92 << 20)
+#define THRESHOLD_PG    ((THRESHOLD) >> 12)
+
+#define BUFSZ 512
+/*
+ * Respond to a low-memory virq.
+ *
+ * Reads the host's free page count and, if it is below THRESHOLD_PG, works
+ * out a dom0 size that is smaller by the shortfall and writes it to
+ * /local/domain/0/memory/target in xenstore.  Nothing is written when the
+ * host is at or above the threshold, or when the new size would not stay
+ * above DOM0_FLOOR_PG.  Failures are reported with perror() and otherwise
+ * ignored, so the caller's event loop keeps running.
+ */
+void handle_low_mem(void)
+{
+    xc_dominfo_t  dom0_info;
+    xc_physinfo_t info;
+    unsigned long long free_pages, dom0_pages, diff, dom0_target;
+    char data[BUFSZ], error[BUFSZ];
+
+    if (xc_physinfo(xch, &info) < 0)
+    {
+        perror("Getting physinfo failed");
+        return;
+    }
+
+    free_pages = (unsigned long long) info.free_pages;
+    printf("Available free pages: 0x%llx:%llu\n",
+            free_pages, free_pages);
+
+    /* Don't do anything if we have more than the threshold free */
+    if ( free_pages >= THRESHOLD_PG )
+        return;
+    /* Shortfall, in pages, that dom0 is asked to give back */
+    diff = THRESHOLD_PG - free_pages;
+
+    if (xc_domain_getinfo(xch, 0, 1, &dom0_info) < 1)
+    {
+        perror("Failed to get dom0 info");
+        return;
+    }
+
+    dom0_pages = (unsigned long long) dom0_info.nr_pages;
+    printf("Dom0 pages: 0x%llx:%llu\n", dom0_pages, dom0_pages);
+
+    /*
+     * All quantities are unsigned.  Bail out before subtracting when dom0
+     * holds no more than the shortfall, otherwise the target would wrap to
+     * a huge value and slip past the floor check below.
+     */
+    if (dom0_pages <= diff)
+        return;
+    dom0_target = dom0_pages - diff;
+    if (dom0_target <= DOM0_FLOOR_PG)
+        return;
+
+    printf("Shooting for dom0 target 0x%llx:%llu\n",
+            dom0_target, dom0_target);
+
+    /*
+     * NOTE(review): the value written is a page count.  Confirm that this
+     * is the unit consumers of memory/target expect.
+     */
+    snprintf(data, BUFSZ, "%llu", dom0_target);
+    if (!xs_write(xs_handle, XBT_NULL,
+            "/local/domain/0/memory/target", data, strlen(data)))
+    {
+        snprintf(error, BUFSZ,"Failed to write target %s to xenstore", data);
+        perror(error);
+    }
+}
+
+/*
+ * Entry point: open the control interface, the event-channel device and a
+ * xenstore connection, bind VIRQ_ENOMEM, then loop forever calling
+ * handle_low_mem() for each notification received.
+ *
+ * Each failure returns a distinct non-zero status (1-7).  cleanup() is
+ * registered with atexit() first, so it runs when main() returns and
+ * releases whatever had been acquired by then.  argc and argv are unused.
+ */
+int main(int argc, char *argv[])
+{
+    int rc;
+
+    atexit(cleanup);
+
+    xch = xc_interface_open(NULL, NULL, 0);
+    if (xch == NULL)
+    {
+        perror("Failed to open xc interface");
+        return 1;
+    }
+
+    xce_handle = xc_evtchn_open(NULL, 0);
+    if (xce_handle == NULL)
+    {
+        perror("Failed to open evtchn device");
+        return 2;
+    }
+
+    xs_handle = xs_daemon_open();
+    if (xs_handle == NULL)
+    {
+        perror("Failed to open xenstore connection");
+        return 3;
+    }
+
+    if ((rc = xc_evtchn_bind_virq(xce_handle, VIRQ_ENOMEM)) == -1)
+    {
+        perror("Failed to bind to domain exception virq port");
+        return 4;
+    }
+
+    /* Only record the port once the bind has succeeded */
+    virq_port = rc;
+
+    while(1)
+    {
+        evtchn_port_t port;
+
+        if ((port = xc_evtchn_pending(xce_handle)) == -1)
+        {
+            perror("Failed to listen for pending event channel");
+            return 5;
+        }
+
+        if (port != virq_port)
+        {
+            /*
+             * Not an errno condition, so print directly instead of using
+             * perror(), which would append an unrelated error string.
+             */
+            fprintf(stderr, "Wrong port, got %u expected %u\n",
+                    (unsigned int) port, (unsigned int) virq_port);
+            return 6;
+        }
+
+        /* Unmask the port before doing the work */
+        if (xc_evtchn_unmask(xce_handle, port) == -1)
+        {
+            perror("Failed to unmask port");
+            return 7;
+        }
+
+        printf("Got a virq kick, time to get work\n");
+        handle_low_mem();
+    }
+
+    return 0;
+}
diff -r 01d95561350b -r 08392a9cde1e xen/arch/x86/hvm/rtc.c
--- a/xen/arch/x86/hvm/rtc.c    Thu Mar 01 16:41:56 2012 +0000
+++ b/xen/arch/x86/hvm/rtc.c    Thu Mar 01 16:48:36 2012 +0000
@@ -33,6 +33,8 @@
 #define vrtc_domain(x) (container_of((x), struct domain, \
                                      arch.hvm_domain.pl_time.vrtc))
 #define vrtc_vcpu(x)   (pt_global_vcpu_target(vrtc_domain(x)))
+/* Base year that tm_year values are offsets from */
+#define epoch_year     1900
+/* Convert a tm_year offset into a full calendar year.  The argument is
+ * parenthesised so that any expression can be passed safely. */
+#define get_year(x)    ((x) + epoch_year)
 
 static void rtc_periodic_cb(struct vcpu *v, void *opaque)
 {
@@ -165,7 +167,7 @@
       
     ASSERT(spin_is_locked(&s->lock));
 
-    before = mktime(tm->tm_year, tm->tm_mon + 1, tm->tm_mday,
+    before = mktime(get_year(tm->tm_year), tm->tm_mon + 1, tm->tm_mday,
                    tm->tm_hour, tm->tm_min, tm->tm_sec);
     
     tm->tm_sec = from_bcd(s, s->hw.cmos_data[RTC_SECONDS]);
@@ -179,7 +181,7 @@
     tm->tm_mon = from_bcd(s, s->hw.cmos_data[RTC_MONTH]) - 1;
     tm->tm_year = from_bcd(s, s->hw.cmos_data[RTC_YEAR]) + 100;
 
-    after = mktime(tm->tm_year, tm->tm_mon + 1, tm->tm_mday,
+    after = mktime(get_year(tm->tm_year), tm->tm_mon + 1, tm->tm_mday,
                    tm->tm_hour, tm->tm_min, tm->tm_sec);
 
     /* We use the guest's setting of the RTC to define the local-time 
@@ -257,7 +259,7 @@
                 if ( (unsigned)tm->tm_wday >= 7 )
                     tm->tm_wday = 0;
                 days_in_month = get_days_in_month(tm->tm_mon, 
-                                                  tm->tm_year + 1900);
+                                                  get_year(tm->tm_year));
                 tm->tm_mday++;
                 if ( tm->tm_mday < 1 )
                 {
diff -r 01d95561350b -r 08392a9cde1e xen/common/grant_table.c
--- a/xen/common/grant_table.c  Thu Mar 01 16:41:56 2012 +0000
+++ b/xen/common/grant_table.c  Thu Mar 01 16:48:36 2012 +0000
@@ -585,6 +585,8 @@
             act->start = 0;
             act->length = PAGE_SIZE;
             act->is_sub_page = 0;
+            act->trans_domain = rd;
+            act->trans_gref = op->ref;
         }
     }
 
diff -r 01d95561350b -r 08392a9cde1e xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Thu Mar 01 16:41:56 2012 +0000
+++ b/xen/common/page_alloc.c   Thu Mar 01 16:48:36 2012 +0000
@@ -35,6 +35,7 @@
 #include <xen/perfc.h>
 #include <xen/numa.h>
 #include <xen/nodemask.h>
+#include <xen/event.h>
 #include <xen/tmem.h>
 #include <xen/tmem_xen.h>
 #include <public/sysctl.h>
@@ -300,6 +301,107 @@
     return needed;
 }
 
+/* Default to 64 MiB */
+#define DEFAULT_LOW_MEM_VIRQ    (((paddr_t) 64)   << 20)
+#define MAX_LOW_MEM_VIRQ        (((paddr_t) 1024) << 20)
+
+static paddr_t __read_mostly opt_low_mem_virq = ((paddr_t) -1);
+size_param("low_mem_virq_limit", opt_low_mem_virq);
+
+/* Thresholds to control hysteresis. In pages */
+/* When memory grows above this threshold, reset hysteresis.
+ * -1 initially to not reset until at least one virq issued. */
+static unsigned long low_mem_virq_high      = -1UL;
+/* Threshold at which we issue virq */
+static unsigned long low_mem_virq_th        = 0;
+/* Original threshold after all checks completed */
+static unsigned long low_mem_virq_orig      = 0;
+/* Order for current threshold */
+static unsigned int  low_mem_virq_th_order  = 0;
+
+/*
+ * Work out the initial low-memory virq threshold from the
+ * "low_mem_virq_limit" option (or the built-in default), clamp it against
+ * the heap that is currently available and against MAX_LOW_MEM_VIRQ, then
+ * round it down to a power-of-two number of pages.  The result is stored in
+ * low_mem_virq_th / low_mem_virq_orig / low_mem_virq_th_order.
+ */
+static void __init setup_low_mem_virq(void)
+{
+    unsigned int order;
+    paddr_t limit, avail;
+    bool_t use_default;
+
+    /* A limit of zero is the user's request never to raise this virq:
+     * record -1UL as the threshold and leave everything else untouched. */
+    if ( opt_low_mem_virq == 0 )
+    {
+        low_mem_virq_th = -1UL;
+        return;
+    }
+
+    /* The all-ones initialiser still being there means no limit was given */
+    use_default = (opt_low_mem_virq == ((paddr_t) -1));
+    if ( use_default )
+        limit = DEFAULT_LOW_MEM_VIRQ;
+    else
+        limit = opt_low_mem_virq;
+
+    /* What is left of the heap, in bytes. Dom0 has already been allocated
+     * by now, so make sure we won't be complaining straight away. */
+    avail = ((paddr_t) total_avail_pages) << PAGE_SHIFT;
+    if ( limit > avail )
+        limit = avail;
+
+    /* Apply the predefined ceiling */
+    if ( limit > MAX_LOW_MEM_VIRQ )
+        limit = MAX_LOW_MEM_VIRQ;
+
+    /* A default that ended up exactly at the available level is halved */
+    if ( use_default && (limit == avail) )
+        limit >>= 1;
+
+    /* Never go below a single page */
+    if ( limit < (paddr_t) PAGE_SIZE )
+        limit = PAGE_SIZE;
+
+    /* Bytes -> pages */
+    low_mem_virq_th = limit >> PAGE_SHIFT;
+
+    /* Round the page count down to a power of two */
+    order = get_order_from_pages(low_mem_virq_th);
+    if ( (1UL << order) > low_mem_virq_th )
+        order--;
+
+    /* Publish the bounds */
+    low_mem_virq_th_order = order;
+    low_mem_virq_orig = 1UL << order;
+    low_mem_virq_th = low_mem_virq_orig;
+
+    printk("Initial low memory virq threshold set at 0x%lx pages.\n",
+            low_mem_virq_th);
+}
+
+/*
+ * Raise VIRQ_ENOMEM when the available heap has dropped to the current
+ * threshold, and maintain the hysteresis state:
+ *  - after raising the virq the threshold drops by one order (not below
+ *    order 0), and low_mem_virq_high is set one order above the level
+ *    that just fired;
+ *  - once available memory climbs back to low_mem_virq_high the threshold
+ *    is raised by one order, stopping at the original threshold.
+ */
+static void check_low_mem_virq(void)
+{
+    /* A threshold of -1UL is the "never trigger" setting chosen at setup
+     * time. Every page count compares <= -1UL, so without this check the
+     * virq would fire anyway and the hysteresis would restart from
+     * order 0. */
+    if ( low_mem_virq_th == -1UL )
+        return;
+
+    if ( unlikely(total_avail_pages <= low_mem_virq_th) )
+    {
+        send_global_virq(VIRQ_ENOMEM);
+
+        /* Update thresholds. Next warning will be when we drop below
+         * next order. However, we wait until we grow beyond one
+         * order above us to complain again at the current order */
+        low_mem_virq_high   = 1UL << (low_mem_virq_th_order + 1);
+        if ( low_mem_virq_th_order > 0 )
+            low_mem_virq_th_order--;
+        low_mem_virq_th     = 1UL << low_mem_virq_th_order;
+        return;
+    }
+
+    if ( unlikely(total_avail_pages >= low_mem_virq_high) )
+    {
+        /* Reset hysteresis. Bring threshold up one order.
+         * If we are back where originally set, set high
+         * threshold to -1 to avoid further growth of
+         * virq threshold. */
+        low_mem_virq_th_order++;
+        low_mem_virq_th = 1UL << low_mem_virq_th_order;
+        if ( low_mem_virq_th == low_mem_virq_orig )
+            low_mem_virq_high = -1UL;
+        else
+            low_mem_virq_high = 1UL << (low_mem_virq_th_order + 2);
+    }
+}
+
 /* Allocate 2^@order contiguous pages. */
 static struct page_info *alloc_heap_pages(
     unsigned int zone_lo, unsigned int zone_hi,
@@ -420,6 +522,8 @@
     total_avail_pages -= request;
     ASSERT(total_avail_pages >= 0);
 
+    check_low_mem_virq();
+
     if ( d != NULL )
         d->last_alloc_node = node;
 
@@ -1022,6 +1126,10 @@
     }
 
     printk("done.\n");
+
+    /* Now that the heap is initialized, run checks and set bounds
+     * for the low mem virq algorithm. */
+    setup_low_mem_virq();
 }
 
 
diff -r 01d95561350b -r 08392a9cde1e xen/drivers/passthrough/amd/iommu_init.c
--- a/xen/drivers/passthrough/amd/iommu_init.c  Thu Mar 01 16:41:56 2012 +0000
+++ b/xen/drivers/passthrough/amd/iommu_init.c  Thu Mar 01 16:48:36 2012 +0000
@@ -367,6 +367,8 @@
     u32 tail, head, *entry, tail_offest, head_offset;
 
     BUG_ON(!iommu || ((log != &iommu->event_log) && (log != &iommu->ppr_log)));
+    
+    spin_lock(&log->lock);
 
     /* make sure there's an entry in the log */
     tail_offest = ( log == &iommu->event_log ) ?
@@ -396,6 +398,8 @@
         writel(head, iommu->mmio_base + head_offset);
     }
 
+    spin_unlock(&log->lock);
+   
     return 0;
 }
 
@@ -618,11 +622,11 @@
     u32 entry;
     unsigned long flags;
 
-    spin_lock_irqsave(&iommu->lock, flags);
-
     iommu_read_log(iommu, &iommu->event_log,
                    sizeof(event_entry_t), parse_event_log_entry);
 
+    spin_lock_irqsave(&iommu->lock, flags);
+    
     /*check event overflow */
     entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
@@ -651,14 +655,10 @@
     bus = PCI_BUS(device_id);
     devfn = PCI_DEVFN2(device_id);
 
-    local_irq_enable();
-
     spin_lock(&pcidevs_lock);
     pdev = pci_get_pdev(iommu->seg, bus, devfn);
     spin_unlock(&pcidevs_lock);
 
-    local_irq_disable();
-
     if ( pdev == NULL )
         return;
 
@@ -672,10 +672,10 @@
     u32 entry;
     unsigned long flags;
 
-    spin_lock_irqsave(&iommu->lock, flags);
-
     iommu_read_log(iommu, &iommu->ppr_log,
                    sizeof(ppr_entry_t), parse_ppr_log_entry);
+    
+    spin_lock_irqsave(&iommu->lock, flags);
 
     /*check event overflow */
     entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
@@ -852,6 +852,8 @@
     ring_buf->head = 0;
     ring_buf->tail = 0;
 
+    spin_lock_init(&ring_buf->lock);
+    
     ring_buf->alloc_size = PAGE_SIZE << get_order_from_bytes(entries *
                                                              entry_size);
     ring_buf->entries = ring_buf->alloc_size / entry_size;
diff -r 01d95561350b -r 08392a9cde1e xen/include/asm-x86/amd-iommu.h
--- a/xen/include/asm-x86/amd-iommu.h   Thu Mar 01 16:41:56 2012 +0000
+++ b/xen/include/asm-x86/amd-iommu.h   Thu Mar 01 16:48:36 2012 +0000
@@ -65,6 +65,7 @@
     unsigned long alloc_size;
     uint32_t tail;
     uint32_t head;
+    spinlock_t lock;    /* protect buffer pointers */
 };
 
 typedef struct iommu_cap {
diff -r 01d95561350b -r 08392a9cde1e xen/include/public/xen.h
--- a/xen/include/public/xen.h  Thu Mar 01 16:41:56 2012 +0000
+++ b/xen/include/public/xen.h  Thu Mar 01 16:48:36 2012 +0000
@@ -158,6 +158,7 @@
 #define VIRQ_PCPU_STATE 9  /* G. (DOM0) PCPU state changed                   */
 #define VIRQ_MEM_EVENT  10 /* G. (DOM0) A memory event has occured           */
 #define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient                     */
+#define VIRQ_ENOMEM     12 /* G. (DOM0) Low on heap memory       */
 
 /* Architecture-specific VIRQ definitions. */
 #define VIRQ_ARCH_0    16

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.