
Re: [Xen-devel] Deadlocks by p2m_lock and event_lock



Can you give this a try? (Tim, Keir, Jan, if Xudong reports success, can
you please apply?)

Thanks,
Andres

# HG changeset patch
# User Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
# Date 1331737660 14400
# Node ID fe10f0433f6279091c193127d95d4d39b44a72ed
# Parent  5d20d2f6ffed0a49f030f04a8870f1926babbcbf
x86/mm: Fix deadlock between p2m and event channel locks.

The hvm io emulation code holds the p2m lock for the duration of the
emulation, which may include sending an event to qemu. On a separate path,
map_domain_pirq grabs the event channel and p2m locks in the opposite
order, so the two paths can deadlock against each other.

Fix this by taking a page ref on the page backing ram_gfn, which keeps the
page live for the duration of the emulation and lets us drop the gfn (and
with it the p2m lock) up front.

Reported-by: "Hao, Xudong" <xudong.hao@xxxxxxxxx>
Signed-off-by: "Hao, Xudong" <xudong.hao@xxxxxxxxx>
Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>

diff -r 5d20d2f6ffed -r fe10f0433f62 xen/arch/x86/hvm/emulate.c
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -77,6 +77,17 @@ static int hvmemul_do_io(
         return X86EMUL_RETRY;
     }

+    /* Maintain a ref on the mfn to ensure liveness. Put the gfn now,
+     * to avoid a potential deadlock with the event channel lock later. */
+    if ( mfn_valid(mfn_x(ram_mfn)) )
+        if ( !get_page(mfn_to_page(mfn_x(ram_mfn)),
+             curr->domain) )
+        {
+            put_gfn(curr->domain, ram_gfn);
+            return X86EMUL_RETRY;
+        }
+    put_gfn(curr->domain, ram_gfn);
+
     /*
      * Weird-sized accesses have undefined behaviour: we discard writes
      * and read all-ones.
@@ -87,7 +98,8 @@ static int hvmemul_do_io(
         ASSERT(p_data != NULL); /* cannot happen with a REP prefix */
         if ( dir == IOREQ_READ )
             memset(p_data, ~0, size);
-        put_gfn(curr->domain, ram_gfn);
+        if ( mfn_valid(mfn_x(ram_mfn)) )
+            put_page(mfn_to_page(mfn_x(ram_mfn)));
         return X86EMUL_UNHANDLEABLE;
     }

@@ -108,7 +120,8 @@ static int hvmemul_do_io(
             unsigned int bytes = vio->mmio_large_write_bytes;
             if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
             {
-                put_gfn(curr->domain, ram_gfn);
+                if ( mfn_valid(mfn_x(ram_mfn)) )
+                    put_page(mfn_to_page(mfn_x(ram_mfn)));
                 return X86EMUL_OKAY;
             }
         }
@@ -120,7 +133,8 @@ static int hvmemul_do_io(
             {
                 memcpy(p_data, &vio->mmio_large_read[addr - pa],
                        size);
-                put_gfn(curr->domain, ram_gfn);
+                if ( mfn_valid(mfn_x(ram_mfn)) )
+                    put_page(mfn_to_page(mfn_x(ram_mfn)));
                 return X86EMUL_OKAY;
             }
         }
@@ -134,7 +148,8 @@ static int hvmemul_do_io(
         vio->io_state = HVMIO_none;
         if ( p_data == NULL )
         {
-            put_gfn(curr->domain, ram_gfn);
+            if ( mfn_valid(mfn_x(ram_mfn)) )
+                put_page(mfn_to_page(mfn_x(ram_mfn)));
             return X86EMUL_UNHANDLEABLE;
         }
         goto finish_access;
@@ -144,11 +159,13 @@ static int hvmemul_do_io(
              (addr == (vio->mmio_large_write_pa +
                        vio->mmio_large_write_bytes)) )
         {
-            put_gfn(curr->domain, ram_gfn);
+            if ( mfn_valid(mfn_x(ram_mfn)) )
+                put_page(mfn_to_page(mfn_x(ram_mfn)));
             return X86EMUL_RETRY;
         }
     default:
-        put_gfn(curr->domain, ram_gfn);
+        if ( mfn_valid(mfn_x(ram_mfn)) )
+            put_page(mfn_to_page(mfn_x(ram_mfn)));
         return X86EMUL_UNHANDLEABLE;
     }

@@ -156,7 +173,8 @@ static int hvmemul_do_io(
     {
         gdprintk(XENLOG_WARNING, "WARNING: io already pending (%d)?\n",
                  p->state);
-        put_gfn(curr->domain, ram_gfn);
+        if ( mfn_valid(mfn_x(ram_mfn)) )
+            put_page(mfn_to_page(mfn_x(ram_mfn)));
         return X86EMUL_UNHANDLEABLE;
     }

@@ -208,7 +226,8 @@ static int hvmemul_do_io(

     if ( rc != X86EMUL_OKAY )
     {
-        put_gfn(curr->domain, ram_gfn);
+        if ( mfn_valid(mfn_x(ram_mfn)) )
+            put_page(mfn_to_page(mfn_x(ram_mfn)));
         return rc;
     }

@@ -244,7 +263,8 @@ static int hvmemul_do_io(
         }
     }

-    put_gfn(curr->domain, ram_gfn);
+    if ( mfn_valid(mfn_x(ram_mfn)) )
+        put_page(mfn_to_page(mfn_x(ram_mfn)));
     return X86EMUL_OKAY;
 }
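For readers skimming the thread: the deadlock fixed above is the classic
AB-BA lock inversion. Below is a self-contained userspace analogue of the
two paths (a sketch only: pthreads stand in for Xen's spinlocks, and the
function names are just labels for the call chains described in this
thread, not Xen code):

    #include <pthread.h>

    static pthread_mutex_t p2m_lock   = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t event_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Analogue of the emulation path: p2m lock first (get_gfn_unshare),
     * then the event lock (notify_via_xen_event_channel). */
    static void *io_emulation_path(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&p2m_lock);
        pthread_mutex_lock(&event_lock);   /* blocks if B holds event_lock */
        pthread_mutex_unlock(&event_lock);
        pthread_mutex_unlock(&p2m_lock);
        return NULL;
    }

    /* Analogue of map_domain_pirq: event lock first, then the p2m lock
     * (via p2m_change_entry_type_global). */
    static void *map_domain_pirq_path(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&event_lock);
        pthread_mutex_lock(&p2m_lock);     /* blocks if A holds p2m_lock */
        pthread_mutex_unlock(&p2m_lock);
        pthread_mutex_unlock(&event_lock);
        return NULL;
    }

    int main(void)
    {
        pthread_t a, b;
        pthread_create(&a, NULL, io_emulation_path, NULL);
        pthread_create(&b, NULL, map_domain_pirq_path, NULL);
        pthread_join(a, NULL);   /* with unlucky timing, never returns */
        pthread_join(b, NULL);
        return 0;
    }

The patch breaks the cycle by never holding the p2m lock while the event
lock might be taken: it pins the underlying page with get_page(), drops
the gfn (and the p2m lock) immediately, and replaces every put_gfn() on
the exit paths with a put_page(). Note also that, unlike the first cut
quoted below, it guards every get_page()/put_page() with mfn_valid() and
returns X86EMUL_RETRY if the reference cannot be taken.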




> I prefer the 2nd. I made a patch, and testing shows it works.
>
> diff -r 5d20d2f6ffed xen/arch/x86/hvm/emulate.c
> --- a/xen/arch/x86/hvm/emulate.c      Fri Mar 09 16:54:24 2012 +0000
> +++ b/xen/arch/x86/hvm/emulate.c      Wed Mar 14 15:11:52 2012 -0400
> @@ -60,20 +60,23 @@
>      ioreq_t *p = get_ioreq(curr);
>      unsigned long ram_gfn = paddr_to_pfn(ram_gpa);
>      p2m_type_t p2mt;
> -    mfn_t ram_mfn;
> +    unsigned long ram_mfn;
>      int rc;
>
>      /* Check for paged out page */
> -    ram_mfn = get_gfn_unshare(curr->domain, ram_gfn, &p2mt);
> +    ram_mfn = mfn_x(get_gfn_unshare(curr->domain, ram_gfn, &p2mt));
> +    if ( mfn_valid(ram_mfn) )
> +        get_page(mfn_to_page(ram_mfn), curr->domain);
> +    put_gfn(curr->domain, ram_gfn);
>      if ( p2m_is_paging(p2mt) )
>      {
> -        put_gfn(curr->domain, ram_gfn);
> +        put_page(mfn_to_page(ram_mfn));
>          p2m_mem_paging_populate(curr->domain, ram_gfn);
>          return X86EMUL_RETRY;
>      }
>      if ( p2m_is_shared(p2mt) )
>      {
> -        put_gfn(curr->domain, ram_gfn);
> +        put_page(mfn_to_page(ram_mfn));
>          return X86EMUL_RETRY;
>      }
>
> @@ -87,7 +90,7 @@
>          ASSERT(p_data != NULL); /* cannot happen with a REP prefix */
>          if ( dir == IOREQ_READ )
>              memset(p_data, ~0, size);
> -        put_gfn(curr->domain, ram_gfn);
> +        put_page(mfn_to_page(ram_mfn));
>          return X86EMUL_UNHANDLEABLE;
>      }
>
> @@ -108,7 +111,7 @@
>              unsigned int bytes = vio->mmio_large_write_bytes;
>              if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
>              {
> -                put_gfn(curr->domain, ram_gfn);
> +                put_page(mfn_to_page(ram_mfn));
>                  return X86EMUL_OKAY;
>              }
>          }
> @@ -120,7 +123,7 @@
>              {
>                  memcpy(p_data, &vio->mmio_large_read[addr - pa],
>                         size);
> -                put_gfn(curr->domain, ram_gfn);
> +                put_page(mfn_to_page(ram_mfn));
>                  return X86EMUL_OKAY;
>              }
>          }
> @@ -134,7 +137,7 @@
>          vio->io_state = HVMIO_none;
>          if ( p_data == NULL )
>          {
> -            put_gfn(curr->domain, ram_gfn);
> +            put_page(mfn_to_page(ram_mfn));
>              return X86EMUL_UNHANDLEABLE;
>          }
>          goto finish_access;
> @@ -144,11 +147,11 @@
>               (addr == (vio->mmio_large_write_pa +
>                         vio->mmio_large_write_bytes)) )
>          {
> -            put_gfn(curr->domain, ram_gfn);
> +            put_page(mfn_to_page(ram_mfn));
>              return X86EMUL_RETRY;
>          }
>      default:
> -        put_gfn(curr->domain, ram_gfn);
> +        put_page(mfn_to_page(ram_mfn));
>          return X86EMUL_UNHANDLEABLE;
>      }
>
> @@ -156,7 +159,7 @@
>      {
>          gdprintk(XENLOG_WARNING, "WARNING: io already pending (%d)?\n",
>                   p->state);
> -        put_gfn(curr->domain, ram_gfn);
> +        put_page(mfn_to_page(ram_mfn));
>          return X86EMUL_UNHANDLEABLE;
>      }
>
> @@ -208,7 +211,7 @@
>
>      if ( rc != X86EMUL_OKAY )
>      {
> -        put_gfn(curr->domain, ram_gfn);
> +        put_page(mfn_to_page(ram_mfn));
>          return rc;
>      }
>
> @@ -244,7 +247,7 @@
>          }
>      }
>
> -    put_gfn(curr->domain, ram_gfn);
> +    put_page(mfn_to_page(ram_mfn));
>      return X86EMUL_OKAY;
>  }
>
>
> Thanks,
> -Xudong
>
>
>> -----Original Message-----
>> From: Andres Lagar-Cavilla [mailto:andres@xxxxxxxxxxxxxxxx]
>> Sent: Wednesday, March 14, 2012 2:46 AM
>> To: Hao, Xudong
>> Cc: Tim Deegan; Keir Fraser; xen-devel@xxxxxxxxxxxxxxxxxxx; Zhang,
>> Xiantao;
>> JBeulich@xxxxxxxx
>> Subject: RE: [Xen-devel] Deadlocks by p2m_lock and event_lock
>>
>> > Hi Tim and Andres,
>> > The patch fixes part of this issue. In handle_mmio, hvmemul_do_io()
>> > is called, and the p2m lock is taken again via get_gfn_unshare(),
>> > which still triggers a deadlock.
>>
>> Typically hvmemul_do_io gets the zero gfn, because in many cases that's
>> the 'ram_gpa' it is passed. However, in the case of mmio, and
>> particularly stdvga, ram_gpa is the data to be copied to the
>> framebuffer. So it is in principle ok to get_gfn in hvmemul_do_io.
>>
>> There are two solutions:
>> 1. msix_capability_init does not call p2m_change_entry_type_global. See
>> my previous email. If we want to resync the
>> EPT/NPT/traditional/VTD/IOMMU/superduper TLBs, we can just do that
>> explicitly. I hope.
>>
>> 2. hvmemul_do_io gets a ref on the mfn underlying ram_gpa, and holds
>> that for the critical section, instead of the p2m lock. One way to
>> achieve this is:
>>
>>     /* Check for paged out page */
>>     ram_mfn = get_gfn_unshare(curr->domain, ram_gfn, &p2mt);
>>     if ( this or that )
>>     { ... handle ... }
>>     if ( mfn_valid(ram_mfn) )
>>         get_page(mfn_to_page(ram_mfn), curr->domain);
>>     put_gfn(curr->domain, ram_gfn);
>>
>>     /* replace all put_gfn in all exit paths by put_page */
>>
>> This will ensure the target page is live and sane while not holding the
>> p2m lock.
>> Xudong, did that make sense? Do you think you could try that and report
>> back?
>>
>> Thanks!
>> Andres
>>
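(As a self-contained illustration of option 2 above: take the reference
while the lock protects the lookup, then drop the lock before the
long-running work. All names here are hypothetical stand-ins, not Xen's
API; this is a sketch of the idea only.)

    #include <pthread.h>
    #include <stdatomic.h>

    struct page {
        atomic_int refcount;        /* analogue of the page_info ref */
        char data[4096];
    };

    static pthread_mutex_t p2m_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct page frame;       /* single stand-in frame */

    /* Hypothetical stand-in for the gfn -> page lookup. */
    static struct page *lookup(unsigned long gfn)
    {
        (void)gfn;
        return &frame;
    }

    static void emulate_io(unsigned long gfn)
    {
        pthread_mutex_lock(&p2m_lock);           /* get_gfn_unshare() */
        struct page *pg = lookup(gfn);
        if (pg)
            atomic_fetch_add(&pg->refcount, 1);  /* get_page() */
        pthread_mutex_unlock(&p2m_lock);         /* put_gfn(): lock dropped */

        /* ... long-running emulation, possibly notifying qemu ...
         * The page stays live because we hold a reference, and we no
         * longer hold any lock the event-channel path also wants. */

        if (pg)
            atomic_fetch_sub(&pg->refcount, 1);  /* put_page() */
    }

This is exactly the shape of the changeset at the top of this message:
get_page() under the p2m lock, put_gfn() immediately afterwards, and
put_page() on every exit path.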
>> >
>> > (XEN) Xen call trace:
>> > (XEN)    [<ffff82c4801261a3>] _spin_lock+0x1b/0xa8
>> > (XEN)    [<ffff82c4801070d3>] notify_via_xen_event_channel+0x21/0x106
>> > (XEN)    [<ffff82c4801b6883>] hvm_buffered_io_send+0x1f1/0x21b
>> > (XEN)    [<ffff82c4801bbd3a>] stdvga_intercept_mmio+0x491/0x4c7
>> > (XEN)    [<ffff82c4801b5d58>] hvm_io_intercept+0x218/0x244
>> > (XEN)    [<ffff82c4801aa931>] hvmemul_do_io+0x55a/0x716
>> > (XEN)    [<ffff82c4801aab1a>] hvmemul_do_mmio+0x2d/0x2f
>> > (XEN)    [<ffff82c4801ab239>] hvmemul_write+0x181/0x1a2
>> > (XEN)    [<ffff82c4801963f0>] x86_emulate+0xcad3/0xfbdf
>> > (XEN)    [<ffff82c4801a9d2e>] hvm_emulate_one+0x120/0x1af
>> > (XEN)    [<ffff82c4801b63cb>] handle_mmio+0x4e/0x1d1
>> > (XEN)    [<ffff82c4801afd72>] hvm_hap_nested_page_fault+0x210/0x37f
>> > (XEN)    [<ffff82c4801d2419>] vmx_vmexit_handler+0x1523/0x17d0
>> >
>> > Thanks,
>> > -Xudong
>> >
>> >> -----Original Message-----
>> >> From: Tim Deegan [mailto:tim@xxxxxxx]
>> >> Sent: Saturday, March 10, 2012 12:56 AM
>> >> To: Andres Lagar-Cavilla
>> >> Cc: Hao, Xudong; Keir Fraser; xen-devel@xxxxxxxxxxxxxxxxxxx; Zhang,
>> >> Xiantao; JBeulich@xxxxxxxx
>> >> Subject: Re: [Xen-devel] Deadlocks by p2m_lock and event_lock
>> >>
>> >> At 08:29 -0800 on 09 Mar (1331281767), Andres Lagar-Cavilla wrote:
>> >> > >> I don't know about the event lock, but it seems unwise to call
>> >> > >> into handle_mmio with a gfn lock held.  How about fixing the
>> >> > >> other path?
>> >> > >>
>> >> > >> diff -r 04673ecb9d78 xen/arch/x86/hvm/hvm.c
>> >> > >> --- a/xen/arch/x86/hvm/hvm.c Thu Mar 08 16:40:05 2012 +0000
>> >> > >> +++ b/xen/arch/x86/hvm/hvm.c Fri Mar 09 11:15:25 2012 +0000
>> >> > >> @@ -1324,10 +1324,11 @@ int hvm_hap_nested_page_fault(unsigned l
>> >> > >>      if ( (p2mt == p2m_mmio_dm) ||
>> >> > >>           (access_w && (p2mt == p2m_ram_ro)) )
>> >> > >>      {
>> >> > >> +        put_gfn(p2m->domain, gfn);
>> >> > >>          if ( !handle_mmio() )
>> >> > >>              hvm_inject_exception(TRAP_gp_fault, 0, 0);
>> >> > >>          rc = 1;
>> >> > >> -        goto out_put_gfn;
>> >> > >> +        goto out;
>> >> > >>      }
>> >> > >>
>> >> > >>  #ifdef __x86_64__
>> >> > >> @@ -1379,6 +1380,7 @@ int hvm_hap_nested_page_fault(unsigned l
>> >> > >>
>> >> > >>  out_put_gfn:
>> >> > >>      put_gfn(p2m->domain, gfn);
>> >> > >> +out:
>> >> > >>      if ( paged )
>> >> > >>          p2m_mem_paging_populate(v->domain, gfn);
>> >> > >>      if ( req_ptr )
>> >> > >
>> >> > > Yes, that's fine to release the p2m lock earlier than
>> >> > > handle_mmio.
>> >> >
>> >> > Ack
>> >>
>> >> OK, applied.
>> >>
>> >> Tim.
>> >
>>
>
>


