
Re: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support



> -----Original Message-----
> From: Pasi Kärkkäinen [mailto:pasik@xxxxxx]
> Sent: Wednesday, August 15, 2012 4:19 PM
> To: Hao, Xudong
> Cc: xen-devel@xxxxxxxxxxxxx; ian.jackson@xxxxxxxxxxxxx; Zhang, Xiantao
> Subject: Re: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support
> 
> On Wed, Aug 15, 2012 at 02:54:41PM +0800, Xudong Hao wrote:
> > Currently it is assumed that PCI device BARs are mapped below 4G. A
> > device whose BAR is larger than 4G must be mapped at an address above
> > 4G. This patch enables 64-bit big BAR support in hvmloader.
> >
> 
> Hello,
> 
> Do you have an example of such a PCI device with >4G BAR?
> 

We have a standard PCIe device built around Intel MIC technology; this 
device has a 16G BAR. Current Xen cannot pass such a device through to a 
guest, so we made this patch to give Xen big BAR support.
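
To see why such a BAR can never fit below 4G, compare it with the MMIO
hole hvmloader sets up in config.h. A standalone sketch of the arithmetic
(illustrative only, not part of the patch):

    #include <stdio.h>

    /* Default MMIO hole constants from hvmloader's config.h. */
    #define PCI_MEM_START      0xf0000000ULL
    #define PCI_MEM_END        0xfc000000ULL
    #define PCI_MIN_MMIO_ADDR  0x80000000ULL

    int main(void)
    {
        /* Default hole: 0xfc000000 - 0xf0000000 = 192 MiB. */
        printf("default hole: %llu MiB\n",
               (unsigned long long)((PCI_MEM_END - PCI_MEM_START) >> 20));
        /* Even grown down to PCI_MIN_MMIO_ADDR it stays below 2 GiB. */
        printf("max low hole: %llu MiB\n",
               (unsigned long long)((PCI_MEM_END - PCI_MIN_MMIO_ADDR) >> 20));
        /* A 16G BAR is 16384 MiB, so it must go above 4G. */
        return 0;
    }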

For background on Intel MIC technology, you can refer to 
http://openlab.web.cern.ch/sites/openlab.web.cern.ch/files/presentations/KNC_ISA_Overview_Apr2012_SJ_New_V4.pdf
  or 
http://openlab.web.cern.ch/sites/openlab.web.cern.ch/files/presentations/2012.04.25%20Andrzej%20Nowak%20-%20An%20overview%20of%20Intel%20MIC%20-%20technology,%20hardware%20and%20software%20v3.pdf
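
For anyone reading the hvmloader hunks below: the sizing logic is the
usual write-all-ones probe, extended to cover both halves of a 64-bit
memory BAR. A condensed sketch of that sequence (using hvmloader's
pci_readl()/pci_writel(), with devfn and bar_reg as in pci.c):

    /* Save the BAR, probe both dwords, then restore it. */
    uint32_t lo = pci_readl(devfn, bar_reg);
    uint32_t hi = pci_readl(devfn, bar_reg + 4);
    uint64_t sz;

    pci_writel(devfn, bar_reg, ~0);       /* probe low dword */
    pci_writel(devfn, bar_reg + 4, ~0);   /* probe high dword */
    sz = ((uint64_t)pci_readl(devfn, bar_reg + 4) << 32) |
         pci_readl(devfn, bar_reg);
    pci_writel(devfn, bar_reg, lo);
    pci_writel(devfn, bar_reg + 4, hi);

    sz &= ~(uint64_t)0xf;   /* drop the memory BAR flag bits */
    sz &= ~(sz - 1);        /* lowest set bit == BAR size */

The placement step later in the patch is then just a power-of-two
alignment within whichever window was chosen: a 16G BAR in the high
window lands at (0xa000000000 + sz - 1) & ~(sz - 1), which is
0xa000000000 itself, since PCI_HIGH_MEM_START is already 16G-aligned.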

- Thanks
Xudong

> Thanks,
> 
> -- Pasi
> 
> > Signed-off-by: Xiantao Zhang <xiantao.zhang@xxxxxxxxx>
> > Signed-off-by: Xudong Hao <xudong.hao@xxxxxxxxx>
> >
> > diff -r 663eb766cdde tools/firmware/hvmloader/config.h
> > --- a/tools/firmware/hvmloader/config.h     Tue Jul 24 17:02:04 2012 +0200
> > +++ b/tools/firmware/hvmloader/config.h     Thu Jul 26 15:40:01 2012 +0800
> > @@ -53,6 +53,10 @@ extern struct bios_config ovmf_config;
> >  /* MMIO hole: Hardcoded defaults, which can be dynamically expanded. */
> >  #define PCI_MEM_START       0xf0000000
> >  #define PCI_MEM_END         0xfc000000
> > +#define PCI_HIGH_MEM_START  0xa000000000ULL
> > +#define PCI_HIGH_MEM_END    0xf000000000ULL
> > +#define PCI_MIN_MMIO_ADDR   0x80000000
> > +
> >  extern unsigned long pci_mem_start, pci_mem_end;
> >
> >
> > diff -r 663eb766cdde tools/firmware/hvmloader/pci.c
> > --- a/tools/firmware/hvmloader/pci.c        Tue Jul 24 17:02:04 2012 +0200
> > +++ b/tools/firmware/hvmloader/pci.c        Thu Jul 26 15:40:01 2012 +0800
> > @@ -31,24 +31,33 @@
> >  unsigned long pci_mem_start = PCI_MEM_START;
> >  unsigned long pci_mem_end = PCI_MEM_END;
> >
> > +uint64_t pci_high_mem_start = PCI_HIGH_MEM_START;
> > +uint64_t pci_high_mem_end = PCI_HIGH_MEM_END;
> > +
> >  enum virtual_vga virtual_vga = VGA_none;
> >  unsigned long igd_opregion_pgbase = 0;
> >
> >  void pci_setup(void)
> >  {
> > -    uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd, mmio_total = 0;
> > +    uint8_t is_64bar, using_64bar, bar64_relocate = 0;
> > +    uint32_t devfn, bar_reg, cmd, bar_data, bar_data_upper;
> > +    uint64_t base, bar_sz, bar_sz_upper, mmio_total = 0;
> >      uint32_t vga_devfn = 256;
> >      uint16_t class, vendor_id, device_id;
> >      unsigned int bar, pin, link, isa_irq;
> > +    int64_t mmio_left;
> >
> >      /* Resources assignable to PCI devices via BARs. */
> >      struct resource {
> > -        uint32_t base, max;
> > -    } *resource, mem_resource, io_resource;
> > +        uint64_t base, max;
> > +    } *resource, mem_resource, high_mem_resource, io_resource;
> >
> >      /* Create a list of device BARs in descending order of size. */
> >      struct bars {
> > -        uint32_t devfn, bar_reg, bar_sz;
> > +        uint32_t is_64bar;
> > +        uint32_t devfn;
> > +        uint32_t bar_reg;
> > +        uint64_t bar_sz;
> >      } *bars = (struct bars *)scratch_start;
> >      unsigned int i, nr_bars = 0;
> >
> > @@ -133,23 +142,35 @@ void pci_setup(void)
> >          /* Map the I/O memory and port resources. */
> >          for ( bar = 0; bar < 7; bar++ )
> >          {
> > +            bar_sz_upper = 0;
> >              bar_reg = PCI_BASE_ADDRESS_0 + 4*bar;
> >              if ( bar == 6 )
> >                  bar_reg = PCI_ROM_ADDRESS;
> >
> >              bar_data = pci_readl(devfn, bar_reg);
> > +            is_64bar = !!((bar_data & (PCI_BASE_ADDRESS_SPACE |
> > +                         PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
> > +                         (PCI_BASE_ADDRESS_SPACE_MEMORY |
> > +                         PCI_BASE_ADDRESS_MEM_TYPE_64));
> >              pci_writel(devfn, bar_reg, ~0);
> >              bar_sz = pci_readl(devfn, bar_reg);
> >              pci_writel(devfn, bar_reg, bar_data);
> > +
> > +            if (is_64bar) {
> > +                bar_data_upper = pci_readl(devfn, bar_reg + 4);
> > +                pci_writel(devfn, bar_reg + 4, ~0);
> > +                bar_sz_upper = pci_readl(devfn, bar_reg + 4);
> > +                pci_writel(devfn, bar_reg + 4, bar_data_upper);
> > +                bar_sz = (bar_sz_upper << 32) | bar_sz;
> > +            }
> > +            bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
> > +                        PCI_BASE_ADDRESS_SPACE_MEMORY) ?
> > +                       0xfffffffffffffff0 :
> > +                       (PCI_BASE_ADDRESS_IO_MASK & 0xffff));
> > +            bar_sz &= ~(bar_sz - 1);
> >              if ( bar_sz == 0 )
> >                  continue;
> >
> > -            bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
> > -                        PCI_BASE_ADDRESS_SPACE_MEMORY) ?
> > -                       PCI_BASE_ADDRESS_MEM_MASK :
> > -                       (PCI_BASE_ADDRESS_IO_MASK & 0xffff));
> > -            bar_sz &= ~(bar_sz - 1);
> > -
> >              for ( i = 0; i < nr_bars; i++ )
> >                  if ( bars[i].bar_sz < bar_sz )
> >                      break;
> > @@ -157,6 +178,7 @@ void pci_setup(void)
> >              if ( i != nr_bars )
> >                  memmove(&bars[i+1], &bars[i], (nr_bars-i) * sizeof(*bars));
> >
> > +            bars[i].is_64bar = is_64bar;
> >              bars[i].devfn   = devfn;
> >              bars[i].bar_reg = bar_reg;
> >              bars[i].bar_sz  = bar_sz;
> > @@ -167,11 +189,8 @@ void pci_setup(void)
> >
> >              nr_bars++;
> >
> > -            /* Skip the upper-half of the address for a 64-bit BAR. */
> > -            if ( (bar_data & (PCI_BASE_ADDRESS_SPACE |
> > -                              PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
> > -                 (PCI_BASE_ADDRESS_SPACE_MEMORY |
> > -                  PCI_BASE_ADDRESS_MEM_TYPE_64) )
> > +            /* The upper half is already calculated, skip it. */
> > +            if (is_64bar)
> >                  bar++;
> >          }
> >
> > @@ -193,10 +212,14 @@ void pci_setup(void)
> >          pci_writew(devfn, PCI_COMMAND, cmd);
> >      }
> >
> > -    while ( (mmio_total > (pci_mem_end - pci_mem_start)) &&
> > -            ((pci_mem_start << 1) != 0) )
> > +    while ( mmio_total > (pci_mem_end - pci_mem_start) && pci_mem_start )
> >          pci_mem_start <<= 1;
> >
> > +    if (!pci_mem_start) {
> > +        bar64_relocate = 1;
> > +        pci_mem_start = PCI_MIN_MMIO_ADDR;
> > +    }
> > +
> >      /* Relocate RAM that overlaps PCI space (in 64k-page chunks). */
> >      while ( (pci_mem_start >> PAGE_SHIFT) < hvm_info->low_mem_pgend )
> >      {
> > @@ -218,11 +241,15 @@ void pci_setup(void)
> >          hvm_info->high_mem_pgend += nr_pages;
> >      }
> >
> > +    high_mem_resource.base = pci_high_mem_start;
> > +    high_mem_resource.max = pci_high_mem_end;
> >      mem_resource.base = pci_mem_start;
> >      mem_resource.max = pci_mem_end;
> >      io_resource.base = 0xc000;
> >      io_resource.max = 0x10000;
> >
> > +    mmio_left = pci_mem_end - pci_mem_start;
> > +
> >      /* Assign iomem and ioport resources in descending order of size. */
> >      for ( i = 0; i < nr_bars; i++ )
> >      {
> > @@ -230,13 +257,21 @@ void pci_setup(void)
> >          bar_reg = bars[i].bar_reg;
> >          bar_sz  = bars[i].bar_sz;
> >
> > +        using_64bar = bars[i].is_64bar && bar64_relocate && (mmio_left < bar_sz);
> >          bar_data = pci_readl(devfn, bar_reg);
> >
> >          if ( (bar_data & PCI_BASE_ADDRESS_SPACE) ==
> >               PCI_BASE_ADDRESS_SPACE_MEMORY )
> >          {
> > -            resource = &mem_resource;
> > -            bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> > +            if (using_64bar) {
> > +                resource = &high_mem_resource;
> > +                bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> > +            }
> > +            else {
> > +                resource = &mem_resource;
> > +                bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> > +            }
> > +            mmio_left -= bar_sz;
> >          }
> >          else
> >          {
> > @@ -244,13 +279,14 @@ void pci_setup(void)
> >              bar_data &= ~PCI_BASE_ADDRESS_IO_MASK;
> >          }
> >
> > -        base = (resource->base + bar_sz - 1) & ~(bar_sz - 1);
> > -        bar_data |= base;
> > +        base = (resource->base + bar_sz - 1) & ~(uint64_t)(bar_sz - 1);
> > +        bar_data |= (uint32_t)base;
> > +        bar_data_upper = (uint32_t)(base >> 32);
> >          base += bar_sz;
> >
> >          if ( (base < resource->base) || (base > resource->max) )
> >          {
> > -            printf("pci dev %02x:%x bar %02x size %08x: no space for "
> > +            printf("pci dev %02x:%x bar %02x size %llx: no space for "
> >                     "resource!\n", devfn>>3, devfn&7, bar_reg,
> bar_sz);
> >              continue;
> >          }
> > @@ -258,7 +294,9 @@ void pci_setup(void)
> >          resource->base = base;
> >
> >          pci_writel(devfn, bar_reg, bar_data);
> > -        printf("pci dev %02x:%x bar %02x size %08x: %08x\n",
> > +        if (using_64bar)
> > +            pci_writel(devfn, bar_reg + 4, bar_data_upper);
> > +        printf("pci dev %02x:%x bar %02x size %llx: %08x\n",
> >                 devfn>>3, devfn&7, bar_reg, bar_sz, bar_data);
> >
> >          /* Now enable the memory or I/O mapping. */
> >
> > _______________________________________________
> > Xen-devel mailing list
> > Xen-devel@xxxxxxxxxxxxx
> > http://lists.xen.org/xen-devel

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 

