[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH] Add VMDq support to ixgbe
This patch adds experimental VMDq support (AKA Netchannel2 vmq) to the ixgbe driver. This applies to the Netchannel2 tree, and should NOT be applied to the "normal" development tree. To enable VMDq functionality, load the driver with the command-line parameter VMDQ=<num queues>, as in: $ modprobe ixgbe VMDQ=8 You can then set up PV domains to use the device by modifying your VM configuration file from vif = [ '<whatever>' ] to vif2 = [ 'pdev=<netdev>' ] where <netdev> is the interface name for your 82598 board, e.g peth0 in dom0. The Netchannel2 code is VERY experimental at this stage and should not be used in production environments. This patch is intended to support further development and testing efforts. Signed-off-by: Mitch Williams <mitch.a.williams@xxxxxxxxx> diff -urpN -X dontdiff a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h --- a/drivers/net/ixgbe/ixgbe.h 2009-01-23 11:27:18.000000000 -0800 +++ b/drivers/net/ixgbe/ixgbe.h 2009-01-23 11:27:34.000000000 -0800 @@ -35,6 +35,9 @@ #include <linux/pci.h> #include <linux/netdevice.h> #include <linux/vmalloc.h> +#ifdef CONFIG_XEN_NETDEV2_BACKEND +#include <linux/netvmq.h> +#endif #ifdef SIOCETHTOOL #include <linux/ethtool.h> @@ -224,6 +227,9 @@ struct ixgbe_ring { #endif u16 work_limit; /* max work per interrupt */ u16 rx_buf_len; + u8 mac_addr[ETH_ALEN]; + u8 active; + u8 allocated; }; #define RING_F_DCB 0 @@ -417,6 +423,10 @@ struct ixgbe_adapter { unsigned int lro_flushed; unsigned int lro_no_desc; #endif +#ifdef CONFIG_XEN_NETDEV2_BACKEND + struct net_vmq *vmq; + u32 rx_queues_allocated; +#endif unsigned int tx_ring_count; unsigned int rx_ring_count; diff -urpN -X dontdiff a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c --- a/drivers/net/ixgbe/ixgbe_main.c 2009-01-23 11:27:18.000000000 -0800 +++ b/drivers/net/ixgbe/ixgbe_main.c 2009-01-26 11:24:10.000000000 -0800 @@ -66,7 +66,7 @@ static const char ixgbe_driver_string[] #define DRIVERNAPI "-NAPI" #endif -#define DRV_VERSION "1.3.56.5" DRIVERNAPI DRV_HW_PERF +#define DRV_VERSION "1.3.56.5-vmq" DRIVERNAPI DRV_HW_PERF const char ixgbe_driver_version[] = DRV_VERSION; static char ixgbe_copyright[] = "Copyright (c) 1999-2008 Intel Corporation."; /* ixgbe_pci_tbl - PCI Device ID Table @@ -431,6 +431,17 @@ static void ixgbe_receive_skb(struct ixg bool is_vlan = (status & IXGBE_RXD_STAT_VP); u16 tag = le16_to_cpu(rx_desc->wb.upper.vlan); +#ifdef CONFIG_XEN_NETDEV2_BACKEND + if(ring->queue_index) { + /* This is a VMDq packet destined for a VM. */ + vmq_netif_rx(skb, ring->queue_index); + return; + } + else { + netif_rx(skb); + return; + } +#endif #ifndef IXGBE_NO_INET_LRO if (adapter->netdev->features & NETIF_F_LRO && skb->ip_summed == CHECKSUM_UNNECESSARY) { @@ -511,6 +522,10 @@ static inline void ixgbe_rx_checksum(str /* It must be a TCP or UDP packet with a valid checksum */ skb->ip_summed = CHECKSUM_UNNECESSARY; adapter->hw_csum_rx_good++; + +#ifdef CONFIG_XEN_NETDEV2_BACKEND + skb->proto_data_valid = 1; +#endif } /** @@ -554,13 +569,33 @@ static void ixgbe_alloc_rx_buffers(struc } if (!bi->skb) { - struct sk_buff *skb = netdev_alloc_skb(adapter->netdev, - bufsz); + struct sk_buff *skb; +#ifdef CONFIG_XEN_NETDEV2_BACKEND + if ((adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) && + rx_ring->queue_index) { + skb = vmq_alloc_skb(adapter->netdev, + rx_ring->queue_index, + bufsz); + if (!skb) { + adapter->alloc_rx_buff_failed++; + goto no_buffers; + } + bi->skb = skb; + bi->dma = pci_map_page(pdev, + skb_shinfo(skb)->frags[0].page, + skb_shinfo(skb)->frags[0].page_offset, + skb_shinfo(skb)->frags[0].size, + PCI_DMA_FROMDEVICE); + } else { +#endif + skb = netdev_alloc_skb(adapter->netdev, bufsz); - if (!skb) { - adapter->alloc_rx_buff_failed++; - goto no_buffers; - } + if (!skb) { + adapter->alloc_rx_buff_failed++; + goto no_buffers; + } + + skb->dev = adapter->netdev; /* * Make buffer alignment 2 beyond a 16 byte boundary @@ -572,7 +607,11 @@ static void ixgbe_alloc_rx_buffers(struc bi->skb = skb; bi->dma = pci_map_single(pdev, skb->data, bufsz, PCI_DMA_FROMDEVICE); +#ifdef CONFIG_XEN_NETDEV2_BACKEND + } +#endif } + /* Refresh the desc even if buffer_addrs didn't change because * each write-back erases this info. */ if (adapter->flags & IXGBE_FLAG_RX_PS_ENABLED) { @@ -1019,9 +1058,23 @@ static bool ixgbe_clean_rx_irq(struct ix cleaned = true; skb = rx_buffer_info->skb; - prefetch(skb->data - NET_IP_ALIGN); rx_buffer_info->skb = NULL; - +#ifdef CONFIG_XEN_NETDEV2_BACKEND + if(!rx_ring->queue_index || !skb_shinfo(skb)->nr_frags) { + prefetch(skb->data - NET_IP_ALIGN); + } else { + /* for Xen VMDq, packet data goes in first page of + * skb, instead of data. + */ + // TODO this is broke for jumbos > 4k + pci_unmap_page(pdev, rx_buffer_info->dma, + PAGE_SIZE, PCI_DMA_FROMDEVICE); + skb->len += len; + skb_shinfo(skb)->frags[0].size = len; + } +#else + prefetch(skb->data - NET_IP_ALIGN); +#endif if (len && !skb_shinfo(skb)->nr_frags) { pci_unmap_single(pdev, rx_buffer_info->dma, rx_ring->rx_buf_len + NET_IP_ALIGN, @@ -1081,8 +1134,11 @@ static bool ixgbe_clean_rx_irq(struct ix /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; total_rx_packets++; +#ifdef CONFIG_XEN_NETDEV2_BACKEND + if(!rx_ring->queue_index) +#endif + skb->protocol = eth_type_trans(skb, adapter->netdev); - skb->protocol = eth_type_trans(skb, adapter->netdev); #ifndef IXGBE_NO_LRO if (ixgbe_lro_ring_queue(rx_ring->lrolist, adapter, skb, staterr, rx_ring, rx_desc) == 0) { @@ -1475,6 +1531,8 @@ static irqreturn_t ixgbe_msix_clean_rx(i r_idx = find_first_bit(q_vector->rxr_idx, adapter->num_rx_queues); for (i = 0; i < q_vector->rxr_count; i++) { rx_ring = &(adapter->rx_ring[r_idx]); + if (!rx_ring->active) + continue; rx_ring->total_bytes = 0; rx_ring->total_packets = 0; #ifndef CONFIG_IXGBE_NAPI @@ -1501,6 +1559,8 @@ static irqreturn_t ixgbe_msix_clean_rx(i r_idx = find_first_bit(q_vector->rxr_idx, adapter->num_rx_queues); rx_ring = &(adapter->rx_ring[r_idx]); + if (!rx_ring->active) + return IRQ_HANDLED; /* disable interrupts on this vector only */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, rx_ring->v_idx); netif_rx_schedule(adapter->netdev, &q_vector->napi); @@ -2217,6 +2277,8 @@ static void ixgbe_configure_rx(struct ix IXGBE_WRITE_REG(hw, IXGBE_RDT(j), 0); adapter->rx_ring[i].head = IXGBE_RDH(j); adapter->rx_ring[i].tail = IXGBE_RDT(j); + +#ifndef CONFIG_XEN_NETDEV2_BACKEND if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) { /* Reserve VMDq set 1 for FCoE, using 3k buffers */ if ((i & adapter->ring_feature[RING_F_VMDQ].mask) == 1) @@ -2226,6 +2288,10 @@ static void ixgbe_configure_rx(struct ix } else { adapter->rx_ring[i].rx_buf_len = rx_buf_len; } +#else + adapter->rx_ring[i].rx_buf_len = rx_buf_len; +#endif /* CONFIG_XEN_NETDEV2_BACKEND */ + #ifndef IXGBE_NO_INET_LRO /* Intitial LRO Settings */ adapter->rx_ring[i].lro_mgr.max_aggr = adapter->lro_max_aggr; @@ -2398,6 +2464,7 @@ static void ixgbe_restore_vlan(struct ix } #endif +#ifndef CONFIG_XEN_NETDEV2_BACKEND /** * compare_ether_oui - Compare two OUIs * @addr1: pointer to a 6 byte array containing an Ethernet address @@ -2426,10 +2493,13 @@ static inline int is_fcoe_ether_addr(con static const u8 fcoe_oui[] = { 0x0e, 0xfc, 0x00 }; return compare_ether_oui(addr, fcoe_oui) == 0; } +#endif /* CONFIG_XEN_NETDEV2_BACKEND */ static u8 *ixgbe_addr_list_itr(struct ixgbe_hw *hw, u8 **mc_addr_ptr, u32 *vmdq) { +#ifndef CONFIG_XEN_NETDEV2_BACKEND struct ixgbe_adapter *adapter = hw->back; +#endif struct dev_mc_list *mc_ptr; u8 *addr = *mc_addr_ptr; *vmdq = 0; @@ -2439,7 +2509,7 @@ static u8 *ixgbe_addr_list_itr(struct ix *mc_addr_ptr = mc_ptr->next->dmi_addr; else *mc_addr_ptr = NULL; - +#ifndef CONFIG_XEN_NETDEV2_BACKEND if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) { /* VMDQ set 1 is used for FCoE */ if (adapter->ring_feature[RING_F_VMDQ].indices) @@ -2459,6 +2529,7 @@ static u8 *ixgbe_addr_list_itr(struct ix IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); } } +#endif return addr; } @@ -2665,8 +2736,9 @@ static void ixgbe_configure(struct ixgbe ixgbe_configure_tx(adapter); ixgbe_configure_rx(adapter); for (i = 0; i < adapter->num_rx_queues; i++) - ixgbe_alloc_rx_buffers(adapter, &adapter->rx_ring[i], - IXGBE_DESC_UNUSED(&adapter->rx_ring[i])); + if (adapter->rx_ring[i].active) + ixgbe_alloc_rx_buffers(adapter, &adapter->rx_ring[i], + IXGBE_DESC_UNUSED(&adapter->rx_ring[i])); } static int ixgbe_up_complete(struct ixgbe_adapter *adapter) @@ -2751,7 +2823,8 @@ static int ixgbe_up_complete(struct ixgb * and HTHRESH=0 descriptors (to minimize latency on fetch), * this also removes a pesky rx_no_buffer_count increment */ rxdctl |= 0x0020; - rxdctl |= IXGBE_RXDCTL_ENABLE; + if (adapter->rx_ring[i].active) + rxdctl |= IXGBE_RXDCTL_ENABLE; IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(j), rxdctl); } /* enable all receives */ @@ -2832,16 +2905,27 @@ static void ixgbe_clean_rx_ring(struct i struct ixgbe_rx_buffer *rx_buffer_info; rx_buffer_info = &rx_ring->rx_buffer_info[i]; + if (rx_buffer_info->skb) { +#ifdef CONFIG_XEN_NETDEV2_BACKEND + if (rx_ring->queue_index) { + pci_unmap_page(pdev, rx_buffer_info->dma, + PAGE_SIZE, + PCI_DMA_FROMDEVICE); + vmq_free_skb(rx_buffer_info->skb, + rx_ring->queue_index); + rx_buffer_info->dma = 0; + } else +#endif + dev_kfree_skb(rx_buffer_info->skb); + rx_buffer_info->skb = NULL; + } + if (rx_buffer_info->dma) { pci_unmap_single(pdev, rx_buffer_info->dma, rx_ring->rx_buf_len + NET_IP_ALIGN, PCI_DMA_FROMDEVICE); rx_buffer_info->dma = 0; } - if (rx_buffer_info->skb) { - dev_kfree_skb(rx_buffer_info->skb); - rx_buffer_info->skb = NULL; - } if (!rx_buffer_info->page) continue; pci_unmap_page(pdev, rx_buffer_info->page_dma, PAGE_SIZE / 2, @@ -3787,6 +3871,19 @@ int ixgbe_setup_rx_resources(struct ixgb rx_ring->work_limit = rx_ring->count / 2; #endif +#ifdef CONFIG_XEN_NETDEV2_BACKEND + if ((adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) && + rx_ring->queue_index) { + rx_ring->active = 0; + rx_ring->allocated = 0; + } else { +#endif + rx_ring->active = 1; + rx_ring->allocated = 1; +#ifdef CONFIG_XEN_NETDEV2_BACKEND + } +#endif + #ifndef IXGBE_NO_LRO ixgbe_lro_ring_init(rx_ring->lrolist, adapter); #endif @@ -3906,6 +4003,9 @@ static int ixgbe_setup_all_rx_resources( DPRINTK(PROBE, ERR, "Allocation for Rx Queue %u failed\n", i); break; } +#ifdef CONFIG_XEN_NETDEV2_BACKEND + adapter->rx_queues_allocated = 0; +#endif return err; } @@ -3949,6 +4049,12 @@ static int ixgbe_change_mtu(struct net_d if ((new_mtu < 68) || (max_frame > IXGBE_MAX_JUMBO_FRAME_SIZE)) return -EINVAL; +#ifdef CONFIG_XEN_NETDEV2_BACKEND + /* Jumbo frames not currently supported in VMDq mode under Xen */ + if ((adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) && + (max_frame > ETH_FRAME_LEN)) + return -EINVAL; +#endif DPRINTK(PROBE, INFO, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); /* must set new MTU before calling down or up */ @@ -4854,6 +4960,191 @@ static int ixgbe_ioctl(struct net_device } #endif + +#ifdef CONFIG_XEN_NETDEV2_BACKEND +int ixgbe_get_avail_queues(struct net_device *netdev, unsigned int queue_type) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + if (queue_type == VMQ_TYPE_RX) + return (adapter->num_rx_queues - adapter->rx_queues_allocated) - 1; + else if (queue_type == VMQ_TYPE_TX) + return 0; + else return 0; +} +int ixgbe_get_vmq_maxsize(struct net_device *netdev) { + return IXGBE_MAX_TXD; +} + +int ixgbe_alloc_vmq_queue(struct net_device *netdev, unsigned int queue_type) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + if (queue_type == VMQ_TYPE_TX) { + return -EINVAL; + } + + if (adapter->rx_queues_allocated >= adapter->num_rx_queues) { + return -EINVAL; + } + else { + int i; + for (i = 1; i < adapter->num_rx_queues; i++) { + if (!adapter->rx_ring[i].allocated) { + adapter->rx_ring[i].allocated = TRUE; + adapter->rx_queues_allocated++; + return i; + } + } + return -EINVAL; + } +} + +int ixgbe_free_vmq_queue(struct net_device *netdev, int queue) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + if (queue >= adapter->num_rx_queues) + return -EINVAL; + + if (!adapter->rx_ring[queue].allocated) { + return -EINVAL; + } + + adapter->rx_ring[queue].allocated = FALSE; + adapter->rx_queues_allocated--; + ixgbe_clean_rx_ring(adapter, &adapter->rx_ring[queue]); + + return 0; +} + +int ixgbe_set_rxqueue_macfilter(struct net_device *netdev, int queue, + u8 *mac_addr) +{ + int err = 0; + u32 rah; + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_ring *rx_ring = &adapter->rx_ring[queue]; + + if ((queue < 0) || (queue > adapter->num_rx_queues)) { + return -EADDRNOTAVAIL; + } + + /* Note: Broadcast address is used to disable the MAC filter*/ + if (!is_valid_ether_addr(mac_addr)) { + + memset(rx_ring->mac_addr, 0xFF, ETH_ALEN); + + /* Clear RAR */ + IXGBE_WRITE_REG(hw, IXGBE_RAL(queue), 0); + IXGBE_WRITE_FLUSH(hw); + IXGBE_WRITE_REG(hw, IXGBE_RAH(queue), 0); + IXGBE_WRITE_FLUSH(hw); + + return -EADDRNOTAVAIL; + } + + /* Store in ring */ + memcpy(rx_ring->mac_addr, mac_addr, ETH_ALEN); + + err = ixgbe_set_rar(&adapter->hw, queue, rx_ring->mac_addr, 1, IXGBE_RAH_AV); + + if (!err) { + /* Set the VIND for the indicated queue's RAR Entry */ + rah = IXGBE_READ_REG(hw, IXGBE_RAH(queue)); + rah &= ~IXGBE_RAH_VIND_MASK; + rah |= (queue << IXGBE_RAH_VIND_SHIFT); + IXGBE_WRITE_REG(hw, IXGBE_RAH(queue), rah); + IXGBE_WRITE_FLUSH(hw); + } + + return err; +} + +int ixgbe_get_vmq_size(struct net_device *netdev, int queue) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + if (queue >= adapter->num_rx_queues) + return -EINVAL; + return adapter->rx_ring[queue].count; +} + +int ixgbe_set_vmq_size(struct net_device *netdev, int queue, int size) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + /* Not implemented yet, so just return count. */ + return adapter->rx_ring[queue].count; +} + +int ixgbe_set_vmq_vlan(struct net_device *netdev, int queue, int vlan_id) +{ + return 0; /* not implemented */ +} + +int ixgbe_vmq_enable(struct net_device *netdev, int queue) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + u32 rxdctl; + + if (queue >= adapter->num_rx_queues) + return -EINVAL; + + if (!adapter->rx_ring[queue].allocated) { + return -EINVAL; + } + adapter->rx_ring[queue].active = 1; + rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(queue)); + rxdctl |= IXGBE_RXDCTL_ENABLE; + IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(queue), rxdctl); + IXGBE_WRITE_FLUSH(hw); + ixgbe_alloc_rx_buffers(adapter, + &adapter->rx_ring[queue], + IXGBE_DESC_UNUSED(&adapter->rx_ring[queue])); + return 0; +} +int ixgbe_vmq_disable(struct net_device *netdev, int queue) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + u32 rxdctl; + + if (queue >= adapter->num_rx_queues) + return -EINVAL; + + if (!adapter->rx_ring[queue].allocated) { + return -EINVAL; + } + + adapter->rx_ring[queue].active = 0; + rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(queue)); + rxdctl &= ~IXGBE_RXDCTL_ENABLE; + IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(queue), rxdctl); + return 0; +} + +static void ixgbe_setup_vmq(struct ixgbe_adapter *adapter) { + net_vmq_t *vmq; + + vmq = alloc_vmq(adapter->num_rx_queues); + if (vmq) { + vmq->avail_queues = ixgbe_get_avail_queues; + vmq->alloc_queue = ixgbe_alloc_vmq_queue; + vmq->free_queue = ixgbe_free_vmq_queue; + vmq->get_maxsize = ixgbe_get_vmq_maxsize; + vmq->get_size = ixgbe_get_vmq_size; + vmq->set_size = ixgbe_set_vmq_size; + vmq->set_mac = ixgbe_set_rxqueue_macfilter; + vmq->set_vlan = ixgbe_set_vmq_vlan; + vmq->enable = ixgbe_vmq_enable; + vmq->disable = ixgbe_vmq_disable; + vmq->nvmq = adapter->num_rx_queues; + adapter->netdev->vmq = vmq; + } +} +#endif /* CONFIG_XEN_NETDEV2_BACKEND */ + #ifdef CONFIG_NET_POLL_CONTROLLER /* * Polling 'interrupt' - used by things like netconsole to send skbs @@ -5152,12 +5443,18 @@ static int __devinit ixgbe_probe(struct #endif strcpy(netdev->name, "eth%d"); +#ifdef CONFIG_XEN_NETDEV2_BACKEND + if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) + ixgbe_setup_vmq(adapter); +#endif err = register_netdev(netdev); if (err) goto err_register; +#ifndef CONFIG_XEN_NETDEV2_BACKEND if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) ixgbe_sysfs_create(adapter); +#endif #if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) if (adapter->flags & IXGBE_FLAG_DCA_CAPABLE) { @@ -5267,8 +5564,17 @@ static void __devexit ixgbe_remove(struc } #endif +#ifdef CONFIG_XEN_NETDEV2_BACKEND + if (netdev->vmq) { + free_vmq(netdev->vmq); + netdev->vmq = 0; + } +#endif + +#ifndef CONFIG_XEN_NETDEV2_BACKEND if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) ixgbe_sysfs_remove(adapter); +#endif if (netdev->reg_state == NETREG_REGISTERED) unregister_netdev(netdev); diff -urpN -X dontdiff a/drivers/net/ixgbe/ixgbe_param.c b/drivers/net/ixgbe/ixgbe_param.c --- a/drivers/net/ixgbe/ixgbe_param.c 2009-01-23 11:27:18.000000000 -0800 +++ b/drivers/net/ixgbe/ixgbe_param.c 2009-01-23 11:27:40.000000000 -0800 @@ -723,6 +723,13 @@ void __devinit ixgbe_check_options(struc adapter->flags |= IXGBE_FLAG_RX_PS_CAPABLE; } #endif +#ifdef CONFIG_XEN_NETDEV2_BACKEND + if (adapter->flags & + (IXGBE_FLAG_RX_PS_CAPABLE | IXGBE_FLAG_VMDQ_ENABLED)) { + printk(KERN_INFO "ixgbe: packet split disabled for Xen VMDQ\n"); + adapter->flags &= ~IXGBE_FLAG_RX_PS_CAPABLE; + } +#endif } } _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |