[Xen-devel] [PATCH net-next v1 6/8] xen-netback: add an implementation of toeplitz hashing for receive-side packets
My recent patch to include/xen/interface/io/netif.h defines a set of
control messages that can be used by a VM frontend driver to configure
toeplitz hashing of receive-side packets and consequent steering of
those packets to particular queues.

This patch introduces an implementation of toeplitz hashing into
xen-netback and allows it to be configured using the new control
messages.

Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx>
Cc: Ian Campbell <ian.campbell@xxxxxxxxxx>
Cc: Wei Liu <wei.liu2@xxxxxxxxxx>
---
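For reference, the hashing algorithm can be exercised in isolation
against the test vectors published in Microsoft's RSS specification.
The standalone userspace sketch below (not part of the patch; all
names are local to the example) mirrors the toeplitz_hash() function
added by this patch and prints the hash of the specification's
example IPv4 TCP 4-tuple. If the algorithm is correct it should
reproduce the published value for this tuple (0x51ccc178 in the
IPv4-with-TCP case).

/* Standalone userspace mirror of toeplitz_hash() for sanity
 * checking; not part of this patch.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t toeplitz_hash(const uint8_t *k, unsigned int klen,
			      const uint8_t *d, unsigned int dlen)
{
	unsigned int di, ki;
	uint64_t prefix = 0;
	uint64_t hash = 0;

	/* Pre-load prefix with the first 8 bytes of the key */
	for (ki = 0; ki < 8; ki++) {
		prefix <<= 8;
		prefix |= (ki < klen) ? k[ki] : 0;
	}

	for (di = 0; di < dlen; di++) {
		uint8_t byte = d[di];
		unsigned int bit;

		for (bit = 0x80; bit != 0; bit >>= 1) {
			if (byte & bit)
				hash ^= prefix;
			prefix <<= 1;
		}

		/* prefix has been left-shifted by 8; OR in the next
		 * key byte.
		 */
		prefix |= (ki < klen) ? k[ki] : 0;
		ki++;
	}

	/* The valid part of the hash is in the upper 32 bits. */
	return hash >> 32;
}

int main(void)
{
	/* Default key from the RSS specification */
	static const uint8_t key[40] = {
		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
		0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
		0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
		0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
		0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
	};
	/* src addr | dst addr | src port | dst port, network order */
	static const uint8_t data[12] = {
		66, 9, 149, 187,	/* 66.9.149.187 */
		161, 142, 100, 80,	/* 161.142.100.80 */
		0x0a, 0xea,		/* port 2794 */
		0x06, 0xe6,		/* port 1766 */
	};

	printf("hash = 0x%08x\n",
	       toeplitz_hash(key, sizeof(key), data, sizeof(data)));
	return 0;
}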
 drivers/net/xen-netback/common.h    |  13 ++++
 drivers/net/xen-netback/interface.c | 149 ++++++++++++++++++++++++++++++++++++
 drivers/net/xen-netback/netback.c   | 128 ++++++++++++++++++++++++++++++-
 3 files changed, 287 insertions(+), 3 deletions(-)

diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 093a12a..6687702 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -220,6 +220,12 @@ struct xenvif_mcast_addr {
 
 #define XEN_NETBK_MCAST_MAX 64
 
+#define XEN_NETBK_MAX_TOEPLITZ_KEY_SIZE 40
+
+#define XEN_NETBK_MAX_TOEPLITZ_MAPPING_ORDER 7
+#define XEN_NETBK_MAX_TOEPLITZ_MAPPING_SIZE \
+	BIT(XEN_NETBK_MAX_TOEPLITZ_MAPPING_ORDER)
+
 struct xenvif {
 	/* Unique identifier for this interface. */
 	domid_t domid;
@@ -251,6 +257,13 @@ struct xenvif {
 	unsigned int num_queues; /* active queues, resource allocated */
 	unsigned int stalled_queues;
 
+	struct {
+		u32 flags;
+		u8 key[XEN_NETBK_MAX_TOEPLITZ_KEY_SIZE];
+		u32 mapping[XEN_NETBK_MAX_TOEPLITZ_MAPPING_SIZE];
+		unsigned int order;
+	} toeplitz;
+
 	struct xenbus_watch credit_watch;
 	struct xenbus_watch mcast_ctrl_watch;
 
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 1850ebb..230afde 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -1,3 +1,4 @@
+
 /*
  * Network-device interface management.
  *
@@ -151,6 +152,153 @@ void xenvif_wake_queue(struct xenvif_queue *queue)
 	netif_tx_wake_queue(netdev_get_tx_queue(dev, id));
 }
 
+static u32 toeplitz_hash(const u8 *k, unsigned int klen,
+			 const u8 *d, unsigned int dlen)
+{
+	unsigned int di, ki;
+	u64 prefix = 0;
+	u64 hash = 0;
+
+	/* Pre-load prefix with the first 8 bytes of the key */
+	for (ki = 0; ki < 8; ki++) {
+		prefix <<= 8;
+		prefix |= (ki < klen) ? k[ki] : 0;
+	}
+
+	for (di = 0; di < dlen; di++) {
+		u8 byte = d[di];
+		unsigned int bit;
+
+		for (bit = 0x80; bit != 0; bit >>= 1) {
+			if (byte & bit)
+				hash ^= prefix;
+			prefix <<= 1;
+		}
+
+		/* prefix has now been left-shifted by 8, so OR in
+		 * the next byte.
+		 */
+		prefix |= (ki < klen) ? k[ki] : 0;
+		ki++;
+	}
+
+	/* The valid part of the hash is in the upper 32 bits. */
+	return hash >> 32;
+}
+
+static void xenvif_set_toeplitz_hash(struct xenvif *vif, struct sk_buff *skb)
+{
+	struct flow_keys flow;
+	u32 hash = 0;
+	enum pkt_hash_types type = PKT_HASH_TYPE_NONE;
+	const u8 *key = vif->toeplitz.key;
+	u32 flags = vif->toeplitz.flags;
+	const unsigned int len = XEN_NETBK_MAX_TOEPLITZ_KEY_SIZE;
+	bool has_tcp_hdr;
+
+	/* Quick rejection test: If the network protocol doesn't
+	 * correspond to any enabled hash type then there's no point
+	 * in parsing the packet header.
+	 */
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		if (flags & (XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV4_TCP |
+			     XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV4))
+			break;
+
+		goto done;
+
+	case htons(ETH_P_IPV6):
+		if (flags & (XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV6_TCP |
+			     XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV6))
+			break;
+
+		goto done;
+
+	default:
+		goto done;
+	}
+
+	memset(&flow, 0, sizeof(flow));
+	if (!skb_flow_dissect_flow_keys(skb, &flow, 0))
+		goto done;
+
+	has_tcp_hdr = (flow.basic.ip_proto == IPPROTO_TCP) &&
+		      !(flow.control.flags & FLOW_DIS_IS_FRAGMENT);
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		if (has_tcp_hdr &&
+		    (flags & XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV4_TCP)) {
+			u8 data[12];
+
+			memcpy(&data[0], &flow.addrs.v4addrs.src, 4);
+			memcpy(&data[4], &flow.addrs.v4addrs.dst, 4);
+			memcpy(&data[8], &flow.ports.src, 2);
+			memcpy(&data[10], &flow.ports.dst, 2);
+
+			hash = toeplitz_hash(key, len,
+					     data, sizeof(data));
+			type = PKT_HASH_TYPE_L4;
+		} else if (flags & XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV4) {
+			u8 data[8];
+
+			memcpy(&data[0], &flow.addrs.v4addrs.src, 4);
+			memcpy(&data[4], &flow.addrs.v4addrs.dst, 4);
+
+			hash = toeplitz_hash(key, len,
+					     data, sizeof(data));
+			type = PKT_HASH_TYPE_L3;
+		}
+
+		break;
+
+	case htons(ETH_P_IPV6):
+		if (has_tcp_hdr &&
+		    (flags & XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV6_TCP)) {
+			u8 data[36];
+
+			memcpy(&data[0], &flow.addrs.v6addrs.src, 16);
+			memcpy(&data[16], &flow.addrs.v6addrs.dst, 16);
+			memcpy(&data[32], &flow.ports.src, 2);
+			memcpy(&data[34], &flow.ports.dst, 2);
+
+			hash = toeplitz_hash(key, len,
+					     data, sizeof(data));
+			type = PKT_HASH_TYPE_L4;
+		} else if (flags & XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV6) {
+			u8 data[32];
+
+			memcpy(&data[0], &flow.addrs.v6addrs.src, 16);
+			memcpy(&data[16], &flow.addrs.v6addrs.dst, 16);
+
+			hash = toeplitz_hash(key, len,
+					     data, sizeof(data));
+			type = PKT_HASH_TYPE_L3;
+		}
+
+		break;
+	}
+
+done:
+	skb_set_hash(skb, hash, type);
+}
+
+static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb,
+			       void *accel_priv,
+			       select_queue_fallback_t fallback)
+{
+	struct xenvif *vif = netdev_priv(dev);
+	unsigned int mask = (1u << vif->toeplitz.order) - 1;
+
+	if (vif->toeplitz.flags == 0)
+		return fallback(dev, skb) % dev->real_num_tx_queues;
+
+	xenvif_set_toeplitz_hash(vif, skb);
+
+	return vif->toeplitz.mapping[skb_get_hash_raw(skb) & mask];
+}
+
 static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct xenvif *vif = netdev_priv(dev);
@@ -395,6 +543,7 @@ static const struct ethtool_ops xenvif_ethtool_ops = {
 };
 
 static const struct net_device_ops xenvif_netdev_ops = {
+	.ndo_select_queue = xenvif_select_queue,
 	.ndo_start_xmit	= xenvif_start_xmit,
 	.ndo_get_stats	= xenvif_get_stats,
 	.ndo_open	= xenvif_open,
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index a1f1a38..41ec7e9 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -2163,6 +2163,89 @@ int xenvif_dealloc_kthread(void *data)
 	return 0;
 }
 
+static u32 xenvif_set_toeplitz_flags(struct xenvif *vif, u32 flags)
+{
+	if (flags & ~(XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV4 |
+		      XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV4_TCP |
+		      XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV6 |
+		      XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV6_TCP))
+		return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+
+	vif->toeplitz.flags = flags;
+
+	return XEN_NETIF_CTRL_STATUS_SUCCESS;
+}
+
+static u32 xenvif_set_toeplitz_key(struct xenvif *vif, u32 gref, u32 len)
+{
+	u8 *key = vif->toeplitz.key;
+	struct gnttab_copy copy_op = {
+		.source.u.ref = gref,
+		.source.domid = vif->domid,
+		.dest.u.gmfn = virt_to_gfn(key),
+		.dest.domid = DOMID_SELF,
+		.dest.offset = xen_offset_in_page(key),
+		.len = len,
+		.flags = GNTCOPY_source_gref
+	};
+
+	if (len > XEN_NETBK_MAX_TOEPLITZ_KEY_SIZE)
+		return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+
+	gnttab_batch_copy(&copy_op, 1);
+
+	if (copy_op.status != GNTST_okay)
+		return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+
+	/* Clear any remaining key octets */
+	if (len < XEN_NETBK_MAX_TOEPLITZ_KEY_SIZE)
+		memset(key + len, 0, XEN_NETBK_MAX_TOEPLITZ_KEY_SIZE - len);
+
+	return XEN_NETIF_CTRL_STATUS_SUCCESS;
+}
+
+static u32 xenvif_set_toeplitz_mapping_order(struct xenvif *vif,
+					     u32 order)
+{
+	if (order > XEN_NETBK_MAX_TOEPLITZ_MAPPING_ORDER)
+		return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+
+	vif->toeplitz.order = order;
+	memset(vif->toeplitz.mapping, 0, sizeof(u32) << order);
+
+	return XEN_NETIF_CTRL_STATUS_SUCCESS;
+}
+
+static u32 xenvif_set_toeplitz_mapping(struct xenvif *vif, u32 gref,
+				       u32 len, u32 off)
+{
+	u32 *mapping = &vif->toeplitz.mapping[off];
+	struct gnttab_copy copy_op = {
+		.source.u.ref = gref,
+		.source.domid = vif->domid,
+		.dest.u.gmfn = virt_to_gfn(mapping),
+		.dest.domid = DOMID_SELF,
+		.dest.offset = xen_offset_in_page(mapping),
+		.len = len * sizeof(u32),
+		.flags = GNTCOPY_source_gref
+	};
+
+	if ((off + len > (1u << vif->toeplitz.order)) ||
+	    copy_op.len > XEN_PAGE_SIZE)
+		return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+
+	gnttab_batch_copy(&copy_op, 1);
+
+	if (copy_op.status != GNTST_okay)
+		return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+
+	while (len-- != 0)
+		if (mapping[len] >= vif->num_queues)
+			return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+
+	return XEN_NETIF_CTRL_STATUS_SUCCESS;
+}
+
 static void make_ctrl_response(struct xenvif *vif,
 			       const struct xen_netif_ctrl_request *req,
 			       u32 status, u32 data)
@@ -2191,9 +2274,48 @@ static void push_ctrl_response(struct xenvif *vif)
 static void process_ctrl_request(struct xenvif *vif,
 				 const struct xen_netif_ctrl_request *req)
 {
-	/* There is no support for control requests yet. */
-	make_ctrl_response(vif, req,
-			   XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED, 0);
+	u32 status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED;
+	u32 data = 0;
+
+	switch (req->type) {
+	case XEN_NETIF_CTRL_TYPE_GET_TOEPLITZ_FLAGS:
+		status = XEN_NETIF_CTRL_STATUS_SUCCESS;
+		data = XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV4 |
+		       XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV4_TCP |
+		       XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV6 |
+		       XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV6_TCP;
+		break;
+
+	case XEN_NETIF_CTRL_TYPE_SET_TOEPLITZ_FLAGS:
+		status = xenvif_set_toeplitz_flags(vif, req->data[0]);
+		break;
+
+	case XEN_NETIF_CTRL_TYPE_SET_TOEPLITZ_KEY:
+		status = xenvif_set_toeplitz_key(vif, req->data[0],
+						 req->data[1]);
+		break;
+
+	case XEN_NETIF_CTRL_TYPE_GET_TOEPLITZ_MAPPING_ORDER:
+		status = XEN_NETIF_CTRL_STATUS_SUCCESS;
+		data = XEN_NETBK_MAX_TOEPLITZ_MAPPING_ORDER;
+		break;
+
+	case XEN_NETIF_CTRL_TYPE_SET_TOEPLITZ_MAPPING_ORDER:
+		status = xenvif_set_toeplitz_mapping_order(vif,
+							   req->data[0]);
+		break;
+
+	case XEN_NETIF_CTRL_TYPE_SET_TOEPLITZ_MAPPING:
+		status = xenvif_set_toeplitz_mapping(vif, req->data[0],
+						     req->data[1],
+						     req->data[2]);
+		break;
+
+	default:
+		break;
+	}
+
+	make_ctrl_response(vif, req, status, data);
 	push_ctrl_response(vif);
 }
-- 
2.1.4
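As an aside, for anyone prototyping the frontend side: a rough sketch
of the message sequence that drives the handlers above might look as
follows. The XEN_NETIF_CTRL_* constants and the layout of struct
xen_netif_ctrl_request come from the netif.h patch earlier in this
series; ctrl_submit() is a hypothetical helper standing in for however
the frontend posts a request on the control ring and waits for the
matching response, so this is illustrative only.

/* Hypothetical frontend-side sketch: configure toeplitz hashing via
 * the control ring. Not part of this patch.
 */
#include <xen/interface/io/netif.h>

int ctrl_submit(void *priv, struct xen_netif_ctrl_request *req);

static int configure_toeplitz(void *priv, u32 key_gref, u32 key_len,
			      u32 map_gref, u32 order)
{
	struct xen_netif_ctrl_request req = { .id = 0 };
	int err;

	/* Install the key first: data[0] = grant ref of the page
	 * holding the key, data[1] = key length in octets (<= 40).
	 */
	req.type = XEN_NETIF_CTRL_TYPE_SET_TOEPLITZ_KEY;
	req.data[0] = key_gref;
	req.data[1] = key_len;
	err = ctrl_submit(priv, &req);
	if (err)
		return err;

	/* Size the hash -> queue mapping table at 1 << order entries. */
	req.type = XEN_NETIF_CTRL_TYPE_SET_TOEPLITZ_MAPPING_ORDER;
	req.data[0] = order;
	err = ctrl_submit(priv, &req);
	if (err)
		return err;

	/* Fill the table: data[0] = grant ref of a page of u32 queue
	 * numbers, data[1] = entry count, data[2] = offset into the
	 * table.
	 */
	req.type = XEN_NETIF_CTRL_TYPE_SET_TOEPLITZ_MAPPING;
	req.data[0] = map_gref;
	req.data[1] = 1u << order;
	req.data[2] = 0;
	err = ctrl_submit(priv, &req);
	if (err)
		return err;

	/* Enable the hash types last, so the backend never hashes
	 * with a half-configured key or mapping.
	 */
	req.type = XEN_NETIF_CTRL_TYPE_SET_TOEPLITZ_FLAGS;
	req.data[0] = XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV4_TCP |
		      XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV6_TCP;
	return ctrl_submit(priv, &req);
}

Ordering the SET_TOEPLITZ_FLAGS message last matters because
xenvif_select_queue() starts consulting the mapping as soon as any
flag is set.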