[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH][RFC] net/bridge: Add basic VEPA support to Xen Dom0
This patch adds basic Virtual Ethernet Port Aggregator (VEPA) capabilities to the Xen Dom0 kernel Ethernet bridging code. A VEPA is a capability within a physical end station that collaborates with an adjacent, external bridge to provide distributed bridging support between multiple virtual end stations and external networks. The VEPA collaborates by forwarding all station-originated frames to the adjacent bridge for frame processing and frame relay (including so-called 'hairpin' forwarding) and by steering and replicating frames received from the VEPA uplink to the appropriate destinations. A VEPA may be implemented in software or in conjunction with embedded hardware. In particular, the patch extends the Xen Dom0 Ethernet bridge to act as either (1) a VEPA, for which we have added VEPA forwarding functionality and a configuration option for a VEPA uplink port, or (2) a bridge supporting 'hairpin' forwarding, for which we have added a bridge port 'hairpin' mode that allows frames to be sent back out through the port on which they were received. Configuration of VEPA capabilities through Linux userspace bridge utilities is provided by an additional patch 'bridge-utils: add basic VEPA support'. Integration of VEPA capabilities with the Xen tools is provided by a separate patch 'tools: Add basic VEPA support' which extends Xen's userspace network scripts. You can find additional information on VEPA here: http://tech.groups.yahoo.com/group/evb/ http://www.ieee802.org/1/files/public/docs2009/new-hudson-vepa_seminar-20090514d.pdf Signed-off-by: Paul Congdon <paul.congdon@xxxxxx> Signed-off-by: Anna Fischer <anna.fischer@xxxxxx> --- A similar patch has also been submitted to the Linux/KVM developer community for inclusion of the VEPA capabilities into the mainline Linux kernel. 
--- diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -343,6 +343,15 @@ fdb = fdb_find(head, addr); if (likely(fdb)) { + /* + * If we are a VEPA and the source port is the uplink, + * this could be a reflected packet, so don't learn any + * addresses that already are in the fdb but on other ports + */ + if ((br->flags & BR_VEPA_MODE) && br->uplink == source && + fdb->dst != br->uplink) + return; + /* attempt to update an entry for a local interface */ if (unlikely(fdb->is_local)) { if (net_ratelimit()) @@ -364,3 +373,16 @@ spin_unlock(&br->hash_lock); } } + +struct net_bridge_port *br_vepa_find_src(struct net_bridge *br, + const unsigned char *addr) +{ + struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct net_bridge_fdb_entry *fdb; + + fdb = fdb_find(head, addr); + if (fdb) + return fdb->dst; + else + return NULL; +} diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -24,7 +24,8 @@ static inline int should_deliver(const struct net_bridge_port *p, const struct sk_buff *skb) { - return (skb->dev != p->dev && p->state == BR_STATE_FORWARDING); + return (((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && + p->state == BR_STATE_FORWARDING); } static inline unsigned packet_length(const struct sk_buff *skb) @@ -92,6 +93,17 @@ } /* called with rcu_read_lock */ +void br_vepa_deliver(const struct net_bridge_port *to, struct sk_buff *skb) +{ + if (to != NULL) { + __br_forward(to, skb); + return; + } + + kfree_skb(skb); +} + +/* called with rcu_read_lock */ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb) { if (should_deliver(to, skb)) { @@ -109,6 +121,7 @@ { struct net_bridge_port *p; struct net_bridge_port *prev; + struct net_bridge_port *sp = NULL; if (clone) { struct sk_buff *skb2; @@ -121,6 +134,13 @@ skb = skb2; } + /* + * If we are a VEPA, then we do not want to send the frame + * to the port it 
came from originally. + */ + if (br->flags & BR_VEPA_MODE) + sp = br_vepa_find_src(br, eth_hdr(skb)->h_source); + prev = NULL; list_for_each_entry_rcu(p, &br->port_list, list) { diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -168,6 +168,8 @@ list_del_rcu(&p->list); rcu_assign_pointer(dev->br_port, NULL); + if (br->uplink == p) + br->uplink = NULL; kobject_uevent(&p->kobj, KOBJ_REMOVE); kobject_del(&p->kobj); @@ -225,6 +227,7 @@ br->topology_change_detected = 0; br->ageing_time = 300 * HZ; INIT_LIST_HEAD(&br->age_list); + br->uplink = NULL; br_stp_timer_init(br); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -52,6 +52,15 @@ br = p->br; br_fdb_update(br, p, eth_hdr(skb)->h_source); + /* + * If we are a VEPA, and the receiving port is not the uplink we + * simply want to send this frame to the uplink (after learning) + */ + if ((br->flags & BR_VEPA_MODE) && p != br->uplink) { + br_vepa_deliver(br->uplink, skb); + goto out; + } + if (p->state == BR_STATE_LEARNING) goto drop; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -84,6 +84,9 @@ struct kobject kobj; struct work_struct carrier_check; struct rcu_head rcu; + + unsigned long flags; +#define BR_HAIRPIN_MODE 0x00000001 }; struct net_bridge @@ -96,6 +99,8 @@ struct hlist_head hash[BR_HASH_SIZE]; struct list_head age_list; unsigned long feature_mask; + unsigned long flags; +#define BR_VEPA_MODE 0x00000010 /* STP */ bridge_id designated_root; @@ -120,6 +125,9 @@ struct timer_list topology_change_timer; struct timer_list gc_timer; struct kobject ifobj; + + /* VEPA */ + struct net_bridge_port *uplink; }; extern struct notifier_block br_device_notifier; @@ -157,6 +165,9 @@ extern void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr); +extern struct net_bridge_port 
*br_vepa_find_src(struct net_bridge *br, + const unsigned char *addr); + /* br_forward.c */ extern void br_deliver(const struct net_bridge_port *to, @@ -171,6 +182,8 @@ extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, int clone); +extern void br_vepa_deliver(const struct net_bridge_port *to, + struct sk_buff *skb); /* br_if.c */ extern int br_add_bridge(const char *name); diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -290,6 +290,69 @@ show_group_addr, store_group_addr); +static ssize_t show_vepa_mode(struct class_device *d, char *buf) +{ + struct net_bridge *br = to_bridge(d); + int vepa_mode = (br->flags & BR_VEPA_MODE) ? 1 : 0; + return sprintf(buf, "%d\n", vepa_mode); +} + +static ssize_t store_vepa_mode(struct class_device *d, const char *buf, + size_t len) +{ + struct net_bridge *br = to_bridge(d); + int vepa_mode = 0; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (sscanf(buf, "%d", &vepa_mode) != 1) + return -EINVAL; + + rtnl_lock(); + if (vepa_mode) + br->flags |= BR_VEPA_MODE; + else + br->flags &= ~BR_VEPA_MODE; + rtnl_unlock(); + + return len; +} +static CLASS_DEVICE_ATTR(vepa_mode, S_IRUGO | S_IWUSR, show_vepa_mode, + store_vepa_mode); + +static ssize_t show_uplink_port(struct class_device *d, char *buf) +{ + struct net_bridge *br = to_bridge(d); + if (br->uplink && br->uplink->dev) + return sprintf(buf, "%s\n", br->uplink->dev->name); + else + return sprintf(buf, "\n"); +} + +static ssize_t store_uplink_port(struct class_device *d, const char *buf, + size_t len) +{ + struct net_bridge *br = to_bridge(d); + struct net_device *dev; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + dev = dev_get_by_name(buf); + if (!dev || !dev->br_port || (dev->br_port->br != br)) { + br->uplink = NULL; + return -EINVAL; + } + + rtnl_lock(); + br->uplink = dev->br_port; + rtnl_unlock(); + + return len; +} +static CLASS_DEVICE_ATTR(uplink_port, S_IRUGO | 
S_IWUSR, show_uplink_port, + store_uplink_port); + static struct attribute *bridge_attrs[] = { &class_device_attr_forward_delay.attr, &class_device_attr_hello_time.attr, @@ -308,6 +371,8 @@ &class_device_attr_topology_change_timer.attr, &class_device_attr_gc_timer.attr, &class_device_attr_group_addr.attr, + &class_device_attr_vepa_mode.attr, + &class_device_attr_uplink_port.attr, NULL }; diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -137,6 +137,22 @@ } static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL); +static ssize_t show_hairpin_mode(struct net_bridge_port *p, char *buf) +{ + int hairpin_mode = (p->flags & BR_HAIRPIN_MODE) ? 1 : 0; + return sprintf(buf, "%d\n", hairpin_mode); +} +static ssize_t store_hairpin_mode(struct net_bridge_port *p, unsigned long v) +{ + if (v) + p->flags |= BR_HAIRPIN_MODE; + else + p->flags &= ~BR_HAIRPIN_MODE; + return 0; +} +static BRPORT_ATTR(hairpin_mode, S_IRUGO | S_IWUSR, + show_hairpin_mode, store_hairpin_mode); + static struct brport_attribute *brport_attrs[] = { &brport_attr_path_cost, &brport_attr_priority, @@ -152,6 +168,7 @@ &brport_attr_message_age_timer, &brport_attr_forward_delay_timer, &brport_attr_hold_timer, + &brport_attr_hairpin_mode, NULL }; _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |