Initial support for restartable network driver domains.
-h -- Print extended help message, including all arguments
-n -- Dry run only, don't actually create domain
-q -- Quiet - write output only to the system log
+ -s -- Don't start the domain, just build it.
""" % (sys.argv[0], xc_config_file)
def extra_usage ():
vbd_list = []; cmdline_ip = ''; cmdline_root=''; cmdline_extra=''
pci_device_list = []; console_port = -1
auto_console = False
+dontstart = False
##### Determine location of defaults file
#####
try:
- opts, args = getopt.getopt(sys.argv[1:], "h?nqcf:D:k:r:b:m:N:a:e:d:i:I:R:E:L:" )
+ opts, args = getopt.getopt(sys.argv[1:], "h?nqcsf:D:k:r:b:m:N:a:e:d:i:I:R:E:L:" )
for opt in opts:
if opt[0] == '-f': config_file= opt[1]
exec "%s='%s'" % (l,r)
if opt[0] == '-q': quiet = True
if opt[0] == '-L': restore = True; state_file = opt[1]
+ if opt[0] == '-s': dontstart = True
except getopt.GetoptError:
sys.exit()
else:
- ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline, control_evtchn=cons_response["remote_port"] )' % builder_fn )
+ ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline, control_evtchn=cons_response["remote_port"], flags=flags )' % builder_fn )
if ret < 0:
print "Error building Linux guest OS: "
print "Return code = " + str(ret)
sys.exit()
if new_io_world:
- cmsg = 'new_network_interface(dom='+str(id)+')'
- xend_response = xenctl.utils.xend_control_message(cmsg)
- if not xend_response['success']:
- print "Error creating network interface"
- print "Error type: " + xend_response['error_type']
- if xend_response['error_type'] == 'exception':
- print "Exception type: " + xend_response['exception_type']
- print "Exception val: " + xend_response['exception_value']
- xc.domain_destroy ( dom=id )
- sys.exit()
+ if not (flags & 8): # If it's not the net backend, give it a frontend.
+ cmsg = 'new_network_interface(dom='+str(id)+')'
+ xend_response = xenctl.utils.xend_control_message(cmsg)
+ if not xend_response['success']:
+ print "Error creating network interface"
+ print "Error type: " + xend_response['error_type']
+ if xend_response['error_type'] == 'exception':
+ print "Exception type: " + xend_response['exception_type']
+ print "Exception val: " + xend_response['exception_value']
+ xc.domain_destroy ( dom=id )
+ sys.exit()
+ else: # It's a new net backend - notify Xend.
+ cmsg = 'set_network_backend(dom='+str(id)+')'
+ xend_response = xenctl.utils.xend_control_message(cmsg)
+ if not xend_response['success']:
+ print "Error registering network backend"
+ print "Error type: " + xend_response['error_type']
+ if xend_response['error_type'] == 'exception':
+ print "Exception type: " + xend_response['exception_type']
+ print "Exception val: " + xend_response['exception_value']
+ xc.domain_destroy ( dom=id )
+ sys.exit()
else:
# setup virtual firewall rules for all aliases
for ip in vfr_ipaddr:
os.system('/usr/sbin/arping -A -b -I eth0 -c 1 -s %s %s' % (ip,gw))
if not nlb: print >>open('/proc/sys/net/ipv4/ip_nonlocal_bind','w'), '0'
- if xc.domain_start( dom=id ) < 0:
- print "Error starting domain"
- xc.domain_destroy ( dom=id )
- sys.exit()
+ if not dontstart:
+ if xc.domain_start( dom=id ) < 0:
+ print "Error starting domain"
+ xc.domain_destroy ( dom=id )
+ sys.exit()
return (id, cons_response['console_port'])
# end of make_domain()
const char *image_name,
const char *ramdisk_name,
const char *cmdline,
- unsigned int control_evtchn);
+ unsigned int control_evtchn,
+ unsigned long flags);
int xc_netbsd_build(int xc_handle,
u32 domid,
full_execution_context_t *ctxt,
const char *cmdline,
unsigned long shared_info_frame,
- unsigned int control_evtchn)
+ unsigned int control_evtchn,
+ unsigned long flags)
{
l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
memset(start_info, 0, sizeof(*start_info));
start_info->nr_pages = nr_pages;
start_info->shared_info = shared_info_frame << PAGE_SHIFT;
- start_info->flags = 0;
+ start_info->flags = flags;
start_info->pt_base = vpt_start;
start_info->nr_pt_frames = nr_pt_pages;
start_info->mfn_list = vphysmap_start;
const char *image_name,
const char *ramdisk_name,
const char *cmdline,
- unsigned int control_evtchn)
+ unsigned int control_evtchn,
+ unsigned long flags)
{
dom0_op_t launch_op, op;
int initrd_fd = -1;
&vstartinfo_start, &vkern_entry,
ctxt, cmdline,
op.u.getdomaininfo.shared_info_frame,
- control_evtchn) < 0 )
+ control_evtchn, flags) < 0 )
{
ERROR("Error constructing guest OS");
goto error_out;
u32 dom;
char *image, *ramdisk = NULL, *cmdline = "";
- int control_evtchn;
+ int control_evtchn, flags = 0;
static char *kwd_list[] = { "dom", "control_evtchn",
- "image", "ramdisk", "cmdline", NULL };
+ "image", "ramdisk", "cmdline", "flags",
+ NULL };
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|ss", kwd_list,
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|ssi", kwd_list,
&dom, &control_evtchn,
- &image, &ramdisk, &cmdline) )
+ &image, &ramdisk, &cmdline, &flags) )
return NULL;
- if ( xc_linux_build(xc->xc_handle, dom, image,
- ramdisk, cmdline, control_evtchn) != 0 )
+ if ( xc_linux_build(xc->xc_handle, dom, image,
+ ramdisk, cmdline, control_evtchn, flags) != 0 )
return PyErr_SetFromErrno(xc_error);
Py_INCREF(zero);
# Lists of all interfaces, indexed by local event-channel port.
port_list = {}
-
+
xc = Xc.new()
# Ignore writes to disconnected sockets. We clean up differently.
xend.blkif.backend_rx_req(port, msg)
elif type == CMSG_NETIF_FE and net_if:
net_if.ctrlif_rx_req(port, msg)
- elif type == CMSG_NETIF_BE and port == dom0_port:
+ elif type == CMSG_NETIF_BE and port == xend.netif.be_port:
xend.netif.backend_rx_req(port, msg)
else:
port.write_response(msg)
type = (msg.get_header())['type']
if type == CMSG_BLKIF_BE and port == dom0_port:
xend.blkif.backend_rx_rsp(port, msg)
- elif type == CMSG_NETIF_BE and port == dom0_port:
+ elif type == CMSG_NETIF_BE and port == xend.netif.be_port:
xend.netif.backend_rx_rsp(port, msg)
# Send console data.
work_done = True
# Back-end network-device work.
- if port == dom0_port and xend.netif.backend_do_work(port):
+ if port == xend.netif.be_port and xend.netif.backend_do_work(port):
work_done = True
# Finally, notify the remote end of any work that we did.
# Response is deferred until back-end driver sends acknowledgement.
return None
+
+##
+## set_network_backend
+## Authorise a domain to act as the net backend (assumes we only have one
+## backend driver for now). After this call, back end "up" notifications
+## for the network will only be accepted from this domain.
+##
+def set_network_backend(dom):
+ if xend.netif.be_port: xend.netif.recovery = True
+ xend.netif.be_port = xend.main.port_from_dom(dom)
+ return { 'success' : True }
CMSG_NETIF_FE = 4
CMSG_NETIF_FE_INTERFACE_STATUS_CHANGED = 0
CMSG_NETIF_FE_DRIVER_STATUS_CHANGED = 32
+CMSG_NETIF_BE_DRIVER_STATUS_CHANGED = 32
CMSG_NETIF_FE_INTERFACE_CONNECT = 33
CMSG_NETIF_FE_INTERFACE_DISCONNECT = 34
CMSG_NETIF_BE_CREATE = 0
CMSG_NETIF_BE_CONNECT = 2
CMSG_NETIF_BE_DISCONNECT = 3
+NETIF_DRIVER_STATUS_DOWN = 0
+NETIF_DRIVER_STATUS_UP = 1
+
pendmsg = None
pendaddr = None
+recovery = False # Is a recovery in progress? (if so, we'll need to notify guests)
+be_port = None # Port object for backend domain
+
def backend_tx_req(msg):
- port = xend.main.dom0_port
- if port.space_to_write_request():
- port.write_request(msg)
- port.notify()
+ if not xend.netif.be_port:
+ print "BUG: attempt to transmit request to non-existant netif driver"
+ if xend.netif.be_port.space_to_write_request():
+ xend.netif.be_port.write_request(msg)
+ xend.netif.be_port.notify()
else:
xend.netif.pendmsg = msg
def backend_rx_req(port, msg):
port.write_response(msg)
+ subtype = (msg.get_header())['subtype']
+ print "Received netif-be request, subtype %d" % subtype
+ if subtype == CMSG_NETIF_BE_DRIVER_STATUS_CHANGED:
+ (status, dummy) = struct.unpack("II", msg.get_payload())
+ if status == NETIF_DRIVER_STATUS_UP:
+ if xend.netif.recovery:
+ print "New netif backend now UP, notifying guests:"
+ for netif_key in interface.list.keys():
+ netif = interface.list[netif_key]
+ netif.create()
+ print " Notifying %d" % netif.dom
+ msg = xend.utils.message(CMSG_NETIF_FE, \
+ CMSG_NETIF_FE_INTERFACE_STATUS_CHANGED, 0)
+ msg.append_payload(struct.pack("IIIBBBBBBBB", \
+ 0,1,0,0,0,0,0,0,0,0,0))
+ netif.ctrlif_tx_req(xend.main.port_from_dom(netif.dom), msg)
+ print "Done notifying guests"
+ recovery = False
+ else: # No recovery in progress.
+ if xend.netif.be_port: # This should never be true! (remove later)
+ print "BUG: unexpected netif backend UP message from %d" \
+ % port.remote_dom
+ else:
+ print "Unexpected net backend driver status: %d" % status
def backend_rx_rsp(port, msg):
subtype = (msg.get_header())['subtype']
netif = interface.list[xend.main.port_from_dom(dom).local_port]
msg = xend.utils.message(CMSG_NETIF_FE, \
CMSG_NETIF_FE_INTERFACE_STATUS_CHANGED, 0)
- msg.append_payload(struct.pack("IIIBBBBBBBB",0,2, \
- netif.evtchn['port2'], \
+ msg.append_payload(struct.pack("IIIBBBBBBBB",0,2, \
+ netif.evtchn['port2'], \
netif.mac[0],netif.mac[1], \
netif.mac[2],netif.mac[3], \
netif.mac[4],netif.mac[5], \
# Dictionary of all network-device interfaces.
list = {}
+ drvdom = None
# NB. 'key' is an opaque value that has no meaning in this class.
def __init__(self, dom, key):
self.mac.append(int(random.random()*256))
interface.list[key] = self
+ self.create()
+
+ def create(self):
+ """Notify the current network back end to create the virtual interface
+ represented by this object."""
msg = xend.utils.message(CMSG_NETIF_BE, CMSG_NETIF_BE_CREATE, 0)
- msg.append_payload(struct.pack("IIBBBBBBBBI",dom,0, \
- self.mac[0],self.mac[1], \
- self.mac[2],self.mac[3], \
- self.mac[4],self.mac[5], \
+ msg.append_payload(struct.pack("IIBBBBBBBBI",self.dom,0, \
+ self.mac[0],self.mac[1], \
+ self.mac[2],self.mac[3], \
+ self.mac[4],self.mac[5], \
0,0,0))
xend.netif.pendaddr = xend.main.mgmt_req_addr
backend_tx_req(msg)
port.write_response(msg)
subtype = (msg.get_header())['subtype']
if subtype == CMSG_NETIF_FE_DRIVER_STATUS_CHANGED:
+ print "netif driver up message from %d" % port.remote_dom
msg = xend.utils.message(CMSG_NETIF_FE, \
CMSG_NETIF_FE_INTERFACE_STATUS_CHANGED, 0)
msg.append_payload(struct.pack("IIIBBBBBBBB",0,1,0,self.mac[0], \
self.mac[5],0,0))
self.ctrlif_tx_req(port, msg)
elif subtype == CMSG_NETIF_FE_INTERFACE_CONNECT:
+ print "netif connect request from %d" % port.remote_dom
(hnd,tx_frame,rx_frame) = struct.unpack("ILL", msg.get_payload())
xc = Xc.new()
- self.evtchn = xc.evtchn_bind_interdomain(dom1=0,dom2=self.dom)
+ self.evtchn = xc.evtchn_bind_interdomain( \
+ dom1=xend.netif.be_port.remote_dom, \
+ dom2=self.dom)
msg = xend.utils.message(CMSG_NETIF_BE, \
CMSG_NETIF_BE_CONNECT, 0)
msg.append_payload(struct.pack("IIILLI",self.dom,0, \
/* Make the domain privileged. */
set_bit(PF_PHYSDEV, &p->flags);
+ /* FIXME: MAW for now make the domain REALLY privileged so that it
+ * can run a backend driver (hw access should work OK otherwise) */
+ set_bit(PF_PRIVILEGED, &p->flags);
/* Grant write access to the specified device. */
if ( (pdev = pci_find_slot(bus, PCI_DEVFN(dev, func))) == NULL )
/* These flags are passed in the 'flags' field of start_info_t. */
#define SIF_PRIVILEGED 1 /* Is the domain privileged? */
-#define SIF_INITDOMAIN 2 /* Is thsi the initial control domain? */
+#define SIF_INITDOMAIN 2 /* Is this the initial control domain? */
+#define SIF_BLK_BE_DOMAIN 4 /* Is this a block backend domain? */
+#define SIF_NET_BE_DOMAIN 8 /* Is this a net backend domain? */
/* For use in guest OSes. */
extern shared_info_t *HYPERVISOR_shared_info;
{
int i;
- if ( !(start_info.flags & SIF_INITDOMAIN) )
+ if ( !(start_info.flags & SIF_NET_BE_DOMAIN) &&
+ !(start_info.flags & SIF_INIT_DOMAIN) )
return 0;
+ printk("Initialising Xen virtual ethernet backend driver\n");
+
skb_queue_head_init(&rx_queue);
skb_queue_head_init(&tx_queue);
#include <asm/evtchn.h>
#include <asm/ctrl_if.h>
+#include <asm/page.h>
+
#include "../netif.h"
#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */
netif_rx_interface_t *rx;
spinlock_t tx_lock;
+ spinlock_t rx_lock;
unsigned int handle;
unsigned int evtchn;
(_list)[0] = (_list)[_id]; \
(unsigned short)_id; })
-
static struct net_device *find_dev_by_handle(unsigned int handle)
{
struct list_head *ent;
np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
memset(&np->stats, 0, sizeof(np->stats));
spin_lock_init(&np->tx_lock);
+ spin_lock_init(&np->rx_lock);
/* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
for ( i = 0; i <= NETIF_TX_RING_SIZE; i++ )
panic("alloc_skb needs to provide us page-aligned buffers.");
id = GET_ID_FROM_FREELIST(np->rx_skbs);
- np->rx_skbs[id] = skb;
+ np->rx_skbs[id] = skb;
+
np->rx->ring[MASK_NET_RX_IDX(i)].req.id = id;
rx_pfn_array[nr_pfns] = virt_to_machine(skb->head) >> PAGE_SHIFT;
spin_lock_irq(&np->tx_lock);
+ /* if the backend isn't available then don't do anything! */
+ if ( !netif_carrier_ok(dev) )
+ {
+ spin_unlock_irq(&np->tx_lock);
+ return 1;
+ }
+
i = np->tx->req_prod;
id = GET_ID_FROM_FREELIST(np->tx_skbs);
unsigned long flags;
spin_lock_irqsave(&np->tx_lock, flags);
+
+ if( !netif_carrier_ok(dev) )
+ {
+ spin_unlock_irqrestore(&np->tx_lock, flags);
+ return;
+ }
+
network_tx_buf_gc(dev);
spin_unlock_irqrestore(&np->tx_lock, flags);
struct sk_buff_head rxq;
unsigned long flags;
+ spin_lock(&np->rx_lock);
+
+ /* if the device is undergoing recovery then don't do anything */
+ if ( !netif_carrier_ok(dev) )
+ {
+ spin_unlock(&np->rx_lock);
+ return 0;
+ }
+
skb_queue_head_init(&rxq);
if ( (budget = *pbudget) > dev->quota )
local_irq_restore(flags);
}
+ spin_unlock(&np->rx_lock);
+
return more_to_do;
}
netif_fe_interface_connect_t up;
struct net_device *dev;
struct net_private *np;
-
+ int i;
+
+ unsigned long tsc;
+
if ( status->handle != 0 )
{
printk(KERN_WARNING "Status change on unsupported netif %d\n",
{
printk(KERN_WARNING "Unexpected netif-DISCONNECTED message"
" in state %d\n", np->state);
- break;
+ printk(KERN_INFO "Attempting to reconnect network interface\n");
+
+ /* Begin interface recovery.
+ * TODO: Change the Xend<->Guest protocol so that a recovery
+ * is initiated by a special "RESET" message - disconnect could
+ * just mean we're not allowed to use this interface any more.
+ */
+
+ /* Stop old i/f to prevent errors whilst we rebuild the state. */
+ spin_lock_irq(&np->tx_lock);
+ spin_lock_irq(&np->rx_lock);
+ netif_stop_queue(dev);
+ netif_carrier_off(dev);
+ np->state = NETIF_STATE_DISCONNECTED;
+ spin_unlock_irq(&np->rx_lock);
+ spin_unlock_irq(&np->tx_lock);
+
+ /* Free resources. */
+ free_irq(np->irq, dev);
+ unbind_evtchn_from_irq(np->evtchn);
+
+ free_page((unsigned long)np->tx);
+ free_page((unsigned long)np->rx);
}
/* Move from CLOSED to DISCONNECTED state. */
memcpy(dev->dev_addr, status->mac, ETH_ALEN);
+ if(netif_carrier_ok(dev))
+ np->state = NETIF_STATE_CONNECTED;
+ else
+ {
+ int i, requeue_idx;
+ netif_tx_request_t *tx;
+
+ spin_lock_irq(&np->rx_lock);
+ spin_lock(&np->tx_lock);
+
+ /* Recovery procedure: */
+
+ /* Step 1: Reinitialise variables. */
+ np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
+ np->rx->event = 1;
+
+ /* Step 2: Rebuild the RX and TX ring contents.
+ * NB. We could just throw away the queued TX packets but we hope
+ * that sending them out might do some good. We have to rebuild
+ * the RX ring because some of our pages are currently flipped out
+ * so we can't just free the RX skbs.
+ * NB2. Freelist index entries are always going to be less than
+ * __PAGE_OFFSET, whereas pointers to skbs will always be equal or
+ * greater than __PAGE_OFFSET, so we use this to distinguish them.
+ */
+
+ /* Rebuild the TX buffer freelist and the TX ring itself.
+ * NB. This reorders packets :-( We could keep more private state
+ * to avoid this but maybe it doesn't matter so much given the
+ * interface has been down.
+ */
+ for ( requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++ )
+ {
+ if ( np->tx_skbs[i] >= __PAGE_OFFSET )
+ {
+ struct sk_buff *skb = np->tx_skbs[i];
+
+ tx = &np->tx->ring[MASK_NET_TX_IDX(requeue_idx++)].req;
+
+ tx->id = i;
+ tx->addr = virt_to_machine(skb->data);
+ tx->size = skb->len;
+
+ np->stats.tx_bytes += skb->len;
+ np->stats.tx_packets++;
+ }
+ }
+ wmb();
+ np->tx->req_prod = requeue_idx;
+
+ /* Rebuild the RX buffer freelist and the RX ring itself. */
+ for ( requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++ )
+ if ( np->rx_skbs[i] >= __PAGE_OFFSET )
+ np->rx->ring[requeue_idx++].req.id = i;
+ wmb();
+ np->rx->req_prod = requeue_idx;
+
+ /* Step 3: All public and private state should now be sane. Start
+ * sending and receiving packets again and give the driver domain a
+ * kick because we've probably just queued some packets. */
+
+ netif_carrier_on(dev);
+ netif_start_queue(dev);
+ np->state = NETIF_STATE_ACTIVE;
+
+ notify_via_evtchn(status->evtchn);
+
+ printk(KERN_INFO "Recovery completed\n");
+
+ spin_unlock(&np->tx_lock);
+ spin_unlock_irq(&np->rx_lock);
+ }
+
np->evtchn = status->evtchn;
np->irq = bind_evtchn_to_irq(np->evtchn);
(void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM,
- dev->name, dev);
-
- np->state = NETIF_STATE_CONNECTED;
+ dev->name, dev);
break;
default:
struct net_device *dev;
struct net_private *np;
- if ( start_info.flags & SIF_INITDOMAIN )
+ if ( start_info.flags & SIF_INITDOMAIN
+ || start_info.flags & SIF_NET_BE_DOMAIN )
return 0;
+ printk("Initialising Xen virtual ethernet frontend driver");
+
INIT_LIST_HEAD(&dev_list);
if ( (dev = alloc_etherdev(sizeof(struct net_private))) == NULL )
cmsg.length = sizeof(netif_fe_driver_status_changed_t);
st.status = NETIF_DRIVER_STATUS_UP;
memcpy(cmsg.msg, &st, sizeof(st));
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+
/*
* We should read 'nr_interfaces' from response message and wait
* for notifications before proceeding. For now we assume that we