bitkeeper revision 1.879.2.1 (4092738fCfvp-pu-UzwhXsHdzHbYPw)
author kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Fri, 30 Apr 2004 15:41:03 +0000 (15:41 +0000)
committer kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Fri, 30 Apr 2004 15:41:03 +0000 (15:41 +0000)
Completed first cut of new blkdev i/o world.

16 files changed:
.rootkeys
tools/examples/xc_dom_create.py
tools/xend/lib/blkif.py [new file with mode: 0644]
tools/xend/lib/console.py
tools/xend/lib/domain_controller.h
tools/xend/lib/main.py
tools/xend/lib/manager.py
xen/common/event_channel.c
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c
xenolinux-2.4.26-sparse/include/asm-xen/io.h

index f391d811f692683f104df0d253347ea574ffe946..f73ce770b4fba642961256c8be60d6dd7a2314be 100644 (file)
--- a/.rootkeys
+++ b/.rootkeys
 4055ee44Bu6oP7U0WxxXypbUt4dNPQ tools/xenctl/setup.py
 40431ac64Hj4ixUnKmlugZKhXPFE_Q tools/xend/Makefile
 4055ad95Se-FqttgxollqOAAHB94zA tools/xend/lib/__init__.py
+4092738fMRGC9fFBcPRCWaJaj9U3ag tools/xend/lib/blkif.py
 4055ad97wMLUj0BZT0e_T0EwQN0Bvw tools/xend/lib/console.py
 4048c0ddsF0WrU7HUzTvg1MJoCIfWg tools/xend/lib/domain_controller.h
 4054a301VEag2GwrBrFBna5U1BGlLA tools/xend/lib/main.py
index 799319c6a6c02e7885b6d2a4faff826a532006a3..bb9a0576d969e3089b6752f9631cb7d5105e74d0 100755 (executable)
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 import string, sys, os, time, socket, getopt, signal, syslog
-import Xc, xenctl.utils, xenctl.console_client
+import Xc, xenctl.utils, xenctl.console_client, re
 
 config_dir  = '/etc/xc/'
 config_file = xc_config_file = config_dir + 'defaults'
@@ -195,6 +195,15 @@ output('VM cmdline         : "%s"' % cmdline)
 if dryrun:
     sys.exit(1)
 
+##### HACK HACK HACK
+##### Until everyone moves to the new I/O world, and a more robust domain
+##### controller (xend), we use this little trick to discover whether we
+##### are in a testing environment for new I/O stuff.
+new_io_world = True
+for line in os.popen('cat /proc/interrupts').readlines():
+    if re.search('blkdev', line):
+        new_io_world = False
+
 ##### Code beyond this point is actually used to manage the mechanics of
 ##### starting (and watching if necessary) guest virtual machines.
 
@@ -228,14 +237,14 @@ def make_domain():
 
     cmsg = 'new_control_interface(dom='+str(id)+', console_port='+str(console_port)+')'
 
-    xend_response = xenctl.utils.xend_control_message(cmsg)
+    cons_response = xenctl.utils.xend_control_message(cmsg)
 
-    if not xend_response['success']:
+    if not cons_response['success']:
        print "Error creating initial event channel"
-       print "Error type: " + xend_response['error_type']
-       if xend_response['error_type'] == 'exception':
-           print "Exception type: " + xend_response['exception_type']
-           print "Exception value: " + xend_response['exception_value']
+       print "Error type: " + cons_response['error_type']
+       if cons_response['error_type'] == 'exception':
+           print "Exception type: " + cons_response['exception_type']
+           print "Exception value: " + cons_response['exception_value']
        xc.domain_destroy ( dom=id )
        sys.exit()
 
@@ -248,7 +257,7 @@ def make_domain():
             sys.exit()
     else:
 
-        ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline, control_evtchn=xend_response["remote_port"] )' % builder_fn)
+        ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline, control_evtchn=cons_response["remote_port"] )' % builder_fn)
         if ret < 0:
             print "Error building Linux guest OS: "
             print "Return code = " + str(ret)
@@ -259,6 +268,18 @@ def make_domain():
 
     # set the expertise level appropriately
     xenctl.utils.VBD_EXPERT_MODE = vbd_expert
+
+    if new_io_world:
+        cmsg = 'new_block_interface(dom='+str(id)+')'
+        xend_response = xenctl.utils.xend_control_message(cmsg)
+        if not xend_response['success']:
+            print "Error creating block interface"
+            print "Error type: " + xend_response['error_type']
+            if xend_response['error_type'] == 'exception':
+                print "Exception type: " + xend_response['exception_type']
+                print "Exception val:  " + xend_response['exception_value']
+            xc.domain_destroy ( dom=id )
+            sys.exit()
     
     for ( uname, virt_name, rw ) in vbd_list:
        virt_dev = xenctl.utils.blkdev_name_to_number( virt_name )
@@ -269,42 +290,70 @@ def make_domain():
            xc.domain_destroy ( dom=id )
            sys.exit()
 
-        # check that setting up this VBD won't violate the sharing
-        # allowed by the current VBD expertise level
-        if xenctl.utils.vd_extents_validate(segments, rw=='w' or rw=='rw') < 0:
-            xc.domain_destroy( dom = id )
-            sys.exit()
+        if new_io_world:
+            if len(segments) > 1:
+                print "New I/O world cannot deal with multi-extent vdisks"
+                xc.domain_destroy ( dom=id )
+                sys.exit()
+            seg = segments[0]
+            cmsg = 'new_block_device(dom=' + str(id) + \
+                   ',handle=0,vdev=' + str(virt_dev) + \
+                   ',pdev=' + str(seg['device']) + \
+                   ',start_sect=' + str(seg['start_sector']) + \
+                   ',nr_sect=' + str(seg['nr_sectors']) + \
+                   ',readonly=' + str(not re.match('w',rw)) + ')'
+            xend_response = xenctl.utils.xend_control_message(cmsg)
+            if not xend_response['success']:
+                print "Error creating virtual block device"
+                print "Error type: " + xend_response['error_type']
+                if xend_response['error_type'] == 'exception':
+                    print "Exception type: " + xend_response['exception_type']
+                    print "Exception val:  " + xend_response['exception_value']
+                xc.domain_destroy ( dom=id )
+                sys.exit()
+        else:
+            # check that setting up this VBD won't violate the sharing
+            # allowed by the current VBD expertise level
+            if xenctl.utils.vd_extents_validate(segments,
+                                                rw=='w' or rw=='rw') < 0:
+                xc.domain_destroy( dom = id )
+                sys.exit()
             
-       if xc.vbd_create( dom=id, vbd=virt_dev, writeable= rw=='w' or rw=='rw' ):
-           print "Error creating VBD vbd=%d writeable=%d\n" % (virt_dev,rw)
-           xc.domain_destroy ( dom=id )
-           sys.exit()
+            if xc.vbd_create( dom=id, vbd=virt_dev,
+                              writeable= rw=='w' or rw=='rw' ):
+                print "Error creating VBD %d (writeable=%d)\n" % (virt_dev,rw)
+                xc.domain_destroy ( dom=id )
+                sys.exit()
        
-        if xc.vbd_setextents( dom=id,
-                              vbd=virt_dev,
-                              extents=segments):
-            print "Error populating VBD vbd=%d\n" % virt_dev
-            xc.domain_destroy ( dom=id )
-            sys.exit()
-
-    # setup virtual firewall rules for all aliases
-    for ip in vfr_ipaddr:
-       xenctl.utils.setup_vfr_rules_for_vif( id, 0, ip )
-
-    # check for physical device access
-    for (pci_bus, pci_dev, pci_func) in pci_device_list:
-        if xc.physdev_pci_access_modify(
-            dom=id, bus=pci_bus, dev=pci_dev, func=pci_func, enable=1 ) < 0:
-            print "Non-fatal error enabling PCI device access."
-        else:
-            print "Enabled PCI access (%d:%d:%d)." % (pci_bus,pci_dev,pci_func)
+            if xc.vbd_setextents( dom=id,
+                                  vbd=virt_dev,
+                                  extents=segments):
+                print "Error populating VBD vbd=%d\n" % virt_dev
+                xc.domain_destroy ( dom=id )
+                sys.exit()
+
+    if not new_io_world:
+        # setup virtual firewall rules for all aliases
+        for ip in vfr_ipaddr:
+            xenctl.utils.setup_vfr_rules_for_vif( id, 0, ip )
+
+    if new_io_world:
+        # check for physical device access
+        for (pci_bus, pci_dev, pci_func) in pci_device_list:
+            if xc.physdev_pci_access_modify(
+                dom=id, bus=pci_bus, dev=pci_dev,
+                func=pci_func, enable=1 ) < 0:
+                print "Non-fatal error enabling PCI device access."
+            else:
+                print "Enabled PCI access (%d:%d:%d)." % \
+                      (pci_bus,pci_dev,pci_func)
 
     if xc.domain_start( dom=id ) < 0:
         print "Error starting domain"
         xc.domain_destroy ( dom=id )
         sys.exit()
 
-    return (id, xend_response['console_port'])
+    return (id, cons_response['console_port'])
 # end of make_domain()
 
 def mkpidfile():
diff --git a/tools/xend/lib/blkif.py b/tools/xend/lib/blkif.py
new file mode 100644 (file)
index 0000000..94e058f
--- /dev/null
@@ -0,0 +1,143 @@
+
+#################################################################
+## xend/blkif.py -- Block-interface management functions for Xend
+## Copyright (c) 2004, K A Fraser (University of Cambridge)
+#################################################################
+
+import errno, re, os, select, signal, socket, struct, sys
+import xend.main, xend.console, xend.manager, xend.utils, Xc
+
+CMSG_BLKIF_BE = 1
+CMSG_BLKIF_FE = 2
+CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED =  0
+CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED    = 32
+CMSG_BLKIF_FE_INTERFACE_CONNECT        = 33
+CMSG_BLKIF_FE_INTERFACE_DISCONNECT     = 34
+CMSG_BLKIF_BE_CREATE      = 0
+CMSG_BLKIF_BE_DESTROY     = 1
+CMSG_BLKIF_BE_CONNECT     = 2
+CMSG_BLKIF_BE_DISCONNECT  = 3
+CMSG_BLKIF_BE_VBD_CREATE  = 4
+CMSG_BLKIF_BE_VBD_DESTROY = 5
+CMSG_BLKIF_BE_VBD_GROW    = 6
+CMSG_BLKIF_BE_VBD_SHRINK  = 7
+
+pendmsg = None
+pendaddr = None
+
+def backend_tx_req(msg):
+    port = xend.main.dom0_port
+    if port.space_to_write_request():
+        port.write_request(msg)
+        port.notify()
+    else:
+        xend.blkif.pendmsg = msg
+
+def backend_rx_req(port, msg):
+    port.write_response(msg)
+
+def backend_rx_rsp(port, msg):
+    subtype = (msg.get_header())['subtype']
+    print "Received blkif-be response, subtype %d" % subtype
+    if subtype == CMSG_BLKIF_BE_CREATE:
+        rsp = { 'success': True }
+        xend.main.send_management_response(rsp, xend.blkif.pendaddr)
+    elif subtype == CMSG_BLKIF_BE_CONNECT:
+        (dom,hnd,evtchn,frame,st) = struct.unpack("QIILI", msg.get_payload())
+        blkif = interface.list[xend.main.port_from_dom(dom).local_port]
+        msg = xend.utils.message(CMSG_BLKIF_FE, \
+                                 CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED, 0)
+        msg.append_payload(struct.pack("III",0,2,blkif.evtchn['port2']))
+        blkif.ctrlif_tx_req(xend.main.port_list[blkif.key], msg)
+    elif subtype == CMSG_BLKIF_BE_VBD_CREATE:
+        (dom,hnd,vdev,ro,st) = struct.unpack("QIHII", msg.get_payload())
+        blkif = interface.list[xend.main.port_from_dom(dom).local_port]
+        (pdev, start_sect, nr_sect, readonly) = blkif.devices[vdev]
+        msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_GROW, 0)
+        msg.append_payload(struct.pack("QIHHHQQI",dom,0,vdev,0, \
+                                       pdev,start_sect,nr_sect,0))
+        backend_tx_req(msg)
+    elif subtype == CMSG_BLKIF_BE_VBD_GROW:
+        rsp = { 'success': True }
+        xend.main.send_management_response(rsp, xend.blkif.pendaddr)
+
+def backend_do_work(port):
+    global pendmsg
+    if pendmsg and port.space_to_write_request():
+        port.write_request(pendmsg)
+        pendmsg = None
+        return True
+    return False
+
+
+class interface:
+
+    # Dictionary of all block-device interfaces.
+    list = {}
+
+
+    # NB. 'key' is an opaque value that has no meaning in this class.
+    def __init__(self, dom, key):
+        self.dom     = dom
+        self.key     = key
+        self.devices = {}
+        self.pendmsg = None
+        interface.list[key] = self
+        msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_CREATE, 0)
+        msg.append_payload(struct.pack("QII",dom,0,0))
+        xend.blkif.pendaddr = xend.main.mgmt_req_addr
+        backend_tx_req(msg)
+
+    # Attach a device to the specified interface
+    def attach_device(self, vdev, pdev, start_sect, nr_sect, readonly):
+        if self.devices.has_key(vdev):
+            return False
+        self.devices[vdev] = (pdev, start_sect, nr_sect, readonly)
+        msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_CREATE, 0)
+        msg.append_payload(struct.pack("QIHII",self.dom,0,vdev,readonly,0))
+        xend.blkif.pendaddr = xend.main.mgmt_req_addr
+        backend_tx_req(msg)
+        return True
+
+
+    # Completely destroy this interface.
+    def destroy(self):
+        del interface.list[self.key]
+        msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_DESTROY, 0)
+        msg.append_payload(struct.pack("QII",self.dom,0,0))
+        backend_tx_req(msg)        
+
+
+    # The parameter @port is the control-interface event channel. This method
+    # returns True if messages were written to the control interface.
+    def ctrlif_transmit_work(self, port):
+        if self.pendmsg and port.space_to_write_request():
+            port.write_request(self.pendmsg)
+            self.pendmsg = None
+            return True
+        return False
+
+    def ctrlif_tx_req(self, port, msg):
+        if port.space_to_write_request():
+            port.write_request(msg)
+            port.notify()
+        else:
+            self.pendmsg = msg
+
+    def ctrlif_rx_req(self, port, msg):
+        port.write_response(msg)
+        subtype = (msg.get_header())['subtype']
+        if subtype == CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED:
+            msg = xend.utils.message(CMSG_BLKIF_FE, \
+                                     CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED, 0)
+            msg.append_payload(struct.pack("III",0,1,0))
+            self.ctrlif_tx_req(port, msg)
+        elif subtype == CMSG_BLKIF_FE_INTERFACE_CONNECT:
+            (hnd,frame) = struct.unpack("IL", msg.get_payload())
+            xc = Xc.new()
+            self.evtchn = xc.evtchn_bind_interdomain(dom1=0,dom2=self.dom)
+            msg = xend.utils.message(CMSG_BLKIF_BE, \
+                                     CMSG_BLKIF_BE_CONNECT, 0)
+            msg.append_payload(struct.pack("QIILI",self.dom,0, \
+                                           self.evtchn['port1'],frame,0))
+            backend_tx_req(msg)
index aad60699794937d9ce70453d8bbe3d795bdcfdb1..57898817f52ed00adca02996342ee680fbde6603 100644 (file)
@@ -5,7 +5,7 @@
 #############################################################
 
 import errno, re, os, select, signal, socket, struct, sys
-
+import xend.blkif, xend.main, xend.manager, xend.utils, Xc
 
 ##
 ## interface:
@@ -16,7 +16,7 @@ import errno, re, os, select, signal, socket, struct, sys
 ##   CONNECTED: sending/receiving console data on TCP port 'self.port'
 ##
 ##  A dictionary of all active interfaces, indexed by TCP socket descriptor,
-##  is accessible as 'interface.interface_list'.
+##  is accessible as 'interface.list_by_fd'.
 ##
 ##  NB. When a class instance is to be destroyed you *must* call the 'close'
 ##  method. Otherwise a stale reference will eb left in the interface list.
@@ -30,7 +30,11 @@ class interface:
 
 
     # Dictionary of all active (non-closed) console interfaces.
-    interface_list = {}
+    list_by_fd = {}
+
+
+    # Dictionary of all console interfaces, closed and open.
+    list = {}
 
 
     # NB. 'key' is an opaque value that has no meaning in this class.
@@ -38,6 +42,9 @@ class interface:
         self.status = interface.CLOSED
         self.port   = port
         self.key    = key
+        self.rbuf   = xend.utils.buffer()
+        self.wbuf   = xend.utils.buffer()
+        interface.list[key] = self
 
 
     # Is this interface closed (inactive)?
@@ -58,14 +65,14 @@ class interface:
     # Close the interface, if it is not closed already.
     def close(self):
         if not self.closed():
-            del interface.interface_list[self.sock.fileno()]
+            del interface.list_by_fd[self.sock.fileno()]
             self.sock.close()
             del self.sock
             self.status = interface.CLOSED
 
 
     # Move the interface into the 'listening' state. Opens a new listening
-    # socket and updates 'interface_list'.
+    # socket and updates 'list_by_fd'.
     def listen(self):
         # Close old socket (if any), and create a fresh one.
         self.close()
@@ -80,7 +87,7 @@ class interface:
 
             # Announce the new status of thsi interface.
             self.status = interface.LISTENING
-            interface.interface_list[self.sock.fileno()] = self
+            interface.list_by_fd[self.sock.fileno()] = self
 
         except:
             # In case of trouble ensure we get rid of dangling socket reference
@@ -105,7 +112,69 @@ class interface:
         # Publish the new socket and the new interface state.
         self.sock = sock
         self.status = interface.CONNECTED
-        interface.interface_list[self.sock.fileno()] = self
+        interface.list_by_fd[self.sock.fileno()] = self
         return 1
 
 
+    # Completely destroy a console interface.
+    def destroy(self):
+        self.close()
+        del interface.list[self.key]
+
+
+    # Do work triggered by resource availability on a console-interface socket.
+    def socket_work(self):
+        # If the interface is listening, check for pending connections.
+        if self.listening():
+            self.connect()
+
+        # All done if the interface is not connected.
+        if not self.connected():
+            return
+
+        # Send as much pending data as possible via the socket.
+        while not self.rbuf.empty():
+            try:
+                bytes = self.sock.send(self.rbuf.peek())
+                if bytes > 0:
+                    self.rbuf.discard(bytes)
+            except socket.error, error:
+                pass
+
+        # Read as much data as is available. Don't worry about
+        # overflowing our buffer: it's more important to read the
+        # incoming data stream and detect errors or closure of the
+        # remote end in a timely manner.
+        try:
+            while 1:
+                data = self.sock.recv(2048)
+                # Return of zero means the remote end has disconnected.
+                # We therefore return the console interface to listening.
+                if not data:
+                    self.listen()
+                    break
+                self.wbuf.write(data)
+        except socket.error, error:
+            # Assume that most errors mean that the connection is dead.
+            # In such cases we return the interface to 'listening' state.
+            if error[0] != errno.EAGAIN:
+                print "Better return to listening"
+                self.listen()
+                print "New status: " + str(self.status)
+
+
+    # The parameter @port is the control-interface event channel. This method
+    # returns True if messages were written to the control interface.
+    def ctrlif_transmit_work(self, port):
+        work_done = False
+        while not self.wbuf.empty() and port.space_to_write_request():
+            msg = xend.utils.message(0, 0, 0)
+            msg.append_payload(self.wbuf.read(msg.MAX_PAYLOAD))
+            port.write_request(msg)
+            work_done = True
+        return work_done
+
+
+    def ctrlif_rx_req(self, port, msg):
+        self.rbuf.write(msg.get_payload())
+        port.write_response(msg)
index d9ea7d616081871d467959ea951f6def9f87f2bd..68d4fac1d238b1d1350cf8fa8024ef76a27abddb 100644 (file)
@@ -76,8 +76,8 @@ typedef struct {
 
 /* Messages from guest to domain controller. */
 #define CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED     32
-#define CMSG_BLKIF_FE_INTERFACE_UP              33
-#define CMSG_BLKIF_FE_INTERFACE_DOWN            34
+#define CMSG_BLKIF_FE_INTERFACE_CONNECT         33
+#define CMSG_BLKIF_FE_INTERFACE_DISCONNECT      34
 
 /* These are used by both front-end and back-end drivers. */
 #define blkif_vdev_t   u16
@@ -91,13 +91,13 @@ typedef struct {
  *   1. The shared-memory frame is available for reuse.
  *   2. Any unacknowledged messgaes pending on the interface were dropped.
  */
-#define BLKIF_INTERFACE_STATUS_DESTROYED 0 /* Interface doesn't exist.      */
-#define BLKIF_INTERFACE_STATUS_DOWN      1 /* Interface exists but is down. */
-#define BLKIF_INTERFACE_STATUS_UP        2 /* Interface exists and is up.   */
+#define BLKIF_INTERFACE_STATUS_DESTROYED    0 /* Interface doesn't exist.    */
+#define BLKIF_INTERFACE_STATUS_DISCONNECTED 1 /* Exists but is disconnected. */
+#define BLKIF_INTERFACE_STATUS_CONNECTED    2 /* Exists and is connected.    */
 typedef struct {
     unsigned int handle;
     unsigned int status;
-    unsigned int evtchn; /* status == BLKIF_INTERFACE_STATUS_UP */
+    unsigned int evtchn; /* status == BLKIF_INTERFACE_STATUS_CONNECTED */
 } blkif_fe_interface_status_changed_t;
 
 /*
@@ -109,30 +109,37 @@ typedef struct {
  *  If the driver goes DOWN while interfaces are still UP, the domain
  *  will automatically take the interfaces DOWN.
  */
-#define BLKIF_DRIVER_STATUS_DOWN         0
-#define BLKIF_DRIVER_STATUS_UP           1
+#define BLKIF_DRIVER_STATUS_DOWN   0
+#define BLKIF_DRIVER_STATUS_UP     1
 typedef struct {
     unsigned int status; /* BLKIF_DRIVER_STATUS_??? */
 } blkif_fe_driver_status_changed_t;
 
 /*
- * CMSG_BLKIF_FE_INTERFACE_UP:
- *  If successful, the domain controller will acknowledge with a STATUS_UP
- *  message.
+ * CMSG_BLKIF_FE_INTERFACE_CONNECT:
+ *  If successful, the domain controller will acknowledge with a
+ *  STATUS_CONNECTED message.
  */
 typedef struct {
     unsigned int  handle;
     unsigned long shmem_frame;
-} blkif_fe_interface_up_t;
+} blkif_fe_interface_connect_t;
 
 /*
- * CMSG_BLKIF_FE_INTERFACE_DOWN:
- *  If successful, the domain controller will acknowledge with a STATUS_DOWN
- *  message.
+ * CMSG_BLKIF_FE_INTERFACE_DISCONNECT:
+ *  If successful, the domain controller will acknowledge with a
+ *  STATUS_DISCONNECTED message.
  */
 typedef struct {
+    /* IN */
     unsigned int handle;
-} blkif_fe_interface_down_t;
+    /* OUT */
+    /*
+     * Tells driver how many interfaces it should expect to immediately
+     * receive notifications about.
+     */
+    unsigned int nr_interfaces;
+} blkif_fe_interface_disconnect_t;
 
 
 /******************************************************************************
@@ -142,10 +149,12 @@ typedef struct {
 /* Messages from domain controller. */
 #define CMSG_BLKIF_BE_CREATE      0  /* Create a new block-device interface. */
 #define CMSG_BLKIF_BE_DESTROY     1  /* Destroy a block-device interface.    */
-#define CMSG_BLKIF_BE_VBD_CREATE  2  /* Create a new VBD for an interface.   */
-#define CMSG_BLKIF_BE_VBD_DESTROY 3  /* Delete a VBD from an interface.      */
-#define CMSG_BLKIF_BE_VBD_GROW    4  /* Append an extent to a given VBD.     */
-#define CMSG_BLKIF_BE_VBD_SHRINK  5  /* Remove last extent from a given VBD. */
+#define CMSG_BLKIF_BE_CONNECT     2  /* Connect i/f to remote driver.        */
+#define CMSG_BLKIF_BE_DISCONNECT  3  /* Disconnect i/f from remote driver.   */
+#define CMSG_BLKIF_BE_VBD_CREATE  4  /* Create a new VBD for an interface.   */
+#define CMSG_BLKIF_BE_VBD_DESTROY 5  /* Delete a VBD from an interface.      */
+#define CMSG_BLKIF_BE_VBD_GROW    6  /* Append an extent to a given VBD.     */
+#define CMSG_BLKIF_BE_VBD_SHRINK  7  /* Remove last extent from a given VBD. */
 
 /* Messages to domain controller. */
 #define CMSG_BLKIF_BE_DRIVER_STATUS_CHANGED 32
@@ -167,36 +176,36 @@ typedef struct {
 /* The following are specific error returns. */
 #define BLKIF_BE_STATUS_INTERFACE_EXISTS    2
 #define BLKIF_BE_STATUS_INTERFACE_NOT_FOUND 3
-#define BLKIF_BE_STATUS_VBD_EXISTS          4
-#define BLKIF_BE_STATUS_VBD_NOT_FOUND       5
-#define BLKIF_BE_STATUS_OUT_OF_MEMORY       6
-#define BLKIF_BE_STATUS_EXTENT_NOT_FOUND    7
-#define BLKIF_BE_STATUS_MAPPING_ERROR       8
+#define BLKIF_BE_STATUS_INTERFACE_CONNECTED 4
+#define BLKIF_BE_STATUS_VBD_EXISTS          5
+#define BLKIF_BE_STATUS_VBD_NOT_FOUND       6
+#define BLKIF_BE_STATUS_OUT_OF_MEMORY       7
+#define BLKIF_BE_STATUS_EXTENT_NOT_FOUND    8
+#define BLKIF_BE_STATUS_MAPPING_ERROR       9
 
 /* This macro can be used to create an array of descriptive error strings. */
-#define BLKIF_BE_STATUS_ERRORS {   \
-    "Okay",                        \
-    "Non-specific error",          \
-    "Interface already exists",    \
-    "Interface not found",         \
-    "VBD already exists",          \
-    "VBD not found",               \
-    "Out of memory",               \
-    "Extent not found for VBD",    \
+#define BLKIF_BE_STATUS_ERRORS {    \
+    "Okay",                         \
+    "Non-specific error",           \
+    "Interface already exists",     \
+    "Interface not found",          \
+    "Interface is still connected", \
+    "VBD already exists",           \
+    "VBD not found",                \
+    "Out of memory",                \
+    "Extent not found for VBD",     \
     "Could not map domain memory" }
 
 /*
  * CMSG_BLKIF_BE_CREATE:
  *  When the driver sends a successful response then the interface is fully
- *  set up. The controller will send an UP notification to the front-end
+ *  created. The controller will send a DOWN notification to the front-end
  *  driver.
  */
 typedef struct { 
     /* IN */
     domid_t        domid;             /* Domain attached to new interface.   */
     unsigned int   blkif_handle;      /* Domain-specific interface handle.   */
-    unsigned int   evtchn;            /* Event channel for notifications.    */
-    unsigned long  shmem_frame;       /* Page cont. shared comms window.     */
     /* OUT */
     unsigned int   status;
 } blkif_be_create_t; 
@@ -204,8 +213,8 @@ typedef struct {
 /*
  * CMSG_BLKIF_BE_DESTROY:
  *  When the driver sends a successful response then the interface is fully
- *  torn down. The controller will send a DOWN notification to the front-end
- *  driver.
+ *  torn down. The controller will send a DESTROYED notification to the
+ *  front-end driver.
  */
 typedef struct { 
     /* IN */
@@ -215,6 +224,36 @@ typedef struct {
     unsigned int   status;
 } blkif_be_destroy_t; 
 
+/*
+ * CMSG_BLKIF_BE_CONNECT:
+ *  When the driver sends a successful response then the interface is fully
+ *  connected. The controller will send a CONNECTED notification to the
+ *  front-end driver.
+ */
+typedef struct { 
+    /* IN */
+    domid_t        domid;             /* Domain attached to new interface.   */
+    unsigned int   blkif_handle;      /* Domain-specific interface handle.   */
+    unsigned int   evtchn;            /* Event channel for notifications.    */
+    unsigned long  shmem_frame;       /* Page cont. shared comms window.     */
+    /* OUT */
+    unsigned int   status;
+} blkif_be_connect_t; 
+
+/*
+ * CMSG_BLKIF_BE_DISCONNECT:
+ *  When the driver sends a successful response then the interface is fully
+ *  disconnected. The controller will send a DISCONNECTED notification to the
+ *  front-end driver.
+ */
+typedef struct { 
+    /* IN */
+    domid_t        domid;             /* Domain attached to new interface.   */
+    unsigned int   blkif_handle;      /* Domain-specific interface handle.   */
+    /* OUT */
+    unsigned int   status;
+} blkif_be_disconnect_t; 
+
 /* CMSG_BLKIF_BE_VBD_CREATE */
 typedef struct { 
     /* IN */
@@ -264,7 +303,14 @@ typedef struct {
  *  will automatically send DOWN notifications.
  */
 typedef struct {
+    /* IN */
     unsigned int status; /* BLKIF_DRIVER_STATUS_??? */
+    /* OUT */
+    /*
+     * Tells driver how many interfaces it should expect to immediately
+     * receive notifications about.
+     */
+    unsigned int nr_interfaces;
 } blkif_be_driver_status_changed_t;
 
 #endif /* __DOMAIN_CONTROLLER_H__ */
index b870af55d1d3309347f0070faf37c8391e4535fa..7b5adbab8312ea25cdddfe4d4d85a292e06e39b7 100755 (executable)
@@ -5,7 +5,7 @@
 ###########################################################
 
 import errno, re, os, pwd, select, signal, socket, struct, sys, time
-import xend.console, xend.manager, xend.utils, Xc
+import xend.blkif, xend.console, xend.manager, xend.utils, Xc
 
 
 # The following parameters could be placed in a configuration file.
@@ -16,13 +16,35 @@ CONTROL_DIR  = '/var/run/xend'
 UNIX_SOCK    = 'management_sock' # relative to CONTROL_DIR
 
 
+CMSG_CONSOLE  = 0
+CMSG_BLKIF_BE = 1
+CMSG_BLKIF_FE = 2
+
+
+def port_from_dom(dom):
+    global port_list
+    for idx, port in port_list.items():
+        if port.remote_dom == dom:
+            return port
+    return None
+
+
+def send_management_response(response, addr):
+    try:
+        response = str(response)
+        print "Mgmt_rsp[%s]: %s" % (addr, response)
+        management_interface.sendto(response, addr)
+    except socket.error, error:
+        pass
+
+
 def daemon_loop():
     # Could we do this more nicely? The xend.manager functions need access
     # to this global state to do their work.
-    global control_list, notifier
+    global port_list, notifier, management_interface, mgmt_req_addr, dom0_port
 
-    # List of all control interfaces, indexed by local event-channel port.
-    control_list = {}
+    # Lists of all interfaces, indexed by local event-channel port.
+    port_list = {}
 
     xc = Xc.new()
 
@@ -46,13 +68,10 @@ def daemon_loop():
 
     # The DOM0 control interface is not set up via the management interface.
     # Note that console messages don't come our way (actually, only driver
-    # back-ends should use the DOM0 control interface) -- the console
-    # structures are dummies.
+    # back-ends should use the DOM0 control interface).
     dom0_port = xend.utils.port(0)
-    xend.main.notifier.bind(dom0_port.local_port)
-    xend.main.control_list[dom0_port.local_port] =          \
-      (dom0_port, xend.utils.buffer(), xend.utils.buffer(), \
-       xend.console.interface(0, dom0_port.local_port))
+    notifier.bind(dom0_port.local_port)
+    port_list[dom0_port.local_port] = dom0_port
 
     ##
     ## MAIN LOOP
@@ -68,10 +87,10 @@ def daemon_loop():
         waitset = select.poll()
         waitset.register(management_interface, select.POLLIN)
         waitset.register(notifier, select.POLLIN)
-        for idx, (port, rbuf, wbuf, con_if) in control_list.items():
+        for idx, con_if in xend.console.interface.list_by_fd.items():
             if not con_if.closed():
                 pflags = select.POLLIN
-                if not rbuf.empty() and con_if.connected():
+                if not con_if.rbuf.empty() and con_if.connected():
                     pflags = select.POLLIN | select.POLLOUT
                 waitset.register(con_if.sock.fileno(), pflags)
 
@@ -82,16 +101,16 @@ def daemon_loop():
         # These should consist of executable Python statements that call
         # well-known management functions (e.g., new_control_interface(dom=9)).
         try:
-            data, addr = management_interface.recvfrom(2048)
+            data, mgmt_req_addr = management_interface.recvfrom(2048)
         except socket.error, error:
             if error[0] != errno.EAGAIN:
                 raise
         else:
-            if addr:
+            if mgmt_req_addr:
                 # Evaluate the request in an exception-trapping sandbox.
                 try:
-                    print "Mgmt_req[%s]: %s" % (addr, data)
-                    response = str(eval('xend.manager.'+data))
+                    print "Mgmt_req[%s]: %s" % (mgmt_req_addr, data)
+                    response = eval('xend.manager.'+data)
 
                 except:
                     # Catch all exceptions and turn into an error response:
@@ -107,69 +126,20 @@ def daemon_loop():
                     response = str(response)
 
                 # Try to send a response to the requester.
-                try:
-                    print "Mgmt_rsp[%s]: %s" % (addr, response)
-                    management_interface.sendto(response, addr)
-                except socket.error, error:
-                    pass
+                if response:
+                    send_management_response(response, mgmt_req_addr)
                 
         # Do work for every console interface that hit in the poll set.
         for (fd, events) in fdset:
-            if not xend.console.interface.interface_list.has_key(fd):
-                continue
-            con_if = xend.console.interface.interface_list[fd]
-
-            # If the interface is listening, check for pending connections.
-            if con_if.listening():
-                con_if.connect()
-
-            # All done if the interface is not connected.
-            if not con_if.connected():
-                continue
-            (port, rbuf, wbuf, con_if) = control_list[con_if.key]
-
-            # Send as much pending data as possible via the socket.
-            while not rbuf.empty():
-                try:
-                    bytes = con_if.sock.send(rbuf.peek())
-                    if bytes > 0:
-                        rbuf.discard(bytes)
-                except socket.error, error:
-                    pass
-
-            # Read as much data as is available. Don't worry about
-            # overflowing our buffer: it's more important to read the
-            # incoming data stream and detect errors or closure of the
-            # remote end in a timely manner.
-            try:
-                while 1:
-                    data = con_if.sock.recv(2048)
-                    # Return of zero means the remote end has disconnected.
-                    # We therefore return the console interface to listening.
-                    if not data:
-                        con_if.listen()
-                        break
-                    wbuf.write(data)
-            except socket.error, error:
-                # Assume that most errors mean that the connection is dead.
-                # In such cases we return the interface to 'listening' state.
-                if error[0] != errno.EAGAIN:
-                    print "Better return to listening"
-                    con_if.listen()
-                    print "New status: " + str(con_if.status)
-
-            # We may now have pending data to send via the relevant
-            # inter-domain control interface. If so then we send all we can
-            # and notify the remote end.
-            work_done = False
-            while not wbuf.empty() and port.space_to_write_request():
-                msg = xend.utils.message(0, 0, 0)
-                msg.append_payload(wbuf.read(msg.MAX_PAYLOAD))
-                port.write_request(msg)
-                work_done = True
-            if work_done:
-                port.notify()
-
+            if xend.console.interface.list_by_fd.has_key(fd):
+                con_if = xend.console.interface.list_by_fd[fd]
+                con_if.socket_work()
+                # We may now have pending data to send via the control
+                # interface. If so then send all we can and notify the remote.
+                port = port_list[con_if.key]
+                if con_if.ctrlif_transmit_work(port):
+                    port.notify()
+                    
         # Process control-interface notifications from other guest OSes.
         while 1:            
             # Grab a notification, if there is one.
@@ -178,42 +148,69 @@ def daemon_loop():
                 break
             (idx, type) = notification
 
-            if not control_list.has_key(idx):
+            if not port_list.has_key(idx):
                 continue
 
-            (port, rbuf, wbuf, con_if) = control_list[idx]
+            port = port_list[idx]
             work_done = False
 
+            con_if = False
+            if xend.console.interface.list.has_key(idx):
+                con_if = xend.console.interface.list[idx]
+
+            blk_if = False
+            if xend.blkif.interface.list.has_key(idx):
+                blk_if = xend.blkif.interface.list[idx]
+
             # If we pick up a disconnect notification then we do any necessary
             # cleanup.
             if type == notifier.EXCEPTION:
                 ret = xc.evtchn_status(idx)
                 if ret['status'] == 'unbound':
                     notifier.unbind(idx)
-                    con_if.close()
-                    del control_list[idx], port, rbuf, wbuf, con_if
+                    del port_list[idx], port
+                    if con_if:
+                        con_if.destroy()
+                        del con_if
+                    if blk_if:
+                        blk_if.destroy()
+                        del blk_if
                     continue
 
-            # Read incoming requests. Currently assume that request
-            # message always containb console data.
+            # Process incoming requests.
             while port.request_to_read():
                 msg = port.read_request()
-                rbuf.write(msg.get_payload())
-                port.write_response(msg)
                 work_done = True
-
-            # Incoming responses are currently thrown on the floor.
+                type = (msg.get_header())['type']
+                if type == CMSG_CONSOLE and con_if:
+                    con_if.ctrlif_rx_req(port, msg)
+                elif type == CMSG_BLKIF_FE and blk_if:
+                    blk_if.ctrlif_rx_req(port, msg)
+                elif type == CMSG_BLKIF_BE and port == dom0_port:
+                    xend.blkif.backend_rx_req(port, msg)
+                else:
+                    port.write_response(msg)
+
+            # Process incoming responses.
             while port.response_to_read():
                 msg = port.read_response()
                 work_done = True
+                type = (msg.get_header())['type']
+                if type == CMSG_BLKIF_BE and port == dom0_port:
+                    xend.blkif.backend_rx_rsp(port, msg)
+
+            # Send console data.
+            if con_if and con_if.ctrlif_transmit_work(port):
+                work_done = True
 
-            # Send as much pending console data as there is room for.
-            while not wbuf.empty() and port.space_to_write_request():
-                msg = xend.utils.message(0, 0, 0)
-                msg.append_payload(wbuf.read(msg.MAX_PAYLOAD))
-                port.write_request(msg)
+            # Send blkif messages.
+            if blk_if and blk_if.ctrlif_transmit_work(port):
                 work_done = True
 
+            # Back-end block-device work.
+            if port == dom0_port and xend.blkif.backend_do_work(port):
+                work_done = True
+                
             # Finally, notify the remote end of any work that we did.
             if work_done:
                 port.notify()
index 42d66d3a95020fb8ff3734533dc09493981e7e44..ea7398cd4ce3a0a13a1a79d4bed95afbb7eef221 100644 (file)
@@ -4,13 +4,13 @@
 ## Copyright (c) 2004, K A Fraser (University of Cambridge)
 #############################################################
 
-import xend.console, xend.main, xend.utils
+import xend.blkif, xend.console, xend.main, xend.utils
 
 
 ##
 ## new_control_interface:
-##  Create a new control interface with the specified domain 'dom'.
-##  The console port may also be specified; otehrwise a suitable port is
+##  Create a new control interface with the specified domain @dom.
+##  The console port may also be specified; otherwise a suitable port is
 ##  automatically allocated.
 ##
 def new_control_interface(dom, console_port=-1):
@@ -26,9 +26,8 @@ def new_control_interface(dom, console_port=-1):
     con_if = xend.console.interface(console_port, port.local_port)
     con_if.listen()
 
-    # Add control state to the master list.
-    xend.main.control_list[port.local_port] = \
-      (port, xend.utils.buffer(), xend.utils.buffer(), con_if)
+    # Update the master port list.
+    xend.main.port_list[port.local_port] = port
 
     # Construct the successful response to be returned to the requester.
     response = { 'success': True }
@@ -36,3 +35,81 @@ def new_control_interface(dom, console_port=-1):
     response['remote_port']  = port.remote_port
     response['console_port'] = console_port
     return response
+
+
+##
+## new_block_interface:
+##  Create a new block interface for the specified domain @dom.
+##  Only handle zero is supported; a negative @handle selects it.
+##  Returns an error dictionary ('success' False, plus 'error_type') on
+##  failure, or None once the interface object has been created.
+##
+def new_block_interface(dom, handle=-1):
+    # By default we create an interface with handle zero.
+    if handle < 0:
+        handle = 0
+
+    # We only support one interface per domain, which must have handle zero.
+    if handle != 0:
+        response = { 'success': False }
+        # Parenthesised so that '%' formats the whole message: '%' binds
+        # tighter than '+', which previously raised a TypeError here.
+        response['error_type'] = ('Bad handle %d (only handle 0 '
+                                  'is supported)' % handle)
+        return response
+
+    # Find local event-channel port associated with the specified domain.
+    port = xend.main.port_from_dom(dom)
+    if not port:
+        response = { 'success': False }
+        response['error_type'] = 'Unknown domain %d' % dom
+        return response
+
+    # The interface must not already exist.
+    if xend.blkif.interface.list.has_key(port.local_port):
+        response = { 'success': False }
+        response['error_type'] = ('Interface (dom=%d,handle=%d) already '
+                                  'exists' % (dom, handle))
+        return response
+
+    # Create the new interface. Initially no virtual devices are attached.
+    xend.blkif.interface(dom, port.local_port)
+
+    # Response is deferred until back-end driver sends acknowledgement.
+    return None
+
+
+##
+## new_block_device:
+##  Attach a new virtual block device to the specified block interface
+##  (@dom, @handle). The new device is identified by @vdev, and maps to
+##  the real block extent (@pdev, @start_sect, @nr_sect). If @readonly then
+##  write requests to @vdev will be rejected.
+##  Returns an error dictionary ('success' False, plus 'error_type') on
+##  failure, or None once the device has been attached to the interface.
+##
+def new_block_device(dom, handle, vdev, pdev, start_sect, nr_sect, readonly):
+    # We only support one interface per domain, which must have handle zero.
+    if handle != 0:
+        response = { 'success': False }
+        # Parenthesised so that '%' formats the whole message: '%' binds
+        # tighter than '+', which previously raised a TypeError here.
+        response['error_type'] = ('Bad handle %d (only handle 0 '
+                                  'is supported)' % handle)
+        return response
+
+    # Find local event-channel port associated with the specified domain.
+    port = xend.main.port_from_dom(dom)
+    if not port:
+        response = { 'success': False }
+        response['error_type'] = 'Unknown domain %d' % dom
+        return response
+        
+    # The interface must exist.
+    if not xend.blkif.interface.list.has_key(port.local_port):
+        response = { 'success': False }
+        response['error_type'] = ('Interface (dom=%d,handle=%d) does not '
+                                  'exists' % (dom, handle))
+        return response
+
+    # The virtual device must not yet exist.
+    blkif = xend.blkif.interface.list[port.local_port]
+    if not blkif.attach_device(vdev, pdev, start_sect, nr_sect, readonly):
+        response = { 'success': False }
+        response['error_type'] = ('Vdevice (dom=%d,handle=%d,vdevice=%d) '
+                                  'already exists' % (dom, handle, vdev))
+        return response
+
+    # Response is deferred until back-end driver sends acknowledgement.
+    return None
index c0bea86320ae0083d3f77bc11db6a8c36c1a909f..c6011cebb44b968c7784b55349b9b72d37dd10df 100644 (file)
@@ -109,15 +109,18 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
         goto out;
     }
 
+    /* 'Allocate' port1 before searching for a free port2. */
+    p1->event_channel[port1].state = ECS_INTERDOMAIN;
+
     if ( (port2 = get_free_port(p2)) < 0 )
     {
+        p1->event_channel[port1].state = ECS_FREE;
         rc = port2;
         goto out;
     }
 
     p1->event_channel[port1].u.remote.dom  = p2;
     p1->event_channel[port1].u.remote.port = (u16)port2;
-    p1->event_channel[port1].state         = ECS_INTERDOMAIN;
 
     p2->event_channel[port2].u.remote.dom  = p1;
     p2->event_channel[port2].u.remote.port = (u16)port1;
index 646f4855f35e18fc70a6c36df4b4e477074c16f8..e6004b4a8e9806ed8fd9bd6bda7af515e0c8c7d0 100644 (file)
@@ -41,6 +41,12 @@ typedef struct blkif_st {
     rb_root_t        vbd_rb;        /* Mapping from 16-bit vdevices to VBDs. */
     spinlock_t       vbd_lock;      /* Protects VBD mapping. */
     /* Private fields. */
+    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
+    /*
+     * DISCONNECT response is deferred until pending requests are ack'ed.
+     * We therefore need to store the id from the original request.
+     */
+    u8               disconnect_rspid;
     struct blkif_st *hash_next;
     struct list_head blkdev_list;
     spinlock_t       blk_ring_lock;
@@ -49,13 +55,15 @@ typedef struct blkif_st {
 
 void blkif_create(blkif_be_create_t *create);
 void blkif_destroy(blkif_be_destroy_t *destroy);
-void __blkif_destroy(blkif_t *blkif);
+void blkif_connect(blkif_be_connect_t *connect);
+int  blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id);
+void __blkif_disconnect_complete(blkif_t *blkif);
 blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
 #define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
 #define blkif_put(_b)                             \
     do {                                          \
         if ( atomic_dec_and_test(&(_b)->refcnt) ) \
-            __blkif_destroy(_b);                  \
+            __blkif_disconnect_complete(_b);      \
     } while (0)
 
 /* An entry in a list of xen_extents. */
index 2baddcd6161524ebcb70fc8013006c7d510adebd..0746ecfab0951cb394af6eecedd01d802fd9eb4e 100644 (file)
@@ -10,6 +10,8 @@
 
 static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
 {
+    DPRINTK("Received blkif backend message, subtype=%d\n", msg->subtype);
+    
     switch ( msg->subtype )
     {
     case CMSG_BLKIF_BE_CREATE:
@@ -22,6 +24,17 @@ static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
             goto parse_error;
         blkif_destroy((blkif_be_destroy_t *)&msg->msg[0]);
         break;        
+    case CMSG_BLKIF_BE_CONNECT:
+        if ( msg->length != sizeof(blkif_be_connect_t) )
+            goto parse_error;
+        blkif_connect((blkif_be_connect_t *)&msg->msg[0]);
+        break;        
+    case CMSG_BLKIF_BE_DISCONNECT:
+        if ( msg->length != sizeof(blkif_be_disconnect_t) )
+            goto parse_error;
+        if ( !blkif_disconnect((blkif_be_disconnect_t *)&msg->msg[0],msg->id) )
+            return; /* Sending the response is deferred until later. */
+        break;        
     case CMSG_BLKIF_BE_VBD_CREATE:
         if ( msg->length != sizeof(blkif_be_vbd_create_t) )
             goto parse_error;
@@ -50,6 +63,8 @@ static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
     return;
 
  parse_error:
+    DPRINTK("Parse error while reading message subtype %d, len %d\n",
+            msg->subtype, msg->length);
     msg->length = 0;
     ctrl_if_send_response(msg);
 }
index 87925681da33e745768d02d6c11e94075559fcd4..9acbac35ab4f772853cad1a6ea5d0107b8e9b218 100644 (file)
 
 static kmem_cache_t *blkif_cachep;
 static blkif_t      *blkif_hash[BLKIF_HASHSZ];
-static spinlock_t    blkif_hash_lock;
 
 blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
 {
-    blkif_t      *blkif;
-    unsigned long flags;
-    
-    spin_lock_irqsave(&blkif_hash_lock, flags);
-    blkif = blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( blkif != NULL )
-    {
-        if ( (blkif->domid == domid) && (blkif->handle == handle) )
-        {
-            blkif_get(blkif);
-            break;
-        }
+    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
+    while ( (blkif != NULL) && 
+            ((blkif->domid != domid) || (blkif->handle != handle)) )
         blkif = blkif->hash_next;
-    }
-    spin_unlock_irqrestore(&blkif_hash_lock, flags);
-
     return blkif;
 }
 
-void __blkif_destroy(blkif_t *blkif)
+void __blkif_disconnect_complete(blkif_t *blkif)
 {
-    free_irq(blkif->irq, NULL);
+    ctrl_msg_t            cmsg;
+    blkif_be_disconnect_t disc;
+
+    /*
+     * These can't be done in __blkif_disconnect() because at that point there
+     * may be outstanding requests at the disc whose asynchronous responses
+     * must still be notified to the remote driver.
+     */
     unbind_evtchn_from_irq(blkif->evtchn);
     vfree(blkif->blk_ring_base);
-    destroy_all_vbds(blkif);
-    kmem_cache_free(blkif_cachep, blkif);    
+
+    /* Construct the deferred response message. */
+    cmsg.type         = CMSG_BLKIF_BE;
+    cmsg.subtype      = CMSG_BLKIF_BE_DISCONNECT;
+    cmsg.id           = blkif->disconnect_rspid;
+    cmsg.length       = sizeof(blkif_be_disconnect_t);
+    disc.domid        = blkif->domid;
+    disc.blkif_handle = blkif->handle;
+    disc.status       = BLKIF_BE_STATUS_OKAY;
+    memcpy(cmsg.msg, &disc, sizeof(disc));
+
+    /*
+     * Make sure message is constructed /before/ status change, because
+     * after the status change the 'blkif' structure could be deallocated at
+     * any time. Also make sure we send the response /after/ status change,
+     * as otherwise a subsequent CONNECT request could spuriously fail if
+     * another CPU doesn't see the status change yet.
+     */
+    mb();
+    if ( blkif->status != DISCONNECTING )
+        BUG();
+    blkif->status = DISCONNECTED;
+    mb();
+
+    /* Send the successful response. */
+    ctrl_if_send_response(&cmsg);
 }
 
 void blkif_create(blkif_be_create_t *create)
 {
     domid_t       domid  = create->domid;
     unsigned int  handle = create->blkif_handle;
-    unsigned int  evtchn = create->evtchn;
-    unsigned long shmem_frame = create->shmem_frame;
-    unsigned long flags;
     blkif_t     **pblkif, *blkif;
-    struct vm_struct *vma;
-    pgprot_t      prot;
-    int           error;
 
-    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
+    if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_ATOMIC)) == NULL )
     {
+        DPRINTK("Could not create blkif: out of memory\n");
         create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
         return;
     }
 
-    if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL )
-    {
-        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
-        goto fail1;
-    }
-
-    prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
-    error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
-                                    shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
-                                    prot, domid);
-    if ( error != 0 )
-    {
-        if ( error == -ENOMEM )
-            create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
-        else if ( error == -EFAULT )
-            create->status = BLKIF_BE_STATUS_MAPPING_ERROR;
-        else
-            create->status = BLKIF_BE_STATUS_ERROR;
-        goto fail2;
-    }
-
     memset(blkif, 0, sizeof(*blkif));
-    blkif->domid         = domid;
-    blkif->handle        = handle;
-    blkif->evtchn        = evtchn;
-    blkif->irq           = bind_evtchn_to_irq(evtchn);
-    blkif->shmem_frame   = shmem_frame;
-    blkif->blk_ring_base = (blkif_ring_t *)vma->addr;
+    blkif->domid  = domid;
+    blkif->handle = handle;
+    blkif->status = DISCONNECTED;
     spin_lock_init(&blkif->vbd_lock);
     spin_lock_init(&blkif->blk_ring_lock);
-
-    spin_lock_irqsave(&blkif_hash_lock, flags);
+    atomic_set(&blkif->refcnt, 0);
 
     pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( *pblkif == NULL )
+    while ( *pblkif != NULL )
     {
         if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
         {
-            spin_unlock_irqrestore(&blkif_hash_lock, flags);
+            DPRINTK("Could not create blkif: already exists\n");
             create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
-            goto fail3;
+            kmem_cache_free(blkif_cachep, blkif);
+            return;
         }
         pblkif = &(*pblkif)->hash_next;
     }
 
-    atomic_set(&blkif->refcnt, 1);
     blkif->hash_next = *pblkif;
     *pblkif = blkif;
 
-    spin_unlock_irqrestore(&blkif_hash_lock, flags);
-
-    request_irq(blkif->irq, blkif_be_int, 0, "blkif-backend", blkif);
-
+    DPRINTK("Successfully created blkif\n");
     create->status = BLKIF_BE_STATUS_OKAY;
-    return;
-
- fail3: unbind_evtchn_from_irq(evtchn);
- fail2: kmem_cache_free(blkif_cachep, blkif);
- fail1: vfree(vma->addr);
 }
 
 void blkif_destroy(blkif_be_destroy_t *destroy)
 {
     domid_t       domid  = destroy->domid;
     unsigned int  handle = destroy->blkif_handle;
-    unsigned long flags;
     blkif_t     **pblkif, *blkif;
 
-    spin_lock_irqsave(&blkif_hash_lock, flags);
-
     pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( (blkif = *pblkif) == NULL )
+    while ( (blkif = *pblkif) != NULL )
     {
         if ( (blkif->domid == domid) && (blkif->handle == handle) )
         {
-            *pblkif = blkif->hash_next;
-            spin_unlock_irqrestore(&blkif_hash_lock, flags);
-            blkif_deschedule(blkif);
-            blkif_put(blkif);
-            destroy->status = BLKIF_BE_STATUS_OKAY;
-            return;
+            if ( blkif->status != DISCONNECTED )
+                goto still_connected;
+            goto destroy;
         }
         pblkif = &blkif->hash_next;
     }
 
-    spin_unlock_irqrestore(&blkif_hash_lock, flags);
-
     destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+    return;
+
+ still_connected:
+    destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
+    return;
+
+ destroy:
+    *pblkif = blkif->hash_next;
+    destroy_all_vbds(blkif);
+    kmem_cache_free(blkif_cachep, blkif);
+    destroy->status = BLKIF_BE_STATUS_OKAY;
+}
+
+/*
+ * blkif_connect: connect the existing interface (@connect->domid,
+ * @connect->blkif_handle) to the event channel and shared-memory frame
+ * named in the request. The outcome is reported in @connect->status:
+ * INTERFACE_NOT_FOUND, OUT_OF_MEMORY, MAPPING_ERROR, ERROR,
+ * INTERFACE_CONNECTED (interface is not in the DISCONNECTED state), or OKAY.
+ */
+void blkif_connect(blkif_be_connect_t *connect)
+{
+    domid_t       domid  = connect->domid;
+    unsigned int  handle = connect->blkif_handle;
+    unsigned int  evtchn = connect->evtchn;
+    unsigned long shmem_frame = connect->shmem_frame;
+    struct vm_struct *vma;
+    pgprot_t      prot;
+    int           error;
+    blkif_t      *blkif;
+
+    /* The interface must already have been created. */
+    blkif = blkif_find_by_handle(domid, handle);
+    if ( unlikely(blkif == NULL) )
+    {
+        DPRINTK("blkif_connect attempted for non-existent blkif (%llu,%u)\n", 
+                connect->domid, connect->blkif_handle); 
+        connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+        return;
+    }
+
+    /* Reserve one page of kernel virtual address space for the ring. */
+    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
+    {
+        connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+        return;
+    }
+
+    /* Map the frame supplied in the request, owned by @domid, into the area. */
+    prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
+    error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
+                                    shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
+                                    prot, domid);
+    if ( error != 0 )
+    {
+        /* Translate the errno into a blkif status code, then undo the area. */
+        if ( error == -ENOMEM )
+            connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+        else if ( error == -EFAULT )
+            connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
+        else
+            connect->status = BLKIF_BE_STATUS_ERROR;
+        vfree(vma->addr);
+        return;
+    }
+
+    /* Only a DISCONNECTED interface may be connected; drop the mapping. */
+    if ( blkif->status != DISCONNECTED )
+    {
+        connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
+        vfree(vma->addr);
+        return;
+    }
+
+    blkif->evtchn        = evtchn;
+    blkif->irq           = bind_evtchn_to_irq(evtchn);
+    blkif->shmem_frame   = shmem_frame;
+    blkif->blk_ring_base = (blkif_ring_t *)vma->addr;
+    blkif->status        = CONNECTED;
+    /* Reference held while connected; blkif_disconnect() drops it. */
+    blkif_get(blkif);
+
+    /* NOTE(review): the return value of request_irq() is not checked. */
+    request_irq(blkif->irq, blkif_be_int, 0, "blkif-backend", blkif);
+
+    connect->status = BLKIF_BE_STATUS_OKAY;
+}
+
+/*
+ * blkif_disconnect: start disconnecting the interface (@disconnect->domid,
+ * @disconnect->blkif_handle).
+ *
+ * Returns 0 if the response is deferred: it is then sent by
+ * __blkif_disconnect_complete(), using @rsp_id, when the last reference to
+ * the interface is dropped. Returns 1 if @disconnect->status has been filled
+ * in and the caller must send the response itself.
+ */
+int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id)
+{
+    domid_t       domid  = disconnect->domid;
+    unsigned int  handle = disconnect->blkif_handle;
+    blkif_t      *blkif;
+
+    blkif = blkif_find_by_handle(domid, handle);
+    if ( unlikely(blkif == NULL) )
+    {
+        DPRINTK("blkif_disconnect attempted for non-existent blkif"
+                " (%llu,%u)\n", disconnect->domid, disconnect->blkif_handle);
+        disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+        return 1; /* Caller will send response error message. */
+    }
+
+    if ( blkif->status == CONNECTED )
+    {
+        blkif->status = DISCONNECTING;
+        blkif->disconnect_rspid = rsp_id;
+        wmb(); /* Let other CPUs see the status change. */
+        /* dev_id must be the same cookie that was given to request_irq(). */
+        free_irq(blkif->irq, blkif);
+        blkif_deschedule(blkif);
+        /* Drop the reference taken by blkif_connect(). */
+        blkif_put(blkif);
+        return 0; /* Caller should not send response message. */
+    }
+
+    /*
+     * Not connected, so no deferred response will be generated for this
+     * request: answer it immediately rather than leaving it unacknowledged.
+     * NOTE(review): OKAY is also reported while a previous disconnect is
+     * still DISCONNECTING -- confirm that is acceptable to the controller.
+     */
+    disconnect->status = BLKIF_BE_STATUS_OKAY;
+    return 1; /* Caller will send the response message. */
 }
 
 void __init blkif_interface_init(void)
@@ -159,5 +231,4 @@ void __init blkif_interface_init(void)
     blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 
                                      0, 0, NULL, NULL);
     memset(blkif_hash, 0, sizeof(blkif_hash));
-    spin_lock_init(&blkif_hash_lock);
 }
index 886279825087e66728eb52623cf4a7b6b90344bf..2582287360b8a3e3cd323930f4e06dea82a4aaa0 100644 (file)
@@ -33,8 +33,8 @@ static struct vm_struct *mmap_vma;
     (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
 #define MMAP_VADDR(_req,_seg)            \
     ((unsigned long)mmap_vma->addr +     \
-     ((_req) * MMAP_PAGES_PER_REQUEST) + \
-     ((_seg) * MMAP_PAGES_PER_SEGMENT))
+     ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
+     ((_seg) * MMAP_PAGES_PER_SEGMENT * PAGE_SIZE))
 
 /*
  * Each outstanding request that we've passed to the lower device layers has a 
@@ -96,7 +96,7 @@ static void add_to_blkdev_list_tail(blkif_t *blkif)
     unsigned long flags;
     if ( __on_blkdev_list(blkif) ) return;
     spin_lock_irqsave(&io_schedule_list_lock, flags);
-    if ( !__on_blkdev_list(blkif) )
+    if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
     {
         list_add_tail(&blkif->blkdev_list, &io_schedule_list);
         blkif_get(blkif);
@@ -168,7 +168,8 @@ static void end_block_io_op(struct buffer_head *bh, int uptodate)
     if ( atomic_dec_and_test(&pending_req->pendcnt) )
     {
         int pending_idx = pending_req - pending_reqs;
-        vmfree_area_pages(MMAP_VADDR(pending_idx, 0), MMAP_PAGES_PER_REQUEST);
+        vmfree_area_pages(MMAP_VADDR(pending_idx, 0), 
+                          MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
         make_response(pending_req->blkif, pending_req->id,
                       pending_req->operation, pending_req->status);
         blkif_put(pending_req->blkif);
@@ -260,10 +261,11 @@ static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
     {
         if ( (req->buffer_and_sects[i] & ~PAGE_MASK) != (PAGE_SIZE / 512) )
             goto bad_descriptor;
-        if ( direct_remap_area_pages(&init_mm, 
+        rc = direct_remap_area_pages(&init_mm, 
                                      MMAP_VADDR(pending_idx, i),
                                      req->buffer_and_sects[i] & PAGE_MASK, 
-                                     PAGE_SIZE, prot, blkif->domid) != 0 )
+                                     PAGE_SIZE, prot, blkif->domid);
+        if ( rc != 0 )
             goto bad_descriptor;
     }
 
@@ -271,12 +273,13 @@ static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
                    (req->nr_segments * PAGE_SIZE) / sizeof(vdisk_t));
 
     vmfree_area_pages(MMAP_VADDR(pending_idx, 0), 
-                      MMAP_PAGES_PER_REQUEST);
+                      MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
     make_response(blkif, req->id, req->operation, rc);
     return;
 
  bad_descriptor:
-    vmfree_area_pages(MMAP_VADDR(pending_idx, 0), MMAP_PAGES_PER_REQUEST);
+    vmfree_area_pages(MMAP_VADDR(pending_idx, 0), 
+                      MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
     make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
 }
 
@@ -284,7 +287,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
 {
     extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
     struct buffer_head *bh;
-    int operation = (req->operation == XEN_BLOCK_WRITE) ? WRITE : READ;
+    int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
     unsigned short nr_sects;
     unsigned long buffer;
     int i, tot_sects, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
@@ -358,14 +361,15 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
         unsigned long sz = ((phys_seg[i].buffer & ~PAGE_MASK) + 
                             (phys_seg[i].nr_sects << 9) + 
                             (PAGE_SIZE - 1)) & PAGE_MASK;
-        if ( direct_remap_area_pages(&init_mm, 
-                                     MMAP_VADDR(pending_idx, i),
-                                     phys_seg[i].buffer & PAGE_MASK, 
-                                     sz, prot, blkif->domid) != 0 )
+        int rc = direct_remap_area_pages(&init_mm, 
+                                         MMAP_VADDR(pending_idx, i),
+                                         phys_seg[i].buffer & PAGE_MASK, 
+                                         sz, prot, blkif->domid);
+        if ( rc != 0 )
         {
             DPRINTK("invalid buffer\n");
             vmfree_area_pages(MMAP_VADDR(pending_idx, 0), 
-                              MMAP_PAGES_PER_REQUEST);
+                              MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
             goto bad_descriptor;
         }
     }
@@ -374,7 +378,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
     pending_req->blkif     = blkif;
     pending_req->id        = req->id;
     pending_req->operation = operation;
-    pending_req->status    = BLKIF_RSP_ERROR;
+    pending_req->status    = BLKIF_RSP_OKAY;
     atomic_set(&pending_req->pendcnt, nr_psegs);
 
     blkif_get(blkif);
@@ -382,29 +386,30 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
     /* Now we pass each segment down to the real blkdev layer. */
     for ( i = 0; i < nr_psegs; i++ )
     {
-        bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
+        bh = kmem_cache_alloc(buffer_head_cachep, GFP_ATOMIC);
         if ( unlikely(bh == NULL) )
             panic("bh is null\n");
         memset(bh, 0, sizeof (struct buffer_head));
-    
+
+        init_waitqueue_head(&bh->b_wait);
         bh->b_size          = phys_seg[i].nr_sects << 9;
         bh->b_dev           = phys_seg[i].dev;
+        bh->b_rdev          = phys_seg[i].dev;
         bh->b_rsector       = (unsigned long)phys_seg[i].sector_number;
-        bh->b_data          = (char *)MMAP_VADDR(pending_idx, i) + 
+        bh->b_data          = (char *)MMAP_VADDR(pending_idx, i) +
             (phys_seg[i].buffer & ~PAGE_MASK);
-        /* SMH: bh_phys() uses the below field as a 'cheap' virt_to_phys */
-        bh->b_page          = &mem_map[phys_seg[i].buffer>>PAGE_SHIFT]; 
         bh->b_end_io        = end_block_io_op;
         bh->b_private       = pending_req;
 
-        bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock);
+        bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | 
+            (1 << BH_Req) | (1 << BH_Launder);
         if ( operation == WRITE )
             bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate);
 
         atomic_set(&bh->b_count, 1);
 
         /* Dispatch a single request. We'll flush it to disc later. */
-        submit_bh(operation, bh);
+        generic_make_request(operation, bh);
     }
 
     pending_cons++;
@@ -444,16 +449,7 @@ static void make_response(blkif_t *blkif, unsigned long id,
 
 void blkif_deschedule(blkif_t *blkif)
 {
-    unsigned long flags;
-
-    spin_lock_irqsave(&io_schedule_list_lock, flags);
-    if ( __on_blkdev_list(blkif) )
-    {
-        list_del(&blkif->blkdev_list);
-        blkif->blkdev_list.next = (void *)0xdeadbeef;
-        blkif_put(blkif);
-    }
-    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
+    remove_from_blkdev_list(blkif);
 }
 
 static int __init init_module(void)
index bc5390eeb9959bc0f42348d0b63b4b5d193895a2..19b0b3015dff7a978ecc263f2e22fecabdec523f 100644 (file)
@@ -47,7 +47,7 @@ void vbd_create(blkif_be_vbd_create_t *create)
         }
     }
 
-    if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) )
+    if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_ATOMIC)) == NULL) )
     {
         DPRINTK("vbd_create: out of memory\n");
         create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
@@ -62,11 +62,12 @@ void vbd_create(blkif_be_vbd_create_t *create)
     rb_link_node(&vbd->rb, rb_parent, rb_p);
     rb_insert_color(&vbd->rb, &blkif->vbd_rb);
 
+    DPRINTK("Successful creation of vdev=%04x (dom=%llu)\n",
+            vdevice, create->domid);
     create->status = BLKIF_BE_STATUS_OKAY;
 
  out:
     spin_unlock(&blkif->vbd_lock);
-    blkif_put(blkif);
 }
 
 
@@ -110,7 +111,7 @@ void vbd_grow(blkif_be_vbd_grow_t *grow)
     } 
 
     if ( unlikely((x = kmalloc(sizeof(blkif_extent_le_t), 
-                               GFP_KERNEL)) == NULL) )
+                               GFP_ATOMIC)) == NULL) )
     {
         DPRINTK("vbd_grow: out of memory\n");
         grow->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
@@ -127,11 +128,12 @@ void vbd_grow(blkif_be_vbd_grow_t *grow)
 
     *px = x;
 
+    DPRINTK("Successful grow of vdev=%04x (dom=%llu)\n",
+            vdevice, grow->domid);
     grow->status = BLKIF_BE_STATUS_OKAY;
 
  out:
     spin_unlock(&blkif->vbd_lock);
-    blkif_put(blkif);
 }
 
 
@@ -190,7 +192,6 @@ void vbd_shrink(blkif_be_vbd_shrink_t *shrink)
 
  out:
     spin_unlock(&blkif->vbd_lock);
-    blkif_put(blkif);
 }
 
 
@@ -242,7 +243,6 @@ void vbd_destroy(blkif_be_vbd_destroy_t *destroy)
     
  out:
     spin_unlock(&blkif->vbd_lock);
-    blkif_put(blkif);
 }
 
 
index 2936d78ea2c5decddee3d7e97ec8fab737c89de0..29cc01d08749397ee2ee313fe2f954cbac08100d 100644 (file)
@@ -18,9 +18,9 @@
 
 typedef unsigned char byte; /* from linux/ide.h */
 
-#define BLKIF_STATE_CLOSED    0
-#define BLKIF_STATE_DOWN      1
-#define BLKIF_STATE_UP        2
+#define BLKIF_STATE_CLOSED       0
+#define BLKIF_STATE_DISCONNECTED 1
+#define BLKIF_STATE_CONNECTED    2
 static unsigned int blkif_state = BLKIF_STATE_CLOSED;
 static unsigned int blkif_evtchn, blkif_irq;
 
@@ -35,7 +35,7 @@ static BLK_RING_IDX req_prod;  /* Private request producer.         */
 
 /* We plug the I/O ring if the driver is suspended or if the ring is full. */
 #define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \
-                      (blkif_state != BLKIF_STATE_UP))
+                      (blkif_state != BLKIF_STATE_CONNECTED))
 
 
 /*
@@ -123,8 +123,10 @@ int blkif_release(struct inode *inode, struct file *filep)
      */
     if ( --disk->usage == 0 )
     {
+#if 0
         update_tq.routine = update_vbds_task;
         schedule_task(&update_tq);
+#endif
     }
 
     return 0;
@@ -306,7 +308,7 @@ static int blkif_queue_request(unsigned long   id,
     if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
         BUG();
 
-    if ( unlikely(blkif_state != BLKIF_STATE_UP) )
+    if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
         return 1;
 
     switch ( operation )
@@ -498,7 +500,7 @@ static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
             {
                 next_bh = bh->b_reqnext;
                 bh->b_reqnext = NULL;
-                bh->b_end_io(bh, !bret->status);
+                bh->b_end_io(bh, bret->status == BLKIF_RSP_OKAY);
             }
             break;
         case BLKIF_OP_PROBE:
@@ -556,18 +558,18 @@ void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
 
 static void blkif_bringup_phase1(void *unused)
 {
-    ctrl_msg_t              cmsg;
-    blkif_fe_interface_up_t up;
+    ctrl_msg_t                   cmsg;
+    blkif_fe_interface_connect_t up;
 
-    /* Move from CLOSED to DOWN state. */
+    /* Move from CLOSED to DISCONNECTED state. */
     blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
     blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
-    blkif_state  = BLKIF_STATE_DOWN;
+    blkif_state  = BLKIF_STATE_DISCONNECTED;
 
-    /* Construct an interface-UP message for the domain controller. */
+    /* Construct an interface-CONNECT message for the domain controller. */
     cmsg.type      = CMSG_BLKIF_FE;
-    cmsg.subtype   = CMSG_BLKIF_FE_INTERFACE_UP;
-    cmsg.length    = sizeof(blkif_fe_interface_up_t);
+    cmsg.subtype   = CMSG_BLKIF_FE_INTERFACE_CONNECT;
+    cmsg.length    = sizeof(blkif_fe_interface_connect_t);
     up.handle      = 0;
     up.shmem_frame = virt_to_machine(blk_ring) >> PAGE_SHIFT;
     memcpy(cmsg.msg, &up, sizeof(up));
@@ -578,14 +580,14 @@ static void blkif_bringup_phase1(void *unused)
 
 static void blkif_bringup_phase2(void *unused)
 {
-    /* Move from DOWN to UP state. */
     blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
     (void)request_irq(blkif_irq, blkif_int, 0, "blkif", NULL);
-    blkif_state = BLKIF_STATE_UP;
 
     /* Probe for discs that are attached to the interface. */
     xlvbd_init();
 
+    blkif_state = BLKIF_STATE_CONNECTED;
+
     /* Kick pending requests. */
     spin_lock_irq(&io_request_lock);
     kick_pending_request_queues();
@@ -608,22 +610,22 @@ static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
                blkif_state);
         break;
 
-    case BLKIF_INTERFACE_STATUS_DOWN:
+    case BLKIF_INTERFACE_STATUS_DISCONNECTED:
         if ( blkif_state != BLKIF_STATE_CLOSED )
         {
-            printk(KERN_WARNING "Unexpected blkif-DOWN message in state %d\n",
-                   blkif_state);
+            printk(KERN_WARNING "Unexpected blkif-DISCONNECTED message"
+                   " in state %d\n", blkif_state);
             break;
         }
         blkif_statechange_tq.routine = blkif_bringup_phase1;
         schedule_task(&blkif_statechange_tq);
         break;
 
-    case BLKIF_INTERFACE_STATUS_UP:
+    case BLKIF_INTERFACE_STATUS_CONNECTED:
         if ( blkif_state == BLKIF_STATE_CLOSED )
         {
-            printk(KERN_WARNING "Unexpected blkif-UP message in state %d\n",
-                   blkif_state);
+            printk(KERN_WARNING "Unexpected blkif-CONNECTED message"
+                   " in state %d\n", blkif_state);
             break;
         }
         blkif_evtchn = status->evtchn;
@@ -683,6 +685,17 @@ int __init xlblk_init(void)
     memcpy(cmsg.msg, &st, sizeof(st));
     ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
 
+    /*
+     * We should read 'nr_interfaces' from the response message and wait
+     * for notifications before proceeding. For now we assume that we
+     * will be notified of exactly one interface.
+     */
+    while ( blkif_state != BLKIF_STATE_CONNECTED )
+    {
+        set_current_state(TASK_INTERRUPTIBLE);
+        schedule_timeout(1);
+    }
+
     return 0;
 }
 
index 944bf7eace85b8f300a3d741789ee9b4c5c52f1a..b26907192af3145ef14592ab6e76a93c9b95b346 100644 (file)
@@ -512,7 +512,7 @@ void xlvbd_update_vbds(void)
  * linux -- this is just for convenience as it means e.g. that the same 
  * /etc/fstab can be used when booting with or without Xen.
  */
-int __init xlvbd_init(void)
+int xlvbd_init(void)
 {
     int i;
     
@@ -559,8 +559,3 @@ int __init xlvbd_init(void)
 
     return 0;
 }
-
-
-#ifdef MODULE
-module_init(xlvbd_init);
-#endif
index 3d78e209502c113d17da2f9b360ac1d5f8931815..f5243bb6a7761be4ae1b454b20ddb201980ca7ed 100644 (file)
@@ -159,13 +159,47 @@ extern void iounmap(void *addr);
 extern void *bt_ioremap(unsigned long offset, unsigned long size);
 extern void bt_iounmap(void *addr, unsigned long size);
 
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+
+#ifdef CONFIG_HIGHMEM
+#error "Highmem is not yet compatible with physical device access"
+#endif
+
 /*
- * IO bus memory addresses are also 1:1 with the physical address
+ * The bus translation macros need special care if we are executing device
+ * accesses to/from other domains' memory. In these cases the virtual address
+ * is actually a temporary mapping in the 'vmalloc' space. The physical
+ * frame number will therefore be >= max_pfn, and will not have a valid
+ * entry in the phys_to_mach mapping table.
  */
+static inline unsigned long phys_to_bus(unsigned long phys)
+{   /* Translate pseudo-physical address 'phys' into a bus address. */
+    extern unsigned long max_pfn;
+    pgd_t *pgd; pmd_t *pmd; pte_t *pte;
+    void *addr;
+    unsigned long bus;
+    if ( (phys >> PAGE_SHIFT) < max_pfn ) /* frame is below max_pfn: */
+        return phys_to_machine(phys);     /* use the phys_to_machine lookup */
+    addr = phys_to_virt(phys);            /* else walk kernel page tables */
+    pgd = pgd_offset_k(   (unsigned long)addr);
+    pmd = pmd_offset(pgd, (unsigned long)addr);
+    pte = pte_offset(pmd, (unsigned long)addr); /* NOTE(review): no present check */
+    bus = (pte->pte_low & PAGE_MASK) | (phys & ~PAGE_MASK); /* PTE frame | offset */
+    return bus;
+}
+
+#define virt_to_bus(_x) phys_to_bus(virt_to_phys(_x))
+#define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x))
+#define page_to_bus(_x) phys_to_bus(page_to_phys(_x))
+
+#else
+
 #define virt_to_bus(_x) phys_to_machine(virt_to_phys(_x))
 #define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x))
 #define page_to_bus(_x) phys_to_machine(page_to_phys(_x))
 
+#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
+
 /*
  * readX/writeX() are used to access memory mapped devices. On some
  * architectures the memory mapped IO stuff needs to be accessed