bitkeeper revision 1.891.1.5 (409ba2e8A6F60eP06BqyZUGapsn8XA)
author    kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Fri, 7 May 2004 14:53:28 +0000 (14:53 +0000)
committer kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Fri, 7 May 2004 14:53:28 +0000 (14:53 +0000)
Network interface for the new IO model is now complete.
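
For reference, the new flow has xc_dom_create.py request a guest's network
interface from xend with a single control message (see the xc_dom_create.py
hunk below). A minimal sketch of that call, reusing the names from the patch;
error handling is simplified, and a running xend plus an importable xenctl
package are assumed:

    import xenctl.utils

    def attach_default_vif(dom_id):
        # Ask xend to create network interface 0 for the new domain.
        cmsg = 'new_network_interface(dom=' + str(dom_id) + ')'
        response = xenctl.utils.xend_control_message(cmsg)
        if not response['success']:
            # 'error_type' (plus 'exception_type'/'exception_value' when the
            # error is an exception) describes the failure; xc_dom_create.py
            # destroys the half-built domain at this point.
            raise RuntimeError('xend error: ' + response['error_type'])
        # On success the back-end driver completes the hookup asynchronously.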

34 files changed:
.rootkeys
tools/examples/xc_dom_create.py
tools/xenctl/lib/utils.py
tools/xend/lib/domain_controller.h
tools/xend/lib/main.py
tools/xend/lib/manager.py
tools/xend/lib/netif.py [new file with mode: 0644]
xen/common/dom_mem_ops.c
xen/common/domain.c
xen/common/kernel.c
xen/common/memory.c
xenolinux-2.4.26-sparse/arch/xen/config.in
xenolinux-2.4.26-sparse/arch/xen/defconfig
xenolinux-2.4.26-sparse/arch/xen/defconfig-physdev
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c
xenolinux-2.4.26-sparse/arch/xen/drivers/console/console.c
xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/control.c
xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/interface.c
xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c
xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/main.c
xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c
xenolinux-2.4.26-sparse/drivers/block/ll_rw_blk.c
xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h
xenolinux-2.4.26-sparse/include/asm-xen/io.h
xenolinux-2.4.26-sparse/include/asm-xen/pci.h [new file with mode: 0644]
xenolinux-2.4.26-sparse/mkbuildtree
xenolinux-2.4.26-sparse/mm/page_alloc.c [new file with mode: 0644]

index 5a7a5d280390e5728d36c2b9c63406b828675e3c..4c888bbc8e59ac6a5285c31a9378fcb9affea04e 100644 (file)
--- a/.rootkeys
+++ b/.rootkeys
 4048c0ddsF0WrU7HUzTvg1MJoCIfWg tools/xend/lib/domain_controller.h
 4054a301VEag2GwrBrFBna5U1BGlLA tools/xend/lib/main.py
 4055ad9ah9IuC3sJT2c_gYIFY5Tw_g tools/xend/lib/manager.py
+409ba2e729HhE7fEra4B5EqX-F8Xzw tools/xend/lib/netif.py
 40431ac8wrUEj-XM7B8smFtx_HA7lQ tools/xend/lib/utils.c
 4054a2fdkdATEnRw-U7AUlgu-6JiUA tools/xend/setup.py
 4056cd26Qyp09iNoOjrvzg8KYzSqOw tools/xend/xend
 3f8707e7ZmZ6TxyX0ZUEfvhA2Pb_xQ xenolinux-2.4.26-sparse/include/asm-xen/msr.h
 3e7270deQqtGPSnFxcW4AvJZuTUWfg xenolinux-2.4.26-sparse/include/asm-xen/multicall.h
 3e5a4e67mnQfh-R8KcQCaVo2Oho6yg xenolinux-2.4.26-sparse/include/asm-xen/page.h
+409ba2e7ZfV5hqTvIzxLtpClnxtIzg xenolinux-2.4.26-sparse/include/asm-xen/pci.h
 3e5a4e67uTYU5oEnIDjxuaez8njjqg xenolinux-2.4.26-sparse/include/asm-xen/pgalloc.h
 3e5a4e67X7JyupgdYkgDX19Huj2sAw xenolinux-2.4.26-sparse/include/asm-xen/pgtable-2level.h
 3e5a4e67gr4NLGtQ5CvSLimMYZlkOA xenolinux-2.4.26-sparse/include/asm-xen/pgtable.h
 3e5a4e68GxCIaFH4sy01v1wjapetaA xenolinux-2.4.26-sparse/mm/memory.c
 3f108af5VxPkLv13tXpXgoRKALQtXQ xenolinux-2.4.26-sparse/mm/mprotect.c
 3e5a4e681xMPdF9xCMwpyfuYMySU5g xenolinux-2.4.26-sparse/mm/mremap.c
+409ba2e7akOFqQUg6Qyg2s28xcXiMg xenolinux-2.4.26-sparse/mm/page_alloc.c
 3e5a4e683HKVU-sxtagrDasRB8eBVw xenolinux-2.4.26-sparse/mm/swapfile.c
 3f108af81Thhb242EmKjGCYkjx-GJA xenolinux-2.4.26-sparse/mm/vmalloc.c
 407eb087XaNDLn8thVDLH-rI0hG-Xw xenolinux-sparse
index 22479a9d14f000a43676e2f657b2aa2f9e00746a..19bb2ac9df5d608068f71cf2cc79dde7cb47a778 100755 (executable)
@@ -333,7 +333,18 @@ def make_domain():
                 xc.domain_destroy ( dom=id )
                 sys.exit()
 
-    if not new_io_world:
+    if new_io_world:
+        cmsg = 'new_network_interface(dom='+str(id)+')'
+        xend_response = xenctl.utils.xend_control_message(cmsg)
+        if not xend_response['success']:
+            print "Error creating network interface"
+            print "Error type: " + xend_response['error_type']
+            if xend_response['error_type'] == 'exception':
+                print "Exception type: " + xend_response['exception_type']
+                print "Exception val:  " + xend_response['exception_value']
+            xc.domain_destroy ( dom=id )
+            sys.exit()
+    else:
         # setup virtual firewall rules for all aliases
         for ip in vfr_ipaddr:
             xenctl.utils.setup_vfr_rules_for_vif( id, 0, ip )
index 3f0914f73fd0ab6c78a8aa6633f1c060b6962f2d..11aadb4f088568dd8d1deb491e92853fd522c209 100644 (file)
@@ -54,15 +54,13 @@ def get_current_ipmask(dev='eth0'):
             return m.group(1)
     return None
 
-def get_current_ipgw(dev='eth0'):
-    """Return a string containing the IP gateway for the given
-    network interface (default 'eth0').
-    """
+def get_current_ipgw():
+    """Return a string containing the default IP gateway."""
     fd = os.popen( '/sbin/route -n' )
     lines = fd.readlines()
     for line in lines:
-        m = re.search( '^\S+\s+([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)' +
-                       '\s+\S+\s+\S*G.*' + dev + '.*', line )
+        m = re.search( '^0.0.0.0+\s+([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)' +
+                       '\s+0.0.0.0+\s+\S*G.*', line )
         if m:
             return m.group(1)
     return None
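
A quick check of the new pattern against a typical default-route line from
/sbin/route -n (the sample line below is illustrative, not part of the patch)
shows the gateway being captured regardless of which device carries the route:

    import re

    line = '0.0.0.0         192.168.0.1     0.0.0.0         UG    0      0        0 eth0'
    m = re.search( '^0.0.0.0+\s+([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)' +
                   '\s+0.0.0.0+\s+\S*G.*', line )
    print m.group(1)    # -> 192.168.0.1
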
index d5c397fe96ef18ed040e1f029dbab34bb51a1cd6..566967dc38f1eb0f775dd175de55d48718babdd6 100644 (file)
@@ -468,7 +468,6 @@ typedef struct {
     unsigned int   evtchn;            /* Event channel for notifications.    */
     unsigned long  tx_shmem_frame;    /* Page cont. tx shared comms window.  */
     unsigned long  rx_shmem_frame;    /* Page cont. rx shared comms window.  */
-    unsigned long  shmem_frame;       
     /* OUT */
     unsigned int   status;
 } netif_be_connect_t; 
index 7b5adbab8312ea25cdddfe4d4d85a292e06e39b7..0056783d7fda95eda650f063e3ec7a1055003c01 100755 (executable)
@@ -5,7 +5,7 @@
 ###########################################################
 
 import errno, re, os, pwd, select, signal, socket, struct, sys, time
-import xend.blkif, xend.console, xend.manager, xend.utils, Xc
+import xend.blkif, xend.netif, xend.console, xend.manager, xend.utils, Xc
 
 
 # The following parameters could be placed in a configuration file.
@@ -19,6 +19,8 @@ UNIX_SOCK    = 'management_sock' # relative to CONTROL_DIR
 CMSG_CONSOLE  = 0
 CMSG_BLKIF_BE = 1
 CMSG_BLKIF_FE = 2
+CMSG_NETIF_BE = 3
+CMSG_NETIF_FE = 4
 
 
 def port_from_dom(dom):
@@ -162,6 +164,10 @@ def daemon_loop():
             if xend.blkif.interface.list.has_key(idx):
                 blk_if = xend.blkif.interface.list[idx]
 
+            net_if = False
+            if xend.netif.interface.list.has_key(idx):
+                net_if = xend.netif.interface.list[idx]
+
             # If we pick up a disconnect notification then we do any necessary
             # cleanup.
             if type == notifier.EXCEPTION:
@@ -175,6 +181,9 @@ def daemon_loop():
                     if blk_if:
                         blk_if.destroy()
                         del blk_if
+                    if net_if:
+                        net_if.destroy()
+                        del net_if
                     continue
 
             # Process incoming requests.
@@ -188,6 +197,10 @@ def daemon_loop():
                     blk_if.ctrlif_rx_req(port, msg)
                 elif type == CMSG_BLKIF_BE and port == dom0_port:
                     xend.blkif.backend_rx_req(port, msg)
+                elif type == CMSG_NETIF_FE and net_if:
+                    net_if.ctrlif_rx_req(port, msg)
+                elif type == CMSG_NETIF_BE and port == dom0_port:
+                    xend.netif.backend_rx_req(port, msg)
                 else:
                     port.write_response(msg)
 
@@ -198,6 +211,8 @@ def daemon_loop():
                 type = (msg.get_header())['type']
                 if type == CMSG_BLKIF_BE and port == dom0_port:
                     xend.blkif.backend_rx_rsp(port, msg)
+                elif type == CMSG_NETIF_BE and port == dom0_port:
+                    xend.netif.backend_rx_rsp(port, msg)
 
             # Send console data.
             if con_if and con_if.ctrlif_transmit_work(port):
@@ -207,10 +222,18 @@ def daemon_loop():
             if blk_if and blk_if.ctrlif_transmit_work(port):
                 work_done = True
 
+            # Send netif messages.
+            if net_if and net_if.ctrlif_transmit_work(port):
+                work_done = True
+
             # Back-end block-device work.
             if port == dom0_port and xend.blkif.backend_do_work(port):
                 work_done = True
                 
+            # Back-end network-device work.
+            if port == dom0_port and xend.netif.backend_do_work(port):
+                work_done = True
+                
             # Finally, notify the remote end of any work that we did.
             if work_done:
                 port.notify()
index ea7398cd4ce3a0a13a1a79d4bed95afbb7eef221..2f15683d6654d90e6b57822b1538ff01d274057c 100644 (file)
@@ -4,7 +4,7 @@
 ## Copyright (c) 2004, K A Fraser (University of Cambridge)
 #############################################################
 
-import xend.blkif, xend.console, xend.main, xend.utils
+import xend.blkif, xend.netif, xend.console, xend.main, xend.utils
 
 
 ##
@@ -113,3 +113,40 @@ def new_block_device(dom, handle, vdev, pdev, start_sect, nr_sect, readonly):
 
     # Response is deferred until back-end driver sends acknowledgement.
     return None
+
+
+##
+## new_network_interface:
+##  Create a new network interface for the specified domain @dom.
+##
+def new_network_interface(dom, handle=-1):
+    # By default we create an interface with handle zero.
+    if handle < 0:
+        handle = 0
+
+    # We only support one interface per domain, which must have handle zero.
+    if handle != 0:
+        response = { 'success': False }
+        response['error_type'] = 'Bad handle %d (only handle 0 ' + \
+                                 'is supported)' % handle
+        return response
+
+    # Find local event-channel port associated with the specified domain.
+    port = xend.main.port_from_dom(dom)
+    if not port:
+        response = { 'success': False }
+        response['error_type'] = 'Unknown domain %d' % dom
+        return response
+
+    # The interface must not already exist.
+    if xend.netif.interface.list.has_key(port.local_port):
+        response = { 'success': False }
+        response['error_type'] = 'Interface (dom=%d,handle=%d) already ' + \
+                                 'exists' % (dom, handle)
+        return response
+
+    # Create the new interface. Initially no virtual devices are attached.
+    xend.netif.interface(dom, port.local_port)
+
+    # Response is deferred until back-end driver sends acknowledgement.
+    return None
diff --git a/tools/xend/lib/netif.py b/tools/xend/lib/netif.py
new file mode 100644 (file)
index 0000000..11756c5
--- /dev/null
@@ -0,0 +1,144 @@
+
+###################################################################
+## xend/netif.py -- Network-interface management functions for Xend
+## Copyright (c) 2004, K A Fraser (University of Cambridge)
+###################################################################
+
+import errno, random, re, os, select, signal, socket, struct, sys
+import xend.main, xend.console, xend.manager, xend.utils, Xc
+
+CMSG_NETIF_BE = 3
+CMSG_NETIF_FE = 4
+CMSG_NETIF_FE_INTERFACE_STATUS_CHANGED =  0
+CMSG_NETIF_FE_DRIVER_STATUS_CHANGED    = 32
+CMSG_NETIF_FE_INTERFACE_CONNECT        = 33
+CMSG_NETIF_FE_INTERFACE_DISCONNECT     = 34
+CMSG_NETIF_BE_CREATE      = 0
+CMSG_NETIF_BE_DESTROY     = 1
+CMSG_NETIF_BE_CONNECT     = 2
+CMSG_NETIF_BE_DISCONNECT  = 3
+
+pendmsg = None
+pendaddr = None
+
+def backend_tx_req(msg):
+    port = xend.main.dom0_port
+    if port.space_to_write_request():
+        port.write_request(msg)
+        port.notify()
+    else:
+        xend.netif.pendmsg = msg
+
+def backend_rx_req(port, msg):
+    port.write_response(msg)
+
+def backend_rx_rsp(port, msg):
+    subtype = (msg.get_header())['subtype']
+    print "Received netif-be response, subtype %d" % subtype
+    if subtype == CMSG_NETIF_BE_CREATE:
+        rsp = { 'success': True }
+        xend.main.send_management_response(rsp, xend.netif.pendaddr)
+    elif subtype == CMSG_NETIF_BE_CONNECT:
+        (dom,hnd,evtchn,tx_frame,rx_frame,st) = \
+           struct.unpack("QIILLI", msg.get_payload())
+        netif = interface.list[xend.main.port_from_dom(dom).local_port]
+        msg = xend.utils.message(CMSG_NETIF_FE, \
+                                 CMSG_NETIF_FE_INTERFACE_STATUS_CHANGED, 0)
+        msg.append_payload(struct.pack("IIIBBBBBBBB",0,2, \
+                                       netif.evtchn['port2'], \
+                                       netif.mac[0],netif.mac[1], \
+                                       netif.mac[2],netif.mac[3], \
+                                       netif.mac[4],netif.mac[5], \
+                                       0,0))
+        netif.ctrlif_tx_req(xend.main.port_list[netif.key], msg)
+
+def backend_do_work(port):
+    global pendmsg
+    if pendmsg and port.space_to_write_request():
+        port.write_request(pendmsg)
+        pendmsg = None
+        return True
+    return False
+
+
+class interface:
+
+    # Dictionary of all network-device interfaces.
+    list = {}
+
+
+    # NB. 'key' is an opaque value that has no meaning in this class.
+    def __init__(self, dom, key):
+        self.dom     = dom
+        self.key     = key
+        self.pendmsg = None
+
+        # VIFs get a random MAC address with a "special" vendor id.
+        # 
+        # NB. The vendor is currently an "obsolete" one that used to belong
+        # to DEC (AA-00-00). Using it is probably a bit rude :-)
+        # 
+        # NB2. The first bit of the first random octet is set to zero for
+        # all dynamic MAC addresses. This may allow us to manually specify
+        # MAC addresses for some VIFs with no fear of clashes.
+        self.mac = [ 0xaa, 0x00, 0x00 ]
+        self.mac.append(int(random.random()*128))
+        self.mac.append(int(random.random()*256))
+        self.mac.append(int(random.random()*256))
+                
+        interface.list[key] = self
+        msg = xend.utils.message(CMSG_NETIF_BE, CMSG_NETIF_BE_CREATE, 0)
+        msg.append_payload(struct.pack("QIBBBBBBBBI",dom,0, \
+                                       self.mac[0],self.mac[1], \
+                                       self.mac[2],self.mac[3], \
+                                       self.mac[4],self.mac[5], \
+                                       0,0,0))
+        xend.netif.pendaddr = xend.main.mgmt_req_addr
+        backend_tx_req(msg)
+
+
+    # Completely destroy this interface.
+    def destroy(self):
+        del interface.list[self.key]
+        msg = xend.utils.message(CMSG_NETIF_BE, CMSG_NETIF_BE_DESTROY, 0)
+        msg.append_payload(struct.pack("QII",self.dom,0,0))
+        backend_tx_req(msg)        
+
+
+    # The parameter @port is the control-interface event channel. This method
+    # returns True if messages were written to the control interface.
+    def ctrlif_transmit_work(self, port):
+        if self.pendmsg and port.space_to_write_request():
+            port.write_request(self.pendmsg)
+            self.pendmsg = None
+            return True
+        return False
+
+    def ctrlif_tx_req(self, port, msg):
+        if port.space_to_write_request():
+            port.write_request(msg)
+            port.notify()
+        else:
+            self.pendmsg = msg
+
+    def ctrlif_rx_req(self, port, msg):
+        port.write_response(msg)
+        subtype = (msg.get_header())['subtype']
+        if subtype == CMSG_NETIF_FE_DRIVER_STATUS_CHANGED:
+            msg = xend.utils.message(CMSG_NETIF_FE, \
+                                     CMSG_NETIF_FE_INTERFACE_STATUS_CHANGED, 0)
+            msg.append_payload(struct.pack("IIIBBBBBBBB",0,1,0,self.mac[0], \
+                                           self.mac[1],self.mac[2], \
+                                           self.mac[3],self.mac[4], \
+                                           self.mac[5],0,0))
+            self.ctrlif_tx_req(port, msg)
+        elif subtype == CMSG_NETIF_FE_INTERFACE_CONNECT:
+            (hnd,tx_frame,rx_frame) = struct.unpack("ILL", msg.get_payload())
+            xc = Xc.new()
+            self.evtchn = xc.evtchn_bind_interdomain(dom1=0,dom2=self.dom)
+            msg = xend.utils.message(CMSG_NETIF_BE, \
+                                     CMSG_NETIF_BE_CONNECT, 0)
+            msg.append_payload(struct.pack("QIILLI",self.dom,0, \
+                                           self.evtchn['port1'],tx_frame, \
+                                           rx_frame,0))
+            backend_tx_req(msg)
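
The control messages above are fixed-layout structs packed with the struct
module; the "QIILLI" payload handled in backend_rx_rsp(), for instance, is the
netif_be_connect_t message whose fields appear in the domain_controller.h hunk
earlier in this patch. A small round-trip sketch with made-up values, following
the field order used by the Python code:

    import struct

    # dom (u64), handle, evtchn (u32), tx/rx shared-memory frames
    # (unsigned long), status (u32) -- order as unpacked in backend_rx_rsp().
    fmt = 'QIILLI'
    payload = struct.pack(fmt, 3, 0, 7, 0x12345, 0x12346, 0)
    (dom, hnd, evtchn, tx_frame, rx_frame, st) = struct.unpack(fmt, payload)
    assert (dom, evtchn, st) == (3, 7, 0)
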
index 7d596026f9385d39f48d8a4eeb6130fb66bfe8b6..79d0bb1df1563de7082b80b47f83334ff50fadaa 100644 (file)
@@ -27,13 +27,21 @@ static long alloc_dom_mem(struct task_struct *p, reservation_increase_t op)
     {
         /* Leave some slack pages; e.g., for the network. */
         if ( unlikely(free_pfns < (SLACK_DOMAIN_MEM_KILOBYTES >> 
-                                   (PAGE_SHIFT-10))) ) 
+                                   (PAGE_SHIFT-10))) )
+        {
+            DPRINTK("Not enough slack: %u %u\n",
+                    free_pfns,
+                    SLACK_DOMAIN_MEM_KILOBYTES >> (PAGE_SHIFT-10));
             break;
+        }
 
         /* NB. 'alloc_domain_page' does limit checking on pages per domain. */
         if ( unlikely((page = alloc_domain_page(p)) == NULL) )
+        {
+            DPRINTK("Could not allocate a frame\n");
             break;
-        
+        }
+
         /* Inform the domain of the new page's machine address. */ 
         mpfn = (unsigned long)(page - frame_table);
         copy_to_user(op.pages, &mpfn, sizeof(mpfn));
index a9c40ae98f22451621ac31c55722cd79bbf7941f..1b8759e912e15b4afc5b1a9b3b5ab8ecb7db935c 100644 (file)
@@ -334,6 +334,8 @@ struct pfn_info *alloc_domain_page(struct task_struct *p)
         spin_lock(&p->page_list_lock);
         if ( unlikely(p->tot_pages >= p->max_pages) )
         {
+            DPRINTK("Over-allocation for domain %llu: %u >= %u\n",
+                    p->domain, p->tot_pages, p->max_pages);
             spin_unlock(&p->page_list_lock);
             goto free_and_exit;
         }
@@ -884,7 +886,7 @@ int construct_dom0(struct task_struct *p,
         page->type_and_flags  = 0;
         page->count_and_flags = PGC_allocated | 1;
         list_add_tail(&page->list, &p->page_list);
-        p->tot_pages++;
+        p->tot_pages++; p->max_pages++;
     }
 
     mpt_alloc = (vpt_start - v_start) + alloc_start;
index 7f814391cf5446490794a094dd1d9025097c7711..0d5fa023a14d5910ab8d2b6001f1ccd99ac4db98 100644 (file)
@@ -105,7 +105,6 @@ static struct {
 void cmain(unsigned long magic, multiboot_info_t *mbi)
 {
     struct task_struct *new_dom;
-    dom0_createdomain_t dom0_params;
     unsigned long max_page;
     unsigned char *cmdline;
     module_t *mod = (module_t *)__va(mbi->mods_addr);
@@ -263,7 +262,6 @@ void cmain(unsigned long magic, multiboot_info_t *mbi)
     task_hash[TASK_HASH(IDLE_DOMAIN_ID)] = &idle0_task;
 
     /* Create initial domain 0. */
-    dom0_params.memory_kb = opt_dom0_mem;
     new_dom = do_createdomain(0, 0);
     if ( new_dom == NULL )
         panic("Error creating domain 0\n");
index e4d0590a579e23573f8d68c84953f653b8cb1022..5acfae8482a139e2520ad5240104521cc1b39b4b 100644 (file)
@@ -940,17 +940,25 @@ static int do_extended_command(unsigned long ptr, unsigned long val)
         }
         break;
 
+        /* XXX This function is racey! */
     case MMUEXT_REASSIGN_PAGE:
-        if ( !IS_PRIV(current) )
+        if ( unlikely(!IS_PRIV(current)) )
         {
             MEM_LOG("Dom %llu has no privilege to reassign page ownership",
                     current->domain);
             okay = 0;
         }
-        else if ( percpu_info[cpu].gps != NULL )
+        else if ( likely(percpu_info[cpu].gps != NULL) )
         {
+            current->tot_pages--;
+            percpu_info[cpu].gps->tot_pages++;
             page->u.domain = percpu_info[cpu].gps;
         }
+        else
+        {
+            MEM_LOG("No GPS to reassign pfn %08lx to\n", pfn);
+            okay = 0;
+        }
         break;
 
     case MMUEXT_RESET_SUBJECTDOM:
index 16fa5e66d44feb8102b1455125bc183db8a130c8..7f961d852108787093f641e57b3e825716ee8c65 100644 (file)
@@ -101,6 +101,8 @@ if [ "$CONFIG_HIGHMEM" = "y" ]; then
    bool 'HIGHMEM I/O support' CONFIG_HIGHIO
 fi
 
+define_int CONFIG_FORCE_MAX_ZONEORDER 12
+
 #bool 'Symmetric multi-processing support' CONFIG_SMP
 #if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
 #   define_bool CONFIG_HAVE_DEC_LOCK y
index eaa9171b1f4faf4ea59aee60ce989a740c15458a..013e732c3fa07e3742c2793edcefd4065781e4ff 100644 (file)
@@ -50,6 +50,7 @@ CONFIG_X86_TSC=y
 CONFIG_X86_L1_CACHE_SHIFT=5
 CONFIG_NOHIGHMEM=y
 # CONFIG_HIGHMEM4G is not set
+CONFIG_FORCE_MAX_ZONEORDER=12
 
 #
 # General setup
@@ -156,6 +157,7 @@ CONFIG_IP_NF_TARGET_ULOG=y
 # Network testing
 #
 # CONFIG_NET_PKTGEN is not set
+CONFIG_NETDEVICES=y
 
 #
 # Block devices
index 41b05aaaa7741d23ef6744ab64b8c8744d3155bc..3be5b50bfa7bb1f7a565faa19be487716fa8e1df 100644 (file)
@@ -51,6 +51,7 @@ CONFIG_X86_TSC=y
 CONFIG_X86_L1_CACHE_SHIFT=5
 CONFIG_NOHIGHMEM=y
 # CONFIG_HIGHMEM4G is not set
+CONFIG_FORCE_MAX_ZONEORDER=12
 
 #
 # General setup
index e6004b4a8e9806ed8fd9bd6bda7af515e0c8c7d0..e80435fbbb8f4e983845f7dbccfc4ed1ba3ff28a 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/rbtree.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
+#include <linux/blkdev.h>
 #include <asm/ctrl_if.h>
 #include <asm/io.h>
 #include "../blkif.h"
index 0746ecfab0951cb394af6eecedd01d802fd9eb4e..0b2622465170313abbbe7de56581fb49e01e225a 100644 (file)
@@ -74,7 +74,8 @@ void blkif_ctrlif_init(void)
     ctrl_msg_t                       cmsg;
     blkif_be_driver_status_changed_t st;
 
-    (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx);
+    (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx, 
+                                    CALLBACK_IN_BLOCKING_CONTEXT);
 
     /* Send a driver-UP notification to the domain controller. */
     cmsg.type      = CMSG_BLKIF_BE;
index 9acbac35ab4f772853cad1a6ea5d0107b8e9b218..14a6ab324dced0921196fa2f6039ee9d3debde9e 100644 (file)
@@ -70,7 +70,7 @@ void blkif_create(blkif_be_create_t *create)
     unsigned int  handle = create->blkif_handle;
     blkif_t     **pblkif, *blkif;
 
-    if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_ATOMIC)) == NULL )
+    if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL )
     {
         DPRINTK("Could not create blkif: out of memory\n");
         create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
index 4b11ad9a8eee1f59d6b39f9a19788a36145defbf..eb3e32c75f58fe7a8ea248ce460db8fa00ab339e 100644 (file)
 #define MAX_PENDING_REQS 64
 #define BATCH_PER_DOMAIN 16
 
-static struct vm_struct *mmap_vma;
-#define MMAP_PAGES_PER_SEGMENT \
-    ((BLKIF_MAX_SEGMENTS_PER_REQUEST >> (PAGE_SHIFT-9)) + 1)
+static unsigned long mmap_vstart;
 #define MMAP_PAGES_PER_REQUEST \
-    (2 * BLKIF_MAX_SEGMENTS_PER_REQUEST * MMAP_PAGES_PER_SEGMENT)
+    (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
 #define MMAP_PAGES             \
     (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
-#define MMAP_VADDR(_req,_seg)            \
-    ((unsigned long)mmap_vma->addr +     \
+#define MMAP_VADDR(_req,_seg)                        \
+    (mmap_vstart +                                   \
      ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
-     ((_seg) * MMAP_PAGES_PER_SEGMENT * PAGE_SIZE))
+     ((_seg) * PAGE_SIZE))
 
 /*
  * Each outstanding request that we've passed to the lower device layers has a 
@@ -259,11 +257,13 @@ static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
     prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW);
     for ( i = 0; i < req->nr_segments; i++ )
     {
-        if ( (req->buffer_and_sects[i] & ~PAGE_MASK) != (PAGE_SIZE / 512) )
+        /* Make sure the buffer is page-sized. */
+        if ( (blkif_first_sect(req->frame_and_sects[i]) != 0) ||
+             (blkif_last_sect(req->frame_and_sects[i]) != 7) )
             goto bad_descriptor;
         rc = direct_remap_area_pages(&init_mm, 
                                      MMAP_VADDR(pending_idx, i),
-                                     req->buffer_and_sects[i] & PAGE_MASK, 
+                                     req->frame_and_sects[i] & PAGE_MASK, 
                                      PAGE_SIZE, prot, blkif->domid);
         if ( rc != 0 )
             goto bad_descriptor;
@@ -288,15 +288,15 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
     extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
     struct buffer_head *bh;
     int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
-    unsigned short nr_sects;
-    unsigned long buffer;
+    short nr_sects;
+    unsigned long buffer, fas;
     int i, tot_sects, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
     pending_req_t *pending_req;
     pgprot_t       prot;
 
     /* We map virtual scatter/gather segments to physical segments. */
     int new_segs, nr_psegs = 0;
-    phys_seg_t phys_seg[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2];
+    phys_seg_t phys_seg[BLKIF_MAX_SEGMENTS_PER_REQUEST + 1];
 
     /* Check that number of segments is sane. */
     if ( unlikely(req->nr_segments == 0) || 
@@ -314,17 +314,12 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
      */
     for ( i = tot_sects = 0; i < req->nr_segments; i++, tot_sects += nr_sects )
     {
-        buffer   = req->buffer_and_sects[i] & ~0x1FF;
-        nr_sects = req->buffer_and_sects[i] &  0x1FF;
+        fas      = req->frame_and_sects[i];
+        buffer   = (fas & PAGE_MASK) | (blkif_first_sect(fas) << 9);
+        nr_sects = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
 
-        if ( unlikely(nr_sects == 0) )
-            continue;
-
-        if ( unlikely(nr_sects > BLKIF_MAX_SECTORS_PER_SEGMENT) )
-        {
-            DPRINTK("Too many sectors in segment\n");
+        if ( nr_sects <= 0 )
             goto bad_descriptor;
-        }
 
         phys_seg[nr_psegs].dev           = req->device;
         phys_seg[nr_psegs].sector_number = req->sector_number + tot_sects;
@@ -344,7 +339,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
         }
   
         nr_psegs += new_segs;
-        ASSERT(nr_psegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST*2);
+        ASSERT(nr_psegs <= (BLKIF_MAX_SEGMENTS_PER_REQUEST+1));
     }
 
     /* Nonsensical zero-sized request? */
@@ -358,13 +353,10 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
 
     for ( i = 0; i < nr_psegs; i++ )
     {
-        unsigned long sz = ((phys_seg[i].buffer & ~PAGE_MASK) + 
-                            (phys_seg[i].nr_sects << 9) + 
-                            (PAGE_SIZE - 1)) & PAGE_MASK;
         int rc = direct_remap_area_pages(&init_mm, 
                                          MMAP_VADDR(pending_idx, i),
                                          phys_seg[i].buffer & PAGE_MASK, 
-                                         sz, prot, blkif->domid);
+                                         PAGE_SIZE, prot, blkif->domid);
         if ( rc != 0 )
         {
             DPRINTK("invalid buffer\n");
@@ -372,6 +364,8 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
                               MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
             goto bad_descriptor;
         }
+        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
+            phys_seg[i].buffer >> PAGE_SHIFT;
     }
 
     pending_req = &pending_reqs[pending_idx];
@@ -399,6 +393,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
         bh->b_rsector       = (unsigned long)phys_seg[i].sector_number;
         bh->b_data          = (char *)MMAP_VADDR(pending_idx, i) +
             (phys_seg[i].buffer & ~PAGE_MASK);
+//        bh->b_page          = virt_to_page(MMAP_VADDR(pending_idx, i));
         bh->b_end_io        = end_block_io_op;
         bh->b_private       = pending_req;
 
@@ -456,13 +451,13 @@ static int __init init_module(void)
 {
     int i;
 
+    if ( !(start_info.flags & SIF_INITDOMAIN) )
+        return 0;
+
     blkif_interface_init();
 
-    if ( (mmap_vma = get_vm_area(MMAP_PAGES * PAGE_SIZE, VM_IOREMAP)) == NULL )
-    {
-        printk(KERN_WARNING "Could not allocate VMA for blkif backend.\n");
-        return -ENOMEM;
-    }
+    if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 )
+        BUG();
 
     pending_cons = 0;
     pending_prod = MAX_PENDING_REQS;
@@ -484,6 +479,7 @@ static int __init init_module(void)
 
 static void cleanup_module(void)
 {
+    BUG();
 }
 
 module_init(init_module);
index 19b0b3015dff7a978ecc263f2e22fecabdec523f..bb5b6ea74363f03a1f9908f7b545387386fb4312 100644 (file)
@@ -47,7 +47,7 @@ void vbd_create(blkif_be_vbd_create_t *create)
         }
     }
 
-    if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_ATOMIC)) == NULL) )
+    if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) )
     {
         DPRINTK("vbd_create: out of memory\n");
         create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
@@ -111,7 +111,7 @@ void vbd_grow(blkif_be_vbd_grow_t *grow)
     } 
 
     if ( unlikely((x = kmalloc(sizeof(blkif_extent_le_t), 
-                               GFP_ATOMIC)) == NULL) )
+                               GFP_KERNEL)) == NULL) )
     {
         DPRINTK("vbd_grow: out of memory\n");
         grow->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
index 1938f68f8ec8874a15f588cefe1f37bc5a98f144..0a90744c59d41703fd969bca7a7e39e3c872769e 100644 (file)
  */
 #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
 
-#define BLKIF_MAX_SECTORS_PER_SEGMENT  16
-
 typedef struct {
     u8             operation;        /* BLKIF_OP_???                         */
     u8             nr_segments;      /* number of segments                   */
     blkif_vdev_t   device;           /* only for read/write requests         */
     unsigned long  id;               /* private guest value, echoed in resp  */
     blkif_sector_t sector_number;    /* start sector idx on disk (r/w only)  */
-    /* Least 9 bits is 'nr_sects'. High 23 bits is the address.       */
-    /* We must have '0 <= nr_sects <= BLKIF_MAX_SECTORS_PER_SEGMENT'. */
-    unsigned long  buffer_and_sects[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+    /* @f_a_s[2:0]=last_sect ; @f_a_s[5:3]=first_sect ; @f_a_s[:12]=frame.   */
+    /* @first_sect: first sector in frame to transfer (inclusive).           */
+    /* @last_sect: last sector in frame to transfer (inclusive).             */
+    /* @frame: machine page frame number.                                    */
+    unsigned long  frame_and_sects[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 } blkif_request_t;
 
+#define blkif_first_sect(_fas) (((_fas)>>3)&7)
+#define blkif_last_sect(_fas)  ((_fas)&7)
+
 typedef struct {
     unsigned long   id;              /* copied from request */
     u8              operation;       /* copied from request */
@@ -79,8 +82,8 @@ typedef struct {
  *  @device      == unused (zero)
  *  @id          == any value (echoed in response message)
  *  @sector_num  == unused (zero)
- *  @buffer_and_sects == list of page-aligned, page-sized buffers.
- *                       (i.e., nr_sects == 8).
+ *  @frame_and_sects == list of page-sized buffers.
+ *                       (i.e., @first_sect == 0, @last_sect == 7).
  * 
  * The response is a list of vdisk_t elements copied into the out-of-band
  * probe buffer. On success the response status field contains the number
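
The new frame_and_sects encoding packs a whole segment descriptor into one
word: bits 2:0 hold last_sect, bits 5:3 hold first_sect, and bits 12 and above
hold the machine page frame, exactly as blkif_first_sect()/blkif_last_sect()
decode it. A short illustration of the packing arithmetic (Python is used only
for brevity; the real code is the C elsewhere in this patch, and 4 KB pages
are assumed):

    PAGE_MASK = ~0xfff    # 4 KB pages

    def pack_fas(buffer_ma, first_sect, last_sect):
        # Machine frame in the high bits, 512-byte sector range below it.
        return (buffer_ma & PAGE_MASK) | (first_sect << 3) | last_sect

    def blkif_first_sect(fas): return (fas >> 3) & 7
    def blkif_last_sect(fas):  return fas & 7

    fas = pack_fas(0x1234000, 0, 7)            # a full page: sectors 0..7
    assert blkif_first_sect(fas) == 0
    assert blkif_last_sect(fas)  == 7
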
index 29cc01d08749397ee2ee313fe2f954cbac08100d..63f1aeea26294a01d0fa2f3fa3f30b0da7413acf 100644 (file)
@@ -24,8 +24,6 @@ typedef unsigned char byte; /* from linux/ide.h */
 static unsigned int blkif_state = BLKIF_STATE_CLOSED;
 static unsigned int blkif_evtchn, blkif_irq;
 
-static struct tq_struct blkif_statechange_tq;
-
 static int blkif_control_rsp_valid;
 static blkif_response_t blkif_control_rsp;
 
@@ -302,11 +300,18 @@ static int blkif_queue_request(unsigned long   id,
     struct gendisk     *gd;
     blkif_request_t    *req;
     struct buffer_head *bh;
+    unsigned int        fsect, lsect;
 
-    if ( unlikely(nr_sectors >= (1<<9)) )
-        BUG();
+    fsect = (buffer_ma & ~PAGE_MASK) >> 9;
+    lsect = fsect + nr_sectors - 1;
+
+    /* Buffer must be sector-aligned. Extent mustn't cross a page boundary. */
     if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
         BUG();
+    if ( lsect > 7 )
+        BUG();
+
+    buffer_ma &= PAGE_MASK;
 
     if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
         return 1;
@@ -341,8 +346,9 @@ static int blkif_queue_request(unsigned long   id,
             bh = (struct buffer_head *)id;
             bh->b_reqnext = (struct buffer_head *)req->id;
             req->id = id;
-            req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors;
-            if ( ++req->nr_segments < MAX_BLK_SEGS )
+            req->frame_and_sects[req->nr_segments] = 
+                buffer_ma | (fsect<<3) | lsect;
+            if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST )
                 sg_next_sect += nr_sectors;
             else
                 DISABLE_SCATTERGATHER();
@@ -371,7 +377,7 @@ static int blkif_queue_request(unsigned long   id,
     req->sector_number = (blkif_sector_t)sector_number;
     req->device        = device; 
     req->nr_segments   = 1;
-    req->buffer_and_sects[0] = buffer_ma | nr_sectors;
+    req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect;
     req_prod++;
 
     return 0;
@@ -556,46 +562,11 @@ void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
 }
 
 
-static void blkif_bringup_phase1(void *unused)
+static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
 {
     ctrl_msg_t                   cmsg;
     blkif_fe_interface_connect_t up;
 
-    /* Move from CLOSED to DISCONNECTED state. */
-    blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
-    blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
-    blkif_state  = BLKIF_STATE_DISCONNECTED;
-
-    /* Construct an interface-CONNECT message for the domain controller. */
-    cmsg.type      = CMSG_BLKIF_FE;
-    cmsg.subtype   = CMSG_BLKIF_FE_INTERFACE_CONNECT;
-    cmsg.length    = sizeof(blkif_fe_interface_connect_t);
-    up.handle      = 0;
-    up.shmem_frame = virt_to_machine(blk_ring) >> PAGE_SHIFT;
-    memcpy(cmsg.msg, &up, sizeof(up));
-
-    /* Tell the controller to bring up the interface. */
-    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
-static void blkif_bringup_phase2(void *unused)
-{
-    blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
-    (void)request_irq(blkif_irq, blkif_int, 0, "blkif", NULL);
-
-    /* Probe for discs that are attached to the interface. */
-    xlvbd_init();
-
-    blkif_state = BLKIF_STATE_CONNECTED;
-
-    /* Kick pending requests. */
-    spin_lock_irq(&io_request_lock);
-    kick_pending_request_queues();
-    spin_unlock_irq(&io_request_lock);
-}
-
-static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
-{
     if ( status->handle != 0 )
     {
         printk(KERN_WARNING "Status change on unsupported blkif %d\n",
@@ -617,8 +588,22 @@ static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
                    " in state %d\n", blkif_state);
             break;
         }
-        blkif_statechange_tq.routine = blkif_bringup_phase1;
-        schedule_task(&blkif_statechange_tq);
+
+        /* Move from CLOSED to DISCONNECTED state. */
+        blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
+        blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
+        blkif_state  = BLKIF_STATE_DISCONNECTED;
+
+        /* Construct an interface-CONNECT message for the domain controller. */
+        cmsg.type      = CMSG_BLKIF_FE;
+        cmsg.subtype   = CMSG_BLKIF_FE_INTERFACE_CONNECT;
+        cmsg.length    = sizeof(blkif_fe_interface_connect_t);
+        up.handle      = 0;
+        up.shmem_frame = virt_to_machine(blk_ring) >> PAGE_SHIFT;
+        memcpy(cmsg.msg, &up, sizeof(up));
+        
+        /* Tell the controller to bring up the interface. */
+        ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
         break;
 
     case BLKIF_INTERFACE_STATUS_CONNECTED:
@@ -628,9 +613,20 @@ static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
                    " in state %d\n", blkif_state);
             break;
         }
+
         blkif_evtchn = status->evtchn;
-        blkif_statechange_tq.routine = blkif_bringup_phase2;
-        schedule_task(&blkif_statechange_tq);
+        blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
+        (void)request_irq(blkif_irq, blkif_int, 0, "blkif", NULL);
+        
+        /* Probe for discs that are attached to the interface. */
+        xlvbd_init();
+        
+        blkif_state = BLKIF_STATE_CONNECTED;
+        
+        /* Kick pending requests. */
+        spin_lock_irq(&io_request_lock);
+        kick_pending_request_queues();
+        spin_unlock_irq(&io_request_lock);
         break;
 
     default:
@@ -675,7 +671,11 @@ int __init xlblk_init(void)
     ctrl_msg_t                       cmsg;
     blkif_fe_driver_status_changed_t st;
 
-    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx);
+    if ( start_info.flags & SIF_INITDOMAIN )
+        return 0;
+
+    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
+                                    CALLBACK_IN_BLOCKING_CONTEXT);
 
     /* Send a driver-UP notification to the domain controller. */
     cmsg.type      = CMSG_BLKIF_FE;
index b26907192af3145ef14592ab6e76a93c9b95b346..12ce976cb5f3b1ad819da409fb9bf8e37f8d8bf0 100644 (file)
@@ -67,7 +67,7 @@ static int xlvbd_get_vbd_info(vdisk_t *disk_info)
     memset(&req, 0, sizeof(req));
     req.operation   = BLKIF_OP_PROBE;
     req.nr_segments = 1;
-    req.buffer_and_sects[0] = virt_to_machine(buf) | (PAGE_SIZE/512);
+    req.frame_and_sects[0] = virt_to_machine(buf) | 7;
 
     blkif_control_send(&req, &rsp);
 
index e01896385b5507854602a577ec3a0c40af675c26..244f309467cf08e5ad00e711523675467e908834 100644 (file)
@@ -513,7 +513,7 @@ static int __init xencons_init(void)
     }
     else
     {
-        (void)ctrl_if_register_receiver(CMSG_CONSOLE, xencons_rx);
+        (void)ctrl_if_register_receiver(CMSG_CONSOLE, xencons_rx, 0);
     }
 
     printk("Xen virtual console successfully installed\n");
index e0e43ff2ccdccc27f7b64d3d3d5580c0763a9ae4..cf1b07503100d7f352538b6a7869b5bacf9a0ac7 100644 (file)
@@ -10,8 +10,6 @@
 
 static void netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
 {
-    DPRINTK("Received netif backend message, subtype=%d\n", msg->subtype);
-    
     switch ( msg->subtype )
     {
     case CMSG_NETIF_BE_CREATE:
@@ -54,7 +52,8 @@ void netif_ctrlif_init(void)
     ctrl_msg_t                       cmsg;
     netif_be_driver_status_changed_t st;
 
-    (void)ctrl_if_register_receiver(CMSG_NETIF_BE, netif_ctrlif_rx);
+    (void)ctrl_if_register_receiver(CMSG_NETIF_BE, netif_ctrlif_rx,
+                                    CALLBACK_IN_BLOCKING_CONTEXT);
 
     /* Send a driver-UP notification to the domain controller. */
     cmsg.type      = CMSG_NETIF_BE;
index 8623d8214ba9f9cb52494d8440ed9f76a8daceb0..b6a9cff69242ba2e2e63033bd3324737aad34d50 100644 (file)
@@ -7,6 +7,7 @@
  */
 
 #include "common.h"
+#include <linux/rtnetlink.h>
 
 #define NETIF_HASHSZ 1024
 #define NETIF_HASH(_d,_h) \
@@ -14,6 +15,7 @@
 
 static netif_t *netif_hash[NETIF_HASHSZ];
 static struct net_device *bridge_dev;
+static struct net_bridge *bridge_br;
 
 netif_t *netif_find_by_handle(domid_t domid, unsigned int handle)
 {
@@ -36,8 +38,10 @@ void __netif_disconnect_complete(netif_t *netif)
      */
     unbind_evtchn_from_irq(netif->evtchn);
     vfree(netif->tx); /* Frees netif->rx as well. */
-    (void)br_del_if((struct net_bridge *)bridge_dev->priv, netif->dev);
+    rtnl_lock();
+    (void)br_del_if(bridge_br, netif->dev);
     (void)dev_close(netif->dev);
+    rtnl_unlock();
 
     /* Construct the deferred response message. */
     cmsg.type         = CMSG_NETIF_BE;
@@ -73,7 +77,7 @@ void netif_create(netif_be_create_t *create)
     struct net_device *dev;
     netif_t          **pnetif, *netif;
 
-    dev = alloc_netdev(sizeof(netif_t), "netif-be-%d", ether_setup);
+    dev = alloc_netdev(sizeof(netif_t), "nbe-if%d", ether_setup);
     if ( dev == NULL )
     {
         DPRINTK("Could not create netif: out of memory\n");
@@ -111,7 +115,10 @@ void netif_create(netif_be_create_t *create)
     dev->hard_start_xmit = netif_be_start_xmit;
     dev->get_stats       = netif_be_get_stats;
     memcpy(dev->dev_addr, create->mac, ETH_ALEN);
-    
+
+    /* XXX In bridge mode we should force a different MAC from remote end. */
+    dev->dev_addr[2] ^= 1;
+
     if ( register_netdev(dev) != 0 )
     {
         DPRINTK("Could not register new net device\n");
@@ -225,15 +232,27 @@ void netif_connect(netif_be_connect_t *connect)
     netif->status         = CONNECTED;
     netif_get(netif);
 
+    rtnl_lock();
+
     (void)dev_open(netif->dev);
-    (void)br_add_if((struct net_bridge *)bridge_dev->priv, netif->dev);
-    /* At this point we try to ensure that eth0 is attached to the bridge. */
+    (void)br_add_if(bridge_br, netif->dev);
+
+    /*
+     * The default config is a very simple binding to eth0.
+     * If eth0 is being used as an IP interface by this OS then someone
+     * must add eth0's IP address to nbe-br, and change the routing table
+     * to refer to nbe-br instead of eth0.
+     */
+    (void)dev_open(bridge_dev);
     if ( (eth0_dev = __dev_get_by_name("eth0")) != NULL )
     {
         (void)dev_open(eth0_dev);
-        (void)br_add_if((struct net_bridge *)bridge_dev->priv, eth0_dev);
+        (void)br_add_if(bridge_br, eth0_dev);
     }
-    (void)request_irq(netif->irq, netif_be_int, 0, "netif-backend", netif);
+
+    rtnl_unlock();
+
+    (void)request_irq(netif->irq, netif_be_int, 0, netif->dev->name, netif);
     netif_start_queue(netif->dev);
 
     connect->status = NETIF_BE_STATUS_OKAY;
@@ -271,8 +290,11 @@ int netif_disconnect(netif_be_disconnect_t *disconnect, u8 rsp_id)
 void netif_interface_init(void)
 {
     memset(netif_hash, 0, sizeof(netif_hash));
-    if ( br_add_bridge("netif-backend") != 0 )
+    if ( br_add_bridge("nbe-br") != 0 )
         BUG();
-    bridge_dev = __dev_get_by_name("netif-be-bridge");
-    (void)dev_open(bridge_dev);
+    bridge_dev = __dev_get_by_name("nbe-br");
+    bridge_br  = (struct net_bridge *)bridge_dev->priv;
+    bridge_br->bridge_hello_time = bridge_br->hello_time = 0;
+    bridge_br->bridge_forward_delay = bridge_br->forward_delay = 0;
+    bridge_br->stp_enabled = 0;
 }
index 5b84eba9bc77589c30dcaf4a3bb3ec66d05b8379..62a4adf27d80ff85a5be09c2349b2c1e57d33ecf 100644 (file)
@@ -14,7 +14,7 @@
 #include <asm/hypervisor-ifs/dom_mem_ops.h>
 
 static void net_tx_action(unsigned long unused);
-static void tx_skb_release(struct sk_buff *skb);
+static void netif_page_release(struct page *page);
 static void make_tx_response(netif_t *netif, 
                              u16      id,
                              s8       st);
@@ -30,13 +30,13 @@ static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
 #define tx_work_exists(_if) (1)
 
 #define MAX_PENDING_REQS 256
-unsigned long mmap_vstart;
+static unsigned long mmap_vstart;
 #define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
 
 #define PKT_PROT_LEN (ETH_HLEN + 20)
 
-/*static pending_req_t pending_reqs[MAX_PENDING_REQS];*/
 static u16 pending_id[MAX_PENDING_REQS];
+static netif_t *pending_netif[MAX_PENDING_REQS];
 static u16 pending_ring[MAX_PENDING_REQS];
 static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
 typedef unsigned int PEND_RING_IDX;
@@ -60,8 +60,7 @@ static void __refresh_mfn_list(void)
     op.u.increase.pages = mfn_list;
     if ( (ret = HYPERVISOR_dom_mem_op(&op)) != MAX_MFN_ALLOC )
     {
-        printk(KERN_WARNING "Unable to increase memory reservation (%d)\n",
-               ret);
+        printk(KERN_ALERT "Unable to increase memory reservation (%d)\n", ret);
         BUG();
     }
     alloc_index = MAX_MFN_ALLOC;
@@ -100,10 +99,10 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
     netif_t *netif = (netif_t *)dev->priv;
     s8 status = NETIF_RSP_OKAY;
-    u16 size, id;
+    u16 size=0, id;
     mmu_update_t mmu[6];
     pgd_t *pgd; pmd_t *pmd; pte_t *pte;
-    unsigned long vdata, new_mfn;
+    unsigned long vdata, mdata=0, new_mfn;
 
     /* Drop the packet if the target domain has no receive buffers. */
     if ( (netif->rx_req_cons == netif->rx->req_prod) ||
@@ -126,16 +125,23 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
          (((unsigned long)skb->end ^ (unsigned long)skb->head) & PAGE_MASK) ||
          ((skb->end - skb->head) < (PAGE_SIZE/2)) )
     {
-        struct sk_buff *nskb = dev_alloc_skb(PAGE_SIZE-1024);
+        struct sk_buff *nskb = alloc_skb(PAGE_SIZE-1024, GFP_ATOMIC);
         int hlen = skb->data - skb->head;
+        if ( unlikely(nskb == NULL) )
+        {
+            DPRINTK("DOM%llu couldn't get memory for skb.\n", netif->domid);
+            status = NETIF_RSP_ERROR;
+            goto out;
+        }
         skb_reserve(nskb, hlen);
-        skb_put(nskb, skb->len);
+        __skb_put(nskb, skb->len);
         (void)skb_copy_bits(skb, -hlen, nskb->head, hlen + skb->len);
         dev_kfree_skb(skb);
         skb = nskb;
     }
 
     vdata = (unsigned long)skb->data;
+    mdata = virt_to_machine(vdata);
     size  = skb->tail - skb->data;
 
     new_mfn = get_new_mfn();
@@ -153,7 +159,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
     mmu[1].ptr |= MMU_EXTENDED_COMMAND;
     mmu[1].val |= MMUEXT_SET_SUBJECTDOM_H;
 
-    mmu[2].ptr  = virt_to_machine(vdata & PAGE_MASK) | MMU_EXTENDED_COMMAND;
+    mmu[2].ptr  = (mdata & PAGE_MASK) | MMU_EXTENDED_COMMAND;
     mmu[2].val  = MMUEXT_REASSIGN_PAGE;
 
     mmu[3].ptr  = MMU_EXTENDED_COMMAND;
@@ -167,6 +173,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
     if ( unlikely(HYPERVISOR_mmu_update(mmu, 6) < 0) )
     {
+        DPRINTK("Failed MMU update transferring to DOM%llu\n", netif->domid);
         dealloc_mfn(new_mfn);
         status = NETIF_RSP_ERROR;
         goto out;
@@ -174,12 +181,12 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
     phys_to_machine_mapping[__pa(vdata) >> PAGE_SHIFT] = new_mfn;
 
-    netif->stats.tx_bytes += size;
-    netif->stats.tx_packets++;
+    netif->stats.rx_bytes += size;
+    netif->stats.rx_packets++;
 
  out:
     spin_lock(&netif->rx_lock);
-    make_rx_response(netif, id, status, virt_to_machine(vdata), size);
+    make_rx_response(netif, id, status, mdata, size);
     spin_unlock(&netif->rx_lock);    
     dev_kfree_skb(skb);
     return 0;
@@ -220,6 +227,16 @@ static void add_to_net_schedule_list_tail(netif_t *netif)
     spin_unlock(&net_schedule_list_lock);
 }
 
+static inline void netif_schedule_work(netif_t *netif)
+{
+    if ( (netif->tx_req_cons != netif->tx->req_prod) &&
+         ((netif->tx_req_cons-netif->tx_resp_prod) != NETIF_TX_RING_SIZE) )
+    {
+        add_to_net_schedule_list_tail(netif);
+        maybe_schedule_tx_action();
+    }
+}
+
 void netif_deschedule(netif_t *netif)
 {
     remove_from_net_schedule_list(netif);
@@ -229,14 +246,8 @@ void netif_deschedule(netif_t *netif)
 static void tx_credit_callback(unsigned long data)
 {
     netif_t *netif = (netif_t *)data;
-
     netif->remaining_credit = netif->credit_bytes;
-
-    if ( tx_work_exists(netif) )
-    {
-        add_to_net_schedule_list_tail(netif);
-        maybe_schedule_tx_action();
-    }    
+    netif_schedule_work(netif);
 }
 #endif
 
@@ -249,6 +260,7 @@ static void net_tx_action(unsigned long unused)
     u16 pending_idx;
     NETIF_RING_IDX i;
     pgprot_t prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED);
+    struct page *page;
 
     while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
             !list_empty(&net_schedule_list) )
@@ -261,7 +273,7 @@ static void net_tx_action(unsigned long unused)
 
         /* Work to do? */
         i = netif->tx_req_cons;
-        if ( (i == netif->tx->req_prod) && 
+        if ( (i == netif->tx->req_prod) ||
              ((i-netif->tx_resp_prod) == NETIF_TX_RING_SIZE) )
         {
             netif_put(netif);
@@ -296,7 +308,7 @@ static void net_tx_action(unsigned long unused)
         netif->remaining_credit -= tx.size;
 #endif
 
-        add_to_net_schedule_list_tail(netif);
+        netif_schedule_work(netif);
 
         if ( unlikely(txreq.size <= PKT_PROT_LEN) || 
              unlikely(txreq.size > ETH_FRAME_LEN) )
@@ -335,6 +347,7 @@ static void net_tx_action(unsigned long unused)
 
         if ( unlikely((skb = alloc_skb(PKT_PROT_LEN, GFP_ATOMIC)) == NULL) )
         {
+            DPRINTK("Can't allocate a skb in start_xmit.\n");
             make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
             netif_put(netif);
             vmfree_area_pages(MMAP_VADDR(pending_idx), PAGE_SIZE);
@@ -346,29 +359,29 @@ static void net_tx_action(unsigned long unused)
                (void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
                PKT_PROT_LEN);
 
-        skb->dev        = netif->dev;
-        skb->protocol   = eth_type_trans(skb, skb->dev);
-        
+        page = virt_to_page(MMAP_VADDR(pending_idx));
+
         /* Append the packet payload as a fragment. */
-        skb_shinfo(skb)->frags[0].page        = 
-            virt_to_page(MMAP_VADDR(pending_idx));
-        skb_shinfo(skb)->frags[0].size        =
-            txreq.size - PKT_PROT_LEN;
+        skb_shinfo(skb)->frags[0].page        = page;
+        skb_shinfo(skb)->frags[0].size        = txreq.size - PKT_PROT_LEN;
         skb_shinfo(skb)->frags[0].page_offset = 
             (txreq.addr + PKT_PROT_LEN) & ~PAGE_MASK;
         skb_shinfo(skb)->nr_frags = 1;
         skb->data_len  = txreq.size - PKT_PROT_LEN;
         skb->len      += skb->data_len;
 
+        skb->dev      = netif->dev;
+        skb->protocol = eth_type_trans(skb, skb->dev);
+
         /* Destructor information. */
-        skb->destructor = tx_skb_release;
-        skb_shinfo(skb)->frags[MAX_SKB_FRAGS-1].page = (struct page *)netif;
-        skb_shinfo(skb)->frags[MAX_SKB_FRAGS-1].size = pending_idx;
+        atomic_set(&page->count, 1);
+        page->mapping = (struct address_space *)netif_page_release;
+        pending_id[pending_idx] = txreq.id;
+        pending_netif[pending_idx] = netif;
 
-        netif->stats.rx_bytes += txreq.size;
-        netif->stats.rx_packets++;
+        netif->stats.tx_bytes += txreq.size;
+        netif->stats.tx_packets++;
 
-        pending_id[pending_idx] = txreq.id;
         pending_cons++;
 
         netif_rx(skb);
@@ -376,28 +389,34 @@ static void net_tx_action(unsigned long unused)
     }
 }
 
-/* Destructor function for tx skbs. */
-static void tx_skb_release(struct sk_buff *skb)
+static void netif_page_release(struct page *page)
 {
     unsigned long flags;
-    netif_t *netif = (netif_t *)skb_shinfo(skb)->frags[MAX_SKB_FRAGS-1].page;
-    u16 pending_idx = skb_shinfo(skb)->frags[MAX_SKB_FRAGS-1].size;
+    netif_t *netif;
+    u16 pending_idx;
+
+    pending_idx = page - virt_to_page(mmap_vstart);
+
+    netif = pending_netif[pending_idx];
 
     vmfree_area_pages(MMAP_VADDR(pending_idx), PAGE_SIZE);
-    
-    skb_shinfo(skb)->nr_frags = 0; 
-    
+        
     spin_lock(&netif->tx_lock);
     make_tx_response(netif, pending_id[pending_idx], NETIF_RSP_OKAY);
     spin_unlock(&netif->tx_lock);
-    
+
+    /*
+     * Scheduling checks must happen after the above response is posted.
+     * This avoids a possible race with a guest OS on another CPU.
+     */
+    mb();
+    netif_schedule_work(netif);
+
     netif_put(netif);
  
     spin_lock_irqsave(&pend_prod_lock, flags);
     pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
     spin_unlock_irqrestore(&pend_prod_lock, flags);
-    maybe_schedule_tx_action();        
 }
 
 #if 0
@@ -493,9 +512,26 @@ static void make_rx_response(netif_t     *netif,
 
 static int __init init_module(void)
 {
+    int i;
+
+    if ( !(start_info.flags & SIF_INITDOMAIN) )
+        return 0;
+
     netif_interface_init();
-    mmap_vstart = allocate_empty_lowmem_region(MAX_PENDING_REQS);
+
+    if ( (mmap_vstart = allocate_empty_lowmem_region(MAX_PENDING_REQS)) == 0 )
+        BUG();
+
+    pending_cons = 0;
+    pending_prod = MAX_PENDING_REQS;
+    for ( i = 0; i < MAX_PENDING_REQS; i++ )
+        pending_ring[i] = i;
+
+    spin_lock_init(&net_schedule_list_lock);
+    INIT_LIST_HEAD(&net_schedule_list);
+
     netif_ctrlif_init();
+
     return 0;
 }
 
index af8e660b7c95119f5a769837368437610cb9074b..cc5ac31e826cc0e581c37ff2b42c0e3087d8a3e7 100644 (file)
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 
-#include "../netif.h"
+#include <asm/evtchn.h>
+#include <asm/ctrl_if.h>
+#include <asm/hypervisor-ifs/dom_mem_ops.h>
 
-static struct tq_struct netif_statechange_tq;
+#include "../netif.h"
 
 #define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */
 
-static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs);
 static void network_tx_buf_gc(struct net_device *dev);
 static void network_alloc_rx_buffers(struct net_device *dev);
 static void cleanup_module(void);
 
-/* Dynamically-mapped IRQs. */
-static int network_irq, debug_irq;
-
 static struct list_head dev_list;
 
 struct net_private
@@ -47,7 +45,7 @@ struct net_private
     struct net_device *dev;
 
     struct net_device_stats stats;
-    NET_RING_IDX rx_resp_cons, tx_resp_cons;
+    NETIF_RING_IDX rx_resp_cons, tx_resp_cons;
     unsigned int tx_full;
     
     netif_tx_interface_t *tx;
@@ -69,8 +67,8 @@ struct net_private
      * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
      * array is an index into a chain of free entries.
      */
-    struct sk_buff *tx_skbs[XENNET_TX_RING_SIZE+1];
-    struct sk_buff *rx_skbs[XENNET_RX_RING_SIZE+1];
+    struct sk_buff *tx_skbs[NETIF_TX_RING_SIZE+1];
+    struct sk_buff *rx_skbs[NETIF_RX_RING_SIZE+1];
 };
 
 /* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */
@@ -91,7 +89,7 @@ static struct net_device *find_dev_by_handle(unsigned int handle)
     {
         np = list_entry(ent, struct net_private, list);
         if ( np->handle == handle )
-            return np;
+            return np->dev;
     }
     return NULL;
 }
@@ -100,8 +98,7 @@ static struct net_device *find_dev_by_handle(unsigned int handle)
 static int network_open(struct net_device *dev)
 {
     struct net_private *np = dev->priv;
-    netop_t netop;
-    int i, ret;
+    int i;
 
     if ( np->state != NETIF_STATE_CONNECTED )
         return -EINVAL;
@@ -111,15 +108,16 @@ static int network_open(struct net_device *dev)
     spin_lock_init(&np->tx_lock);
 
     /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
-    for ( i = 0; i <= XENNET_TX_RING_SIZE; i++ )
+    for ( i = 0; i <= NETIF_TX_RING_SIZE; i++ )
         np->tx_skbs[i] = (void *)(i+1);
-    for ( i = 0; i <= XENNET_RX_RING_SIZE; i++ )
+    for ( i = 0; i <= NETIF_RX_RING_SIZE; i++ )
         np->rx_skbs[i] = (void *)(i+1);
 
     wmb();
     np->state = NETIF_STATE_ACTIVE;
 
     network_alloc_rx_buffers(dev);
+    np->rx->event = np->rx_resp_cons + 1;
 
     netif_start_queue(dev);
 
@@ -131,18 +129,17 @@ static int network_open(struct net_device *dev)
 
 static void network_tx_buf_gc(struct net_device *dev)
 {
-    NET_RING_IDX i, prod;
+    NETIF_RING_IDX i, prod;
     unsigned short id;
     struct net_private *np = dev->priv;
     struct sk_buff *skb;
-    tx_entry_t *tx_ring = np->net_ring->tx_ring;
 
     do {
-        prod = np->net_idx->tx_resp_prod;
+        prod = np->tx->resp_prod;
 
         for ( i = np->tx_resp_cons; i != prod; i++ )
         {
-            id  = tx_ring[MASK_NET_TX_IDX(i)].resp.id;
+            id  = np->tx->ring[MASK_NET_TX_IDX(i)].resp.id;
             skb = np->tx_skbs[id];
             ADD_ID_TO_FREELIST(np->tx_skbs, id);
             dev_kfree_skb_any(skb);
@@ -158,14 +155,14 @@ static void network_tx_buf_gc(struct net_device *dev)
          * in such cases notification from Xen is likely to be the only kick
          * that we'll get.
          */
-        np->net_idx->tx_event = 
-            prod + ((np->net_idx->tx_req_prod - prod) >> 1) + 1;
+        np->tx->event = 
+            prod + ((np->tx->req_prod - prod) >> 1) + 1;
         mb();
     }
-    while ( prod != np->net_idx->tx_resp_prod );
+    while ( prod != np->tx->resp_prod );
 
     if ( np->tx_full && 
-         ((np->net_idx->tx_req_prod - prod) < XENNET_TX_RING_SIZE) )
+         ((np->tx->req_prod - prod) < NETIF_TX_RING_SIZE) )
     {
         np->tx_full = 0;
         if ( np->state == NETIF_STATE_ACTIVE )
@@ -189,10 +186,14 @@ static void network_alloc_rx_buffers(struct net_device *dev)
     unsigned short id;
     struct net_private *np = dev->priv;
     struct sk_buff *skb;
-    netop_t netop;
-    NET_RING_IDX i = np->net_idx->rx_req_prod;
-
-    if ( unlikely((i - np->rx_resp_cons) == XENNET_RX_RING_SIZE) || 
+    NETIF_RING_IDX i = np->rx->req_prod;
+    dom_mem_op_t op;
+    unsigned long pfn_array[NETIF_RX_RING_SIZE];
+    int ret, nr_pfns = 0;
+    pte_t *pte;
+
+    /* Make sure the batch is large enough to be worthwhile (1/2 ring). */
+    if ( unlikely((i - np->rx_resp_cons) > (NETIF_RX_RING_SIZE/2)) || 
          unlikely(np->state != NETIF_STATE_ACTIVE) )
         return;
 
@@ -209,13 +210,13 @@ static void network_alloc_rx_buffers(struct net_device *dev)
         id = GET_ID_FROM_FREELIST(np->rx_skbs);
         np->rx_skbs[id] = skb;
 
-        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.id   = id;
-        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr = 
-            virt_to_machine(get_ppte(skb->head));
-
-        np->rx_bufs_to_notify++;
+        np->rx->ring[MASK_NET_RX_IDX(i)].req.id = id;
+        
+        pte = get_ppte(skb->head);
+        pfn_array[nr_pfns++] = pte->pte_low >> PAGE_SHIFT;
+        queue_l1_entry_update(pte, 0);
     }
-    while ( (++i - np->rx_resp_cons) != XENNET_RX_RING_SIZE );
+    while ( (++i - np->rx_resp_cons) != NETIF_RX_RING_SIZE );
 
     /*
      * We may have allocated buffers which have entries outstanding in the page
@@ -223,17 +224,16 @@ static void network_alloc_rx_buffers(struct net_device *dev)
      */
     flush_page_update_queue();
 
-    np->net_idx->rx_req_prod = i;
-    np->net_idx->rx_event    = np->rx_resp_cons + 1;
-        
-    /* Batch Xen notifications. */
-    if ( np->rx_bufs_to_notify > (XENNET_RX_RING_SIZE/4) )
+    op.op = MEMOP_RESERVATION_DECREASE;
+    op.u.decrease.size  = nr_pfns;
+    op.u.decrease.pages = pfn_array;
+    if ( (ret = HYPERVISOR_dom_mem_op(&op)) != nr_pfns )
     {
-        netop.cmd = NETOP_PUSH_BUFFERS;
-        netop.vif = np->idx;
-        (void)HYPERVISOR_net_io_op(&netop);
-        np->rx_bufs_to_notify = 0;
+        printk(KERN_WARNING "Unable to reduce memory reservation (%d)\n", ret);
+        BUG();
     }
+
+    np->rx->req_prod = i;
 }
 
 
@@ -241,9 +241,8 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
     unsigned short id;
     struct net_private *np = (struct net_private *)dev->priv;
-    tx_req_entry_t *tx;
-    netop_t netop;
-    NET_RING_IDX i;
+    netif_tx_request_t *tx;
+    NETIF_RING_IDX i;
 
     if ( unlikely(np->tx_full) )
     {
@@ -262,27 +261,27 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
         memcpy(new_skb->data, skb->data, skb->len);
         dev_kfree_skb(skb);
         skb = new_skb;
-    }   
+    }
     
     spin_lock_irq(&np->tx_lock);
 
-    i = np->net_idx->tx_req_prod;
+    i = np->tx->req_prod;
 
     id = GET_ID_FROM_FREELIST(np->tx_skbs);
     np->tx_skbs[id] = skb;
 
-    tx = &np->net_ring->tx_ring[MASK_NET_TX_IDX(i)].req;
+    tx = &np->tx->ring[MASK_NET_TX_IDX(i)].req;
 
     tx->id   = id;
-    tx->addr = phys_to_machine(virt_to_phys(skb->data));
+    tx->addr = virt_to_machine(skb->data);
     tx->size = skb->len;
 
     wmb();
-    np->net_idx->tx_req_prod = i + 1;
+    np->tx->req_prod = i + 1;
 
     network_tx_buf_gc(dev);
 
-    if ( (i - np->tx_resp_cons) == (XENNET_TX_RING_SIZE - 1) )
+    if ( (i - np->tx_resp_cons) == (NETIF_TX_RING_SIZE - 1) )
     {
         np->tx_full = 1;
         netif_stop_queue(dev);
@@ -295,12 +294,8 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
     /* Only notify Xen if there are no outstanding responses. */
     mb();
-    if ( np->net_idx->tx_resp_prod == i )
-    {
-        netop.cmd = NETOP_PUSH_BUFFERS;
-        netop.vif = np->idx;
-        (void)HYPERVISOR_net_io_op(&netop);
-    }
+    if ( np->tx->resp_prod == i )
+        notify_via_evtchn(np->evtchn);
 
     return 0;
 }
@@ -312,22 +307,24 @@ static void netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
     struct net_private *np = dev->priv;
     unsigned long flags;
     struct sk_buff *skb;
-    rx_resp_entry_t *rx;
-    NET_RING_IDX i;
+    netif_rx_response_t *rx;
+    NETIF_RING_IDX i;
+    mmu_update_t mmu[2];
+    pte_t *pte;
 
     spin_lock_irqsave(&np->tx_lock, flags);
     network_tx_buf_gc(dev);
     spin_unlock_irqrestore(&np->tx_lock, flags);
 
  again:
-    for ( i = np->rx_resp_cons; i != np->net_idx->rx_resp_prod; i++ )
+    for ( i = np->rx_resp_cons; i != np->rx->resp_prod; i++ )
     {
-        rx = &np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].resp;
+        rx = &np->rx->ring[MASK_NET_RX_IDX(i)].resp;
 
         skb = np->rx_skbs[rx->id];
         ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
 
-        if ( unlikely(rx->status != RING_STATUS_OK) )
+        if ( unlikely(rx->status <= 0) )
         {
             /* Gate this error. We get a (valid) slew of them on suspend. */
             if ( np->state == NETIF_STATE_ACTIVE )
@@ -336,6 +333,17 @@ static void netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
             continue;
         }
 
+        /* Remap the page. */
+        pte = get_ppte(skb->head);
+        mmu[0].ptr  = virt_to_machine(pte);
+        mmu[0].val  = (rx->addr & PAGE_MASK) | __PAGE_KERNEL;
+        mmu[1].ptr  = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE;
+        mmu[1].val  = __pa(skb->head) >> PAGE_SHIFT;
+        if ( HYPERVISOR_mmu_update(mmu, 2) != 0 )
+            BUG();
+        phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = 
+            rx->addr >> PAGE_SHIFT;
+
         /*
         * Set up shinfo -- from alloc_skb. This was particularly nasty: the
          * shared info is hidden at the back of the data area (presumably so it
@@ -348,13 +356,13 @@ static void netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
         phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
             (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;
 
-        skb->data = skb->tail = skb->head + rx->offset;
-        skb_put(skb, rx->size);
+        skb->data = skb->tail = skb->head + (rx->addr & ~PAGE_MASK);
+        skb_put(skb, rx->status);
         skb->protocol = eth_type_trans(skb, dev);
 
         np->stats.rx_packets++;
 
-        np->stats.rx_bytes += rx->size;
+        np->stats.rx_bytes += rx->status;
         netif_rx(skb);
         dev->last_rx = jiffies;
     }
@@ -362,10 +370,11 @@ static void netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
     np->rx_resp_cons = i;
 
     network_alloc_rx_buffers(dev);
+    np->rx->event = np->rx_resp_cons + 1;
     
     /* Deal with hypervisor racing our resetting of rx_event. */
     mb();
-    if ( np->net_idx->rx_resp_prod != i )
+    if ( np->rx->resp_prod != i )
         goto again;
 }
 
@@ -373,16 +382,11 @@ static void netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
 static int network_close(struct net_device *dev)
 {
     struct net_private *np = dev->priv;
-    netop_t netop;
 
     netif_stop_queue(np->dev);
 
-    netop.cmd = NETOP_FLUSH_BUFFERS;
-    netop.vif = np->idx;
-    (void)HYPERVISOR_net_io_op(&netop);
-
-    while ( (np->rx_resp_cons != np->net_idx->rx_req_prod) ||
-            (np->tx_resp_cons != np->net_idx->tx_req_prod) )
+    while ( (np->rx_resp_cons != np->rx->req_prod) ||
+            (np->tx_resp_cons != np->tx->req_prod) )
     {
         barrier();
         current->state = TASK_INTERRUPTIBLE;
@@ -406,55 +410,12 @@ static struct net_device_stats *network_get_stats(struct net_device *dev)
 }
 
 
-static void netif_bringup_phase1(void *unused)
+static void netif_status_change(netif_fe_interface_status_changed_t *status)
 {
     ctrl_msg_t                   cmsg;
     netif_fe_interface_connect_t up;
     struct net_device *dev;
     struct net_private *np;
-
-    dev = find_dev_by_handle(0);
-    np  = dev->priv;
-    
-    /* Move from CLOSED to DISCONNECTED state. */
-    np->tx = (netif_tx_interface_t *)__get_free_page(GFP_KERNEL);
-    np->rx = (netif_rx_interface_t *)__get_free_page(GFP_KERNEL);
-    memset(np->tx, 0, PAGE_SIZE);
-    memset(np->rx, 0, PAGE_SIZE);
-    np->state  = NETIF_STATE_DISCONNECTED;
-
-    /* Construct an interface-CONNECT message for the domain controller. */
-    cmsg.type      = CMSG_NETIF_FE;
-    cmsg.subtype   = CMSG_NETIF_FE_INTERFACE_CONNECT;
-    cmsg.length    = sizeof(netif_fe_interface_connect_t);
-    up.handle      = 0;
-    up.tx_shmem_frame = virt_to_machine(np->tx) >> PAGE_SHIFT;
-    up.rx_shmem_frame = virt_to_machine(np->rx) >> PAGE_SHIFT;
-    memcpy(cmsg.msg, &up, sizeof(up));
-
-    /* Tell the controller to bring up the interface. */
-    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
-static void netif_bringup_phase2(void *unused)
-{
-    struct net_device *dev;
-    struct net_private *np;
-
-    dev = find_dev_by_handle(0);
-    np  = dev->priv;
-    
-    np->irq = bind_evtchn_to_irq(np->evtchn);
-    (void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM, 
-                      "netif", dev);
-
-    np->state = NETIF_STATE_CONNECTED;
-}
-
-static void netif_status_change(netif_fe_interface_status_changed_t *status)
-{
-    struct net_device *dev;
-    struct net_private *np;
     
     if ( status->handle != 0 )
     {
@@ -470,31 +431,53 @@ static void netif_status_change(netif_fe_interface_status_changed_t *status)
     {
     case NETIF_INTERFACE_STATUS_DESTROYED:
         printk(KERN_WARNING "Unexpected netif-DESTROYED message in state %d\n",
-               netif_state);
+               np->state);
         break;
 
     case NETIF_INTERFACE_STATUS_DISCONNECTED:
         if ( np->state != NETIF_STATE_CLOSED )
         {
             printk(KERN_WARNING "Unexpected netif-DISCONNECTED message"
-                   " in state %d\n", netif_state);
+                   " in state %d\n", np->state);
             break;
         }
-        netif_statechange_tq.routine = netif_bringup_phase1;
-        schedule_task(&netif_statechange_tq);
+
+        /* Move from CLOSED to DISCONNECTED state. */
+        np->tx = (netif_tx_interface_t *)__get_free_page(GFP_KERNEL);
+        np->rx = (netif_rx_interface_t *)__get_free_page(GFP_KERNEL);
+        memset(np->tx, 0, PAGE_SIZE);
+        memset(np->rx, 0, PAGE_SIZE);
+        np->state  = NETIF_STATE_DISCONNECTED;
+
+        /* Construct an interface-CONNECT message for the domain controller. */
+        cmsg.type      = CMSG_NETIF_FE;
+        cmsg.subtype   = CMSG_NETIF_FE_INTERFACE_CONNECT;
+        cmsg.length    = sizeof(netif_fe_interface_connect_t);
+        up.handle      = 0;
+        up.tx_shmem_frame = virt_to_machine(np->tx) >> PAGE_SHIFT;
+        up.rx_shmem_frame = virt_to_machine(np->rx) >> PAGE_SHIFT;
+        memcpy(cmsg.msg, &up, sizeof(up));
+        
+        /* Tell the controller to bring up the interface. */
+        ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
         break;
 
     case NETIF_INTERFACE_STATUS_CONNECTED:
         if ( np->state == NETIF_STATE_CLOSED )
         {
             printk(KERN_WARNING "Unexpected netif-CONNECTED message"
-                   " in state %d\n", netif_state);
+                   " in state %d\n", np->state);
             break;
         }
-        np->evtchn = status->evtchn;
+
         memcpy(dev->dev_addr, status->mac, ETH_ALEN);
-        netif_statechange_tq.routine = netif_bringup_phase2;
-        schedule_task(&netif_statechange_tq);
+
+        np->evtchn = status->evtchn;
+        np->irq = bind_evtchn_to_irq(np->evtchn);
+        (void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM, 
+                      dev->name, dev);
+        
+        np->state = NETIF_STATE_CONNECTED;
         break;
 
     default:
@@ -532,10 +515,13 @@ static int __init init_module(void)
 {
     ctrl_msg_t                       cmsg;
     netif_fe_driver_status_changed_t st;
-    int i, err;
+    int err;
     struct net_device *dev;
     struct net_private *np;
 
+    if ( start_info.flags & SIF_INITDOMAIN )
+        return 0;
+
     INIT_LIST_HEAD(&dev_list);
 
     if ( (dev = alloc_etherdev(sizeof(struct net_private))) == NULL )
@@ -562,7 +548,8 @@ static int __init init_module(void)
     np->dev = dev;
     list_add(&np->list, &dev_list);
 
-    (void)ctrl_if_register_receiver(CMSG_NETIF_FE, netif_ctrlif_rx);
+    (void)ctrl_if_register_receiver(CMSG_NETIF_FE, netif_ctrlif_rx,
+                                    CALLBACK_IN_BLOCKING_CONTEXT);
 
     /* Send a driver-UP notification to the domain controller. */
     cmsg.type      = CMSG_NETIF_FE;
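
As an aside (not part of this changeset): the frontend now drives the shared tx/rx rings directly with free-running NETIF_RING_IDX producer/consumer counters, masking them into a slot with MASK_NET_TX_IDX/MASK_NET_RX_IDX and kicking the backend over an event channel only when no responses are outstanding. The stand-alone C sketch below illustrates the free-running-index idiom, including the '(prod - cons) == ring size' full test used in network_start_xmit(); RING_SIZE, MASK_IDX and the toy ring are invented stand-ins for the NETIF_* definitions, not the driver's data structures.

    #include <stdio.h>

    /* Toy ring mirroring the free-running index idiom used by the netif
     * frontend: indices only ever increase and wrap naturally as unsigned
     * ints; they are reduced modulo the (power-of-two) ring size only when
     * used to address a slot. */
    #define RING_SIZE 8u                        /* must be a power of two */
    #define MASK_IDX(i) ((i) & (RING_SIZE - 1))

    static unsigned int ring[RING_SIZE];

    int main(void)
    {
        unsigned int req_prod = 0, resp_cons = 0;

        /* Produce five requests, retire three responses, produce five more. */
        for (unsigned int n = 0; n < 5; n++)
            ring[MASK_IDX(req_prod++)] = n;
        resp_cons += 3;
        for (unsigned int n = 5; n < 10; n++) {
            if (req_prod - resp_cons == RING_SIZE)
                break;                          /* ring full: same test as the tx path */
            ring[MASK_IDX(req_prod++)] = n;
        }

        printf("outstanding entries: %u\n", req_prod - resp_cons);
        return 0;
    }
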
index 715f707eb091c3510a6d69ed8e96d6ec07c0633e..19cb9a33260b231b7b4251e8b13c542a862df74d 100644 (file)
@@ -33,8 +33,19 @@ static struct irqaction ctrl_if_irq_action;
 static CONTROL_RING_IDX ctrl_if_tx_resp_cons;
 static CONTROL_RING_IDX ctrl_if_rx_req_cons;
 
-/* Incoming message requests: primary message type -> message handler. */
+/* Incoming message requests. */
+    /* Primary message type -> message handler. */
 static ctrl_msg_handler_t ctrl_if_rxmsg_handler[256];
+    /* Primary message type -> callback in process context? */
+static unsigned long ctrl_if_rxmsg_blocking_context[256/sizeof(unsigned long)];
+    /* Is it late enough during bootstrap to use schedule_task()? */
+static int safe_to_schedule_task;
+    /* Passed to schedule_task(). */
+static struct tq_struct ctrl_if_rxmsg_deferred_tq;
+    /* Queue up messages to be handled in process context. */
+static ctrl_msg_t ctrl_if_rxmsg_deferred[CONTROL_RING_SIZE];
+static CONTROL_RING_IDX ctrl_if_rxmsg_deferred_prod;
+static CONTROL_RING_IDX ctrl_if_rxmsg_deferred_cons;
 
 /* Incoming message responses: message identifier -> message handler/id. */
 static struct {
@@ -99,22 +110,40 @@ static void __ctrl_if_tx_tasklet(unsigned long data)
     }
 }
 
+static void __ctrl_if_rxmsg_deferred(void *unused)
+{
+    ctrl_msg_t *msg;
+
+    while ( ctrl_if_rxmsg_deferred_cons != ctrl_if_rxmsg_deferred_prod )
+    {
+        msg = &ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX(
+            ctrl_if_rxmsg_deferred_cons++)];
+        (*ctrl_if_rxmsg_handler[msg->type])(msg, 0);
+    }
+}
+
 static void __ctrl_if_rx_tasklet(unsigned long data)
 {
     control_if_t *ctrl_if = get_ctrl_if();
-    ctrl_msg_t   *msg;
+    ctrl_msg_t    msg, *pmsg;
 
     while ( ctrl_if_rx_req_cons != ctrl_if->rx_req_prod )
     {
-        /*
-         * We need no locking or barriers here. There will be one and only one
-         * response as a result of each callback, so the callback handler
-         * doesn't need to worry about the 'msg' being overwritten until:
-         *  1. It returns (if the message must persist then it must be copied).
-         *  2. A response is sent (the response may overwrite the request).
-         */
-        msg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(ctrl_if_rx_req_cons++)];
-        (*ctrl_if_rxmsg_handler[msg->type])(msg, 0);
+        pmsg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(ctrl_if_rx_req_cons++)];
+        memcpy(&msg, pmsg, offsetof(ctrl_msg_t, msg));
+        if ( msg.length != 0 )
+            memcpy(msg.msg, pmsg->msg, msg.length);
+        if ( test_bit(msg.type, &ctrl_if_rxmsg_blocking_context) )
+        {
+            pmsg = &ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX(
+                ctrl_if_rxmsg_deferred_prod++)];
+            memcpy(pmsg, &msg, offsetof(ctrl_msg_t, msg) + msg.length);
+            schedule_task(&ctrl_if_rxmsg_deferred_tq);
+        }
+        else
+        {
+            (*ctrl_if_rxmsg_handler[msg.type])(&msg, 0);
+        }
     }
 }
 
@@ -243,22 +272,36 @@ void ctrl_if_send_response(ctrl_msg_t *msg)
     ctrl_if_notify_controller();
 }
 
-int ctrl_if_register_receiver(u8 type, ctrl_msg_handler_t hnd)
+int ctrl_if_register_receiver(
+    u8 type, 
+    ctrl_msg_handler_t hnd, 
+    unsigned int flags)
 {
-    unsigned long flags;
+    unsigned long _flags;
     int inuse;
 
-    spin_lock_irqsave(&ctrl_if_lock, flags);
+    spin_lock_irqsave(&ctrl_if_lock, _flags);
 
     inuse = (ctrl_if_rxmsg_handler[type] != ctrl_if_rxmsg_default_handler);
 
     if ( inuse )
+    {
         printk(KERN_INFO "Receiver %p already established for control "
                "messages of type %d.\n", ctrl_if_rxmsg_handler[type], type);
+    }
     else
+    {
         ctrl_if_rxmsg_handler[type] = hnd;
+        clear_bit(type, &ctrl_if_rxmsg_blocking_context);
+        if ( flags == CALLBACK_IN_BLOCKING_CONTEXT )
+        {
+            set_bit(type, &ctrl_if_rxmsg_blocking_context);
+            if ( !safe_to_schedule_task )
+                BUG();
+        }
+    }
 
-    spin_unlock_irqrestore(&ctrl_if_lock, flags);
+    spin_unlock_irqrestore(&ctrl_if_lock, _flags);
 
     return !inuse;
 }
@@ -326,6 +369,7 @@ void __init ctrl_if_init(void)
 
     for ( i = 0; i < 256; i++ )
         ctrl_if_rxmsg_handler[i] = ctrl_if_rxmsg_default_handler;
+    ctrl_if_rxmsg_deferred_tq.routine = __ctrl_if_rxmsg_deferred;
 
     spin_lock_init(&ctrl_if_lock);
 
@@ -333,6 +377,15 @@ void __init ctrl_if_init(void)
 }
 
 
+/* This is called after it is safe to call schedule_task(). */
+static int __init ctrl_if_late_setup(void)
+{
+    safe_to_schedule_task = 1;
+    return 0;
+}
+__initcall(ctrl_if_late_setup);
+
+
 /*
  * !! The following are DANGEROUS FUNCTIONS !!
  * Use with care [for example, see xencons_force_flush()].
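
For context (not part of this changeset): the receive tasklet above now copies each request out of the shared ring and, for message types registered with CALLBACK_IN_BLOCKING_CONTEXT, parks the copy in ctrl_if_rxmsg_deferred so that schedule_task() can deliver it from process context, where the handler may sleep. A minimal userspace sketch of that copy-then-defer pattern follows; every name in it (NR_TYPES, DEFER_RING, handle_now(), handle_deferred(), and so on) is invented for the sketch.

    #include <stdio.h>

    /* Copy-then-defer dispatch: messages whose type is marked as needing a
     * blocking context are copied into a private ring and handled later
     * from "process context" instead of directly in the "tasklet". */
    #define NR_TYPES      256
    #define DEFER_RING    16u
    #define MASK(i)       ((i) & (DEFER_RING - 1))
    #define BITS_PER_LONG (8 * sizeof(unsigned long))

    struct msg { unsigned char type; char payload[32]; };

    static unsigned long blocking_type[NR_TYPES / BITS_PER_LONG];
    static struct msg deferred[DEFER_RING];
    static unsigned int defer_prod, defer_cons;

    static void mark_blocking(unsigned char t)
    { blocking_type[t / BITS_PER_LONG] |= 1UL << (t % BITS_PER_LONG); }

    static int type_is_blocking(unsigned char t)
    { return (blocking_type[t / BITS_PER_LONG] >> (t % BITS_PER_LONG)) & 1; }

    static void handle_now(const struct msg *m)        /* "tasklet" handler */
    { printf("immediate: type %u (%s)\n", (unsigned)m->type, m->payload); }

    static void handle_deferred(void)                  /* "process context" */
    {
        while (defer_cons != defer_prod) {
            const struct msg *m = &deferred[MASK(defer_cons++)];
            printf("deferred:  type %u (%s)\n", (unsigned)m->type, m->payload);
        }
    }

    int main(void)
    {
        mark_blocking(9);                              /* type 9 handlers may sleep */

        struct msg in[2] = { { 3, "fast path" }, { 9, "needs to sleep" } };
        for (int i = 0; i < 2; i++) {
            if (type_is_blocking(in[i].type))
                deferred[MASK(defer_prod++)] = in[i];  /* copy now, handle later */
            else
                handle_now(&in[i]);
        }
        handle_deferred();                             /* stands in for schedule_task() */
        return 0;
    }
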
index 20a934adddc2d7a08876628002a3fed6cfdcfc0a..d219c284030b8ae3636f8460f3fd4c267718f9a4 100644 (file)
@@ -1626,7 +1626,7 @@ int __init blk_dev_init(void)
        jsfd_init();
 #endif
 
-#ifdef CONFIG_XEN_VBD
+#if defined(CONFIG_XEN_VBD) || defined(CONFIG_XEN_NEWIO)
     xlblk_init();
 #endif
 
index a02e2471ea7d53d3933e3f3ebaf7afc77cd7f4fe..5bc6cc22b12bb4fb48184e5eb3ecb0969660ee70 100644 (file)
@@ -80,8 +80,14 @@ void ctrl_if_send_response(ctrl_msg_t *msg);
  * Register a receiver for typed messages from the domain controller. The 
  * handler (@hnd) is called for every received message of specified @type.
  * Returns TRUE (non-zero) if the handler was successfully registered.
+ * If CALLBACK_IN_BLOCKING_CONTEXT is specified in @flags then callbacks will
+ * occur in a context in which it is safe to yield (i.e., process context).
  */
-int ctrl_if_register_receiver(u8 type, ctrl_msg_handler_t hnd);
+#define CALLBACK_IN_BLOCKING_CONTEXT 1
+int ctrl_if_register_receiver(
+    u8 type, 
+    ctrl_msg_handler_t hnd,
+    unsigned int flags);
 
 /*
  * Unregister a receiver for typed messages from the domain controller. The 
index f5243bb6a7761be4ae1b454b20ddb201980ca7ed..5ab5fe9bfcfbf7f336ac5efd39ecc387b770474f 100644 (file)
@@ -159,46 +159,11 @@ extern void iounmap(void *addr);
 extern void *bt_ioremap(unsigned long offset, unsigned long size);
 extern void bt_iounmap(void *addr, unsigned long size);
 
-#ifdef CONFIG_XEN_PHYSDEV_ACCESS
-
-#ifdef CONFIG_HIGHMEM
-#error "Highmem is not yet compatible with physical device access"
-#endif
-
-/*
- * The bus translation macros need special care if we are executing device
- * accesses to/from other domains' memory. In these cases the virtual address
- * is actually a temporary mapping in the 'vmalloc' space. The physical
- * address will therefore be >max_low_pfn, and will not have a valid entry
- * in the phys_to_mach mapping table.
- */
-static inline unsigned long phys_to_bus(unsigned long phys)
-{
-    extern unsigned long max_pfn;
-    pgd_t *pgd; pmd_t *pmd; pte_t *pte;
-    void *addr;
-    unsigned long bus;
-    if ( (phys >> PAGE_SHIFT) < max_pfn )
-        return phys_to_machine(phys);
-    addr = phys_to_virt(phys);
-    pgd = pgd_offset_k(   (unsigned long)addr);
-    pmd = pmd_offset(pgd, (unsigned long)addr);
-    pte = pte_offset(pmd, (unsigned long)addr);
-    bus = (pte->pte_low & PAGE_MASK) | (phys & ~PAGE_MASK);
-    return bus;
-}
-
-#define virt_to_bus(_x) phys_to_bus(virt_to_phys(_x))
-#define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x))
-#define page_to_bus(_x) phys_to_bus(page_to_phys(_x))
-
-#else
-
 #define virt_to_bus(_x) phys_to_machine(virt_to_phys(_x))
 #define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x))
 #define page_to_bus(_x) phys_to_machine(page_to_phys(_x))
-
-#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
+#define bus_to_phys(_x) machine_to_phys(_x)
+#define bus_to_page(_x) (mem_map + (bus_to_phys(_x) >> PAGE_SHIFT))
 
 /*
  * readX/writeX() are used to access memory mapped devices. On some
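
A side note (not part of this changeset): with the CONFIG_XEN_PHYSDEV_ACCESS special case gone, virt_to_bus()/bus_to_virt() always translate through the pseudo-physical <-> machine frame tables. The stand-alone sketch below shows that round trip with made-up tables; NR_FRAMES and the frame numbers are arbitrary, and nothing here is the real implementation.

    #include <assert.h>
    #include <stdio.h>

    /* Toy pseudo-physical <-> machine translation, mirroring the idea
     * behind virt_to_bus()/bus_to_virt() in io.h. Frame numbers are
     * invented for the example. */
    #define PAGE_SHIFT 12
    #define NR_FRAMES  4

    static unsigned long phys_to_machine_mapping[NR_FRAMES] = { 7, 2, 5, 9 };
    static unsigned long machine_to_phys_mapping[16];

    static unsigned long phys_to_machine(unsigned long phys)
    {
        return (phys_to_machine_mapping[phys >> PAGE_SHIFT] << PAGE_SHIFT) |
               (phys & ((1UL << PAGE_SHIFT) - 1));
    }

    static unsigned long machine_to_phys(unsigned long mach)
    {
        return (machine_to_phys_mapping[mach >> PAGE_SHIFT] << PAGE_SHIFT) |
               (mach & ((1UL << PAGE_SHIFT) - 1));
    }

    int main(void)
    {
        for (unsigned long pfn = 0; pfn < NR_FRAMES; pfn++)
            machine_to_phys_mapping[phys_to_machine_mapping[pfn]] = pfn;

        unsigned long phys = (2UL << PAGE_SHIFT) | 0x123;  /* page 2, offset 0x123 */
        unsigned long bus  = phys_to_machine(phys);
        assert(machine_to_phys(bus) == phys);              /* round trip holds */
        printf("phys %#lx <-> bus %#lx\n", phys, bus);
        return 0;
    }
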
diff --git a/xenolinux-2.4.26-sparse/include/asm-xen/pci.h b/xenolinux-2.4.26-sparse/include/asm-xen/pci.h
new file mode 100644 (file)
index 0000000..74ae5ba
--- /dev/null
@@ -0,0 +1,283 @@
+#ifndef __i386_PCI_H
+#define __i386_PCI_H
+
+#include <linux/config.h>
+
+#ifdef __KERNEL__
+
+/* Can be used to override the logic in pci_scan_bus for skipping
+   already-configured bus numbers - to be used for buggy BIOSes
+   or architectures with incomplete PCI setup by the loader */
+
+#ifdef CONFIG_PCI
+extern unsigned int pcibios_assign_all_busses(void);
+#else
+#define pcibios_assign_all_busses()    0
+#endif
+#define pcibios_scan_all_fns()         0
+
+extern unsigned long pci_mem_start;
+#define PCIBIOS_MIN_IO         0x1000
+#define PCIBIOS_MIN_MEM                (pci_mem_start)
+
+void pcibios_config_init(void);
+struct pci_bus * pcibios_scan_root(int bus);
+extern int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value);
+extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value);
+
+void pcibios_set_master(struct pci_dev *dev);
+void pcibios_penalize_isa_irq(int irq);
+struct irq_routing_table *pcibios_get_irq_routing_table(void);
+int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
+
+/* Dynamic DMA mapping stuff.
+ * i386 has everything mapped statically.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <asm/scatterlist.h>
+#include <linux/string.h>
+#include <asm/io.h>
+
+struct pci_dev;
+
+/* The networking and block device layers use this boolean for bounce
+ * buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS    (0)
+
+/* Allocate and map kernel buffer using consistent mode DMA for a device.
+ * hwdev should be valid struct pci_dev pointer for PCI devices,
+ * NULL for PCI-like buses (ISA, EISA).
+ * Returns non-NULL cpu-view pointer to the buffer if successful and
+ * sets *dma_addrp to the pci side dma address as well, else *dma_addrp
+ * is undefined.
+ */
+extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
+                                 dma_addr_t *dma_handle);
+
+/* Free and unmap a consistent DMA buffer.
+ * cpu_addr is what was returned from pci_alloc_consistent,
+ * size must be the same as what as passed into pci_alloc_consistent,
+ * and likewise dma_addr must be the same as what *dma_addrp was set to.
+ *
+ * References to the memory and mappings associated with cpu_addr/dma_addr
+ * past this call are illegal.
+ */
+extern void pci_free_consistent(struct pci_dev *hwdev, size_t size,
+                               void *vaddr, dma_addr_t dma_handle);
+
+/* Map a single buffer of the indicated size for DMA in streaming mode.
+ * The 32-bit bus address to use is returned.
+ *
+ * Once the device is given the dma address, the device owns this memory
+ * until either pci_unmap_single or pci_dma_sync_single is performed.
+ */
+static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
+                                       size_t size, int direction)
+{
+       if (direction == PCI_DMA_NONE)
+               out_of_line_bug();
+       flush_write_buffers();
+       return virt_to_bus(ptr);
+}
+
+/* Unmap a single streaming mode DMA translation.  The dma_addr and size
+ * must match what was provided for in a previous pci_map_single call.  All
+ * other usages are undefined.
+ *
+ * After this call, reads by the cpu to the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+static inline void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
+                                   size_t size, int direction)
+{
+       if (direction == PCI_DMA_NONE)
+               out_of_line_bug();
+       /* Nothing to do */
+}
+
+/*
+ * pci_{map,unmap}_single_page maps a kernel page to a dma_addr_t, identical
+ * to pci_map_single, but takes a struct page instead of a virtual address
+ */
+static inline dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page,
+                                     unsigned long offset, size_t size, int direction)
+{
+       if (direction == PCI_DMA_NONE)
+               out_of_line_bug();
+
+       return page_to_bus(page) + offset;
+}
+
+static inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address,
+                                 size_t size, int direction)
+{
+       if (direction == PCI_DMA_NONE)
+               out_of_line_bug();
+       /* Nothing to do */
+}
+
+/* pci_unmap_{page,single} is a nop so... */
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
+#define pci_unmap_addr(PTR, ADDR_NAME)         (0)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)        do { } while (0)
+#define pci_unmap_len(PTR, LEN_NAME)           (0)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL)  do { } while (0)
+
+/* Map a set of buffers described by scatterlist in streaming
+ * mode for DMA.  This is the scatter-gather version of the
+ * above pci_map_single interface.  Here the scatter gather list
+ * elements are each tagged with the appropriate dma address
+ * and length.  They are obtained via sg_dma_{address,length}(SG).
+ *
+ * NOTE: An implementation may be able to use a smaller number of
+ *       DMA address/length pairs than there are SG table elements.
+ *       (for example via virtual mapping capabilities)
+ *       The routine returns the number of addr/length pairs actually
+ *       used, at most nents.
+ *
+ * Device ownership issues as mentioned above for pci_map_single are
+ * the same here.
+ */
+static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
+                            int nents, int direction)
+{
+       int i;
+
+       if (direction == PCI_DMA_NONE)
+               out_of_line_bug();
+       /*
+        * temporary 2.4 hack
+        */
+       for (i = 0; i < nents; i++ ) {
+               if (sg[i].address && sg[i].page)
+                       out_of_line_bug();
+               else if (!sg[i].address && !sg[i].page)
+                       out_of_line_bug();
+               if (sg[i].address)
+                       sg[i].dma_address = virt_to_bus(sg[i].address);
+               else
+                       sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset;
+       }
+       flush_write_buffers();
+       return nents;
+}
+
+/* Unmap a set of streaming mode DMA translations.
+ * Again, cpu read rules concerning calls here are the same as for
+ * pci_unmap_single() above.
+ */
+static inline void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
+                               int nents, int direction)
+{
+       if (direction == PCI_DMA_NONE)
+               out_of_line_bug();
+       /* Nothing to do */
+}
+
+/* Make physical memory consistent for a single
+ * streaming mode DMA translation after a transfer.
+ *
+ * If you perform a pci_map_single() but wish to interrogate the
+ * buffer using the cpu, yet do not wish to teardown the PCI dma
+ * mapping, you must call this function before doing so.  At the
+ * next point you give the PCI dma address back to the card, the
+ * device again owns the buffer.
+ */
+static inline void pci_dma_sync_single(struct pci_dev *hwdev,
+                                      dma_addr_t dma_handle,
+                                      size_t size, int direction)
+{
+       if (direction == PCI_DMA_NONE)
+               out_of_line_bug();
+       flush_write_buffers();
+}
+
+/* Make physical memory consistent for a set of streaming
+ * mode DMA translations after a transfer.
+ *
+ * The same as pci_dma_sync_single but for a scatter-gather list,
+ * same rules and usage.
+ */
+static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
+                                  struct scatterlist *sg,
+                                  int nelems, int direction)
+{
+       if (direction == PCI_DMA_NONE)
+               out_of_line_bug();
+       flush_write_buffers();
+}
+
+/* Return whether the given PCI device DMA address mask can
+ * be supported properly.  For example, if your device can
+ * only drive the low 24-bits during PCI bus mastering, then
+ * you would pass 0x00ffffff as the mask to this function.
+ */
+static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask)
+{
+        /*
+         * we fall back to GFP_DMA when the mask isn't all 1s,
+         * so we can't guarantee allocations that must be
+         * within a tighter range than GFP_DMA..
+         */
+        if(mask < 0x00ffffff)
+                return 0;
+
+       return 1;
+}
+
+/* This is always fine. */
+#define pci_dac_dma_supported(pci_dev, mask)   (1)
+
+static __inline__ dma64_addr_t
+pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction)
+{
+       return ((dma64_addr_t) page_to_bus(page) +
+               (dma64_addr_t) offset);
+}
+
+static __inline__ struct page *
+pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr)
+{
+       return bus_to_page(dma_addr);
+}
+
+static __inline__ unsigned long
+pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr)
+{
+       return (dma_addr & ~PAGE_MASK);
+}
+
+static __inline__ void
+pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
+{
+       flush_write_buffers();
+}
+
+/* These macros should be used after a pci_map_sg call has been done
+ * to get bus addresses of each of the SG entries and their lengths.
+ * You should only work with the number of sg entries pci_map_sg
+ * returns.
+ */
+#define sg_dma_address(sg)     ((sg)->dma_address)
+#define sg_dma_len(sg)         ((sg)->length)
+
+/* Return the index of the PCI controller for device. */
+static inline int pci_controller_num(struct pci_dev *dev)
+{
+       return 0;
+}
+
+#define HAVE_PCI_MMAP
+extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+                              enum pci_mmap_state mmap_state, int write_combine);
+
+#endif /* __KERNEL__ */
+
+#endif /* __i386_PCI_H */
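
For illustration only (not part of this changeset): the comments in this new header describe the usual 2.4 streaming-DMA ownership rules, i.e. once a buffer is mapped the device owns it until it is synced or unmapped. The toy model below restates that discipline as runnable C; the toy_* functions, struct dma_buf and enum owner are all invented stand-ins, not the kernel API.

    #include <assert.h>
    #include <stdio.h>

    /* Toy model of streaming-DMA buffer ownership: after mapping, the
     * device owns the buffer; the CPU may look at it again only after a
     * sync or an unmap. Handing the bus address back to the device would
     * transfer ownership to the device once more. */
    enum owner { OWNER_CPU, OWNER_DEVICE };

    struct dma_buf { char data[64]; enum owner owner; };

    static unsigned long toy_map_single(struct dma_buf *b)
    {
        assert(b->owner == OWNER_CPU);   /* only a CPU-owned buffer may be mapped */
        b->owner = OWNER_DEVICE;
        return (unsigned long)b;         /* pretend bus address */
    }

    static void toy_dma_sync_single(struct dma_buf *b)
    {
        b->owner = OWNER_CPU;            /* CPU may now read what the device wrote */
    }

    static void toy_unmap_single(struct dma_buf *b)
    {
        b->owner = OWNER_CPU;            /* mapping torn down, CPU owns the memory */
    }

    int main(void)
    {
        struct dma_buf buf = { "filled by the cpu", OWNER_CPU };

        unsigned long bus = toy_map_single(&buf);
        /* ... the device would DMA to/from 'bus' here ... */
        toy_dma_sync_single(&buf);
        printf("cpu sees \"%s\" (bus cookie %#lx)\n", buf.data, bus);
        toy_unmap_single(&buf);
        assert(buf.owner == OWNER_CPU);
        return 0;
    }
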
index 46fe4784ad86970bc44616d9b20f4f0f956d08bd..2e9f7b992099df0c10ab6500dcb26d3247da7366 100755 (executable)
@@ -163,7 +163,6 @@ ln -sf ../asm-i386/mtrr.h
 ln -sf ../asm-i386/namei.h 
 ln -sf ../asm-i386/param.h 
 ln -sf ../asm-i386/parport.h 
-ln -sf ../asm-i386/pci.h
 ln -sf ../asm-i386/pgtable-3level.h 
 ln -sf ../asm-i386/poll.h 
 ln -sf ../asm-i386/posix_types.h 
diff --git a/xenolinux-2.4.26-sparse/mm/page_alloc.c b/xenolinux-2.4.26-sparse/mm/page_alloc.c
new file mode 100644 (file)
index 0000000..62ed775
--- /dev/null
@@ -0,0 +1,930 @@
+/*
+ *  linux/mm/page_alloc.c
+ *
+ *  Manages the free list, the system allocates free pages here.
+ *  Note that kmalloc() lives in slab.c
+ *
+ *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *  Swap reorganised 29.12.95, Stephen Tweedie
+ *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ *  Reshaped it to be a zoned allocator, Ingo Molnar, Red Hat, 1999
+ *  Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
+ *  Zone balancing, Kanoj Sarcar, SGI, Jan 2000
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/swapctl.h>
+#include <linux/interrupt.h>
+#include <linux/pagemap.h>
+#include <linux/bootmem.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+int nr_swap_pages;
+int nr_active_pages;
+int nr_inactive_pages;
+LIST_HEAD(inactive_list);
+LIST_HEAD(active_list);
+pg_data_t *pgdat_list;
+
+/*
+ *
+ * The zone_table array is used to look up the address of the
+ * struct zone corresponding to a given zone number (ZONE_DMA,
+ * ZONE_NORMAL, or ZONE_HIGHMEM).
+ */
+zone_t *zone_table[MAX_NR_ZONES*MAX_NR_NODES];
+EXPORT_SYMBOL(zone_table);
+
+static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
+static int zone_balance_ratio[MAX_NR_ZONES] __initdata = { 128, 128, 128, };
+static int zone_balance_min[MAX_NR_ZONES] __initdata = { 20 , 20, 20, };
+static int zone_balance_max[MAX_NR_ZONES] __initdata = { 255 , 255, 255, };
+static int lower_zone_reserve_ratio[MAX_NR_ZONES-1] = { 256, 32 };
+
+int vm_gfp_debug = 0;
+
+/*
+ * Temporary debugging check.
+ */
+#define BAD_RANGE(zone, page)                                          \
+(                                                                      \
+       (((page) - mem_map) >= ((zone)->zone_start_mapnr+(zone)->size)) \
+       || (((page) - mem_map) < (zone)->zone_start_mapnr)              \
+       || ((zone) != page_zone(page))                                  \
+)
+
+/*
+ * Freeing function for a buddy system allocator.
+ * Contrary to prior comments, this is *NOT* hairy, and there
+ * is no reason for anyone not to understand it.
+ *
+ * The concept of a buddy system is to maintain direct-mapped tables
+ * (containing bit values) for memory blocks of various "orders".
+ * The bottom level table contains the map for the smallest allocatable
+ * units of memory (here, pages), and each level above it describes
+ * pairs of units from the levels below, hence, "buddies".
+ * At a high level, all that happens here is marking the table entry
+ * at the bottom level available, and propagating the changes upward
+ * as necessary, plus some accounting needed to play nicely with other
+ * parts of the VM system.
+ * At each level, we keep one bit for each pair of blocks, which
+ * is set to 1 iff only one of the pair is allocated.  So when we
+ * are allocating or freeing one, we can derive the state of the
+ * other.  That is, if we allocate a small block, and both were   
+ * free, the remainder of the region must be split into blocks.   
+ * If a block is freed, and its buddy is also free, then this
+ * triggers coalescing into a block of larger size.            
+ *
+ * -- wli
+ */
+
+static void FASTCALL(__free_pages_ok (struct page *page, unsigned int order));
+static void __free_pages_ok (struct page *page, unsigned int order)
+{
+       unsigned long index, page_idx, mask, flags;
+       free_area_t *area;
+       struct page *base;
+       zone_t *zone;
+
+       /*
+        * Yes, think what happens when other parts of the kernel take 
+        * a reference to a page in order to pin it for io. -ben
+        */
+       if (PageLRU(page)) {
+               if (unlikely(in_interrupt()))
+                       BUG();
+               lru_cache_del(page);
+       }
+
+       if (page->buffers)
+               BUG();
+       if (page->mapping)
+               return (*(void(*)(struct page *))page->mapping)(page);
+       if (!VALID_PAGE(page))
+               BUG();
+       if (PageLocked(page))
+               BUG();
+       if (PageActive(page))
+               BUG();
+       ClearPageReferenced(page);
+       ClearPageDirty(page);
+
+       if (current->flags & PF_FREE_PAGES)
+               goto local_freelist;
+ back_local_freelist:
+
+       zone = page_zone(page);
+
+       mask = (~0UL) << order;
+       base = zone->zone_mem_map;
+       page_idx = page - base;
+       if (page_idx & ~mask)
+               BUG();
+       index = page_idx >> (1 + order);
+
+       area = zone->free_area + order;
+
+       spin_lock_irqsave(&zone->lock, flags);
+
+       zone->free_pages -= mask;
+
+       while (mask + (1 << (MAX_ORDER-1))) {
+               struct page *buddy1, *buddy2;
+
+               if (area >= zone->free_area + MAX_ORDER)
+                       BUG();
+               if (!__test_and_change_bit(index, area->map))
+                       /*
+                        * the buddy page is still allocated.
+                        */
+                       break;
+               /*
+                * Move the buddy up one level.
+                * This code is taking advantage of the identity:
+                *      -mask = 1+~mask
+                */
+               buddy1 = base + (page_idx ^ -mask);
+               buddy2 = base + page_idx;
+               if (BAD_RANGE(zone,buddy1))
+                       BUG();
+               if (BAD_RANGE(zone,buddy2))
+                       BUG();
+
+               list_del(&buddy1->list);
+               mask <<= 1;
+               area++;
+               index >>= 1;
+               page_idx &= mask;
+       }
+       list_add(&(base + page_idx)->list, &area->free_list);
+
+       spin_unlock_irqrestore(&zone->lock, flags);
+       return;
+
+ local_freelist:
+       if (current->nr_local_pages)
+               goto back_local_freelist;
+       if (in_interrupt())
+               goto back_local_freelist;               
+
+       list_add(&page->list, &current->local_pages);
+       page->index = order;
+       current->nr_local_pages++;
+}
+
+#define MARK_USED(index, order, area) \
+       __change_bit((index) >> (1+(order)), (area)->map)
+
+static inline struct page * expand (zone_t *zone, struct page *page,
+        unsigned long index, int low, int high, free_area_t * area)
+{
+       unsigned long size = 1 << high;
+
+       while (high > low) {
+               if (BAD_RANGE(zone,page))
+                       BUG();
+               area--;
+               high--;
+               size >>= 1;
+               list_add(&(page)->list, &(area)->free_list);
+               MARK_USED(index, high, area);
+               index += size;
+               page += size;
+       }
+       if (BAD_RANGE(zone,page))
+               BUG();
+       return page;
+}
+
+static FASTCALL(struct page * rmqueue(zone_t *zone, unsigned int order));
+static struct page * rmqueue(zone_t *zone, unsigned int order)
+{
+       free_area_t * area = zone->free_area + order;
+       unsigned int curr_order = order;
+       struct list_head *head, *curr;
+       unsigned long flags;
+       struct page *page;
+
+       spin_lock_irqsave(&zone->lock, flags);
+       do {
+               head = &area->free_list;
+               curr = head->next;
+
+               if (curr != head) {
+                       unsigned int index;
+
+                       page = list_entry(curr, struct page, list);
+                       if (BAD_RANGE(zone,page))
+                               BUG();
+                       list_del(curr);
+                       index = page - zone->zone_mem_map;
+                       if (curr_order != MAX_ORDER-1)
+                               MARK_USED(index, curr_order, area);
+                       zone->free_pages -= 1UL << order;
+
+                       page = expand(zone, page, index, order, curr_order, area);
+                       spin_unlock_irqrestore(&zone->lock, flags);
+
+                       set_page_count(page, 1);
+                       if (BAD_RANGE(zone,page))
+                               BUG();
+                       if (PageLRU(page))
+                               BUG();
+                       if (PageActive(page))
+                               BUG();
+                       return page;    
+               }
+               curr_order++;
+               area++;
+       } while (curr_order < MAX_ORDER);
+       spin_unlock_irqrestore(&zone->lock, flags);
+
+       return NULL;
+}
+
+#ifndef CONFIG_DISCONTIGMEM
+struct page *_alloc_pages(unsigned int gfp_mask, unsigned int order)
+{
+       return __alloc_pages(gfp_mask, order,
+               contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK));
+}
+#endif
+
+static struct page * FASTCALL(balance_classzone(zone_t *, unsigned int, unsigned int, int *));
+static struct page * balance_classzone(zone_t * classzone, unsigned int gfp_mask, unsigned int order, int * freed)
+{
+       struct page * page = NULL;
+       int __freed;
+
+       if (in_interrupt())
+               BUG();
+
+       current->allocation_order = order;
+       current->flags |= PF_MEMALLOC | PF_FREE_PAGES;
+
+       __freed = try_to_free_pages_zone(classzone, gfp_mask);
+
+       current->flags &= ~(PF_MEMALLOC | PF_FREE_PAGES);
+
+       if (current->nr_local_pages) {
+               struct list_head * entry, * local_pages;
+               struct page * tmp;
+               int nr_pages;
+
+               local_pages = &current->local_pages;
+
+               if (likely(__freed)) {
+                       /* pick from the last inserted so we're lifo */
+                       entry = local_pages->next;
+                       do {
+                               tmp = list_entry(entry, struct page, list);
+                               if (tmp->index == order && memclass(page_zone(tmp), classzone)) {
+                                       list_del(entry);
+                                       current->nr_local_pages--;
+                                       set_page_count(tmp, 1);
+                                       page = tmp;
+
+                                       if (page->buffers)
+                                               BUG();
+                                       if (page->mapping)
+                                               BUG();
+                                       if (!VALID_PAGE(page))
+                                               BUG();
+                                       if (PageLocked(page))
+                                               BUG();
+                                       if (PageLRU(page))
+                                               BUG();
+                                       if (PageActive(page))
+                                               BUG();
+                                       if (PageDirty(page))
+                                               BUG();
+
+                                       break;
+                               }
+                       } while ((entry = entry->next) != local_pages);
+               }
+
+               nr_pages = current->nr_local_pages;
+               /* free in reverse order so that the global order will be lifo */
+               while ((entry = local_pages->prev) != local_pages) {
+                       list_del(entry);
+                       tmp = list_entry(entry, struct page, list);
+                       __free_pages_ok(tmp, tmp->index);
+                       if (!nr_pages--)
+                               BUG();
+               }
+               current->nr_local_pages = 0;
+       }
+
+       *freed = __freed;
+       return page;
+}
+
+static inline unsigned long zone_free_pages(zone_t * zone, unsigned int order)
+{
+       long free = zone->free_pages - (1UL << order);
+       return free >= 0 ? free : 0;
+}
+
+/*
+ * This is the 'heart' of the zoned buddy allocator:
+ */
+struct page * __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist)
+{
+       zone_t **zone, * classzone;
+       struct page * page;
+       int freed, class_idx;
+
+       zone = zonelist->zones;
+       classzone = *zone;
+       class_idx = zone_idx(classzone);
+
+       for (;;) {
+               zone_t *z = *(zone++);
+               if (!z)
+                       break;
+
+               if (zone_free_pages(z, order) > z->watermarks[class_idx].low) {
+                       page = rmqueue(z, order);
+                       if (page)
+                               return page;
+               }
+       }
+
+       classzone->need_balance = 1;
+       mb();
+       if (waitqueue_active(&kswapd_wait))
+               wake_up_interruptible(&kswapd_wait);
+
+       zone = zonelist->zones;
+       for (;;) {
+               unsigned long min;
+               zone_t *z = *(zone++);
+               if (!z)
+                       break;
+
+               min = z->watermarks[class_idx].min;
+               if (!(gfp_mask & __GFP_WAIT))
+                       min >>= 2;
+               if (zone_free_pages(z, order) > min) {
+                       page = rmqueue(z, order);
+                       if (page)
+                               return page;
+               }
+       }
+
+       /* here we're in the low on memory slow path */
+
+       if ((current->flags & PF_MEMALLOC) && 
+                       (!in_interrupt() || (current->flags & PF_MEMDIE))) {
+               zone = zonelist->zones;
+               for (;;) {
+                       zone_t *z = *(zone++);
+                       if (!z)
+                               break;
+
+                       page = rmqueue(z, order);
+                       if (page)
+                               return page;
+               }
+               return NULL;
+       }
+
+       /* Atomic allocations - we can't balance anything */
+       if (!(gfp_mask & __GFP_WAIT))
+               goto out;
+
+ rebalance:
+       page = balance_classzone(classzone, gfp_mask, order, &freed);
+       if (page)
+               return page;
+
+       zone = zonelist->zones;
+       if (likely(freed)) {
+               for (;;) {
+                       zone_t *z = *(zone++);
+                       if (!z)
+                               break;
+
+                       if (zone_free_pages(z, order) > z->watermarks[class_idx].min) {
+                               page = rmqueue(z, order);
+                               if (page)
+                                       return page;
+                       }
+               }
+               goto rebalance;
+       } else {
+               /* 
+                * Check whether another task has been killed in the
+                * meantime, in which case the allocation can succeed.
+                */
+               for (;;) {
+                       zone_t *z = *(zone++);
+                       if (!z)
+                               break;
+
+                       if (zone_free_pages(z, order) > z->watermarks[class_idx].high) {
+                               page = rmqueue(z, order);
+                               if (page)
+                                       return page;
+                       }
+               }
+       }
+
+ out:
+       printk(KERN_NOTICE "__alloc_pages: %u-order allocation failed (gfp=0x%x/%i)\n",
+              order, gfp_mask, !!(current->flags & PF_MEMALLOC));
+       if (unlikely(vm_gfp_debug))
+               dump_stack();
+       return NULL;
+}
+
+/*
+ * Common helper functions.
+ */
+unsigned long __get_free_pages(unsigned int gfp_mask, unsigned int order)
+{
+       struct page * page;
+
+       page = alloc_pages(gfp_mask, order);
+       if (!page)
+               return 0;
+       return (unsigned long) page_address(page);
+}
+
+unsigned long get_zeroed_page(unsigned int gfp_mask)
+{
+       struct page * page;
+
+       page = alloc_pages(gfp_mask, 0);
+       if (page) {
+               void *address = page_address(page);
+               clear_page(address);
+               return (unsigned long) address;
+       }
+       return 0;
+}
+
+void __free_pages(struct page *page, unsigned int order)
+{
+       if (!PageReserved(page) && put_page_testzero(page))
+               __free_pages_ok(page, order);
+}
+
+void free_pages(unsigned long addr, unsigned int order)
+{
+       if (addr != 0)
+               __free_pages(virt_to_page(addr), order);
+}
+
+/*
+ * Total amount of free (allocatable) RAM:
+ */
+unsigned int nr_free_pages (void)
+{
+       unsigned int sum = 0;
+       zone_t *zone;
+
+       for_each_zone(zone)
+               sum += zone->free_pages;
+
+       return sum;
+}
+
+/*
+ * Amount of free RAM allocatable as buffer memory:
+ */
+unsigned int nr_free_buffer_pages (void)
+{
+       pg_data_t *pgdat;
+       unsigned int sum = 0;
+       zonelist_t *zonelist;
+       zone_t **zonep, *zone;
+
+       for_each_pgdat(pgdat) {
+               int class_idx;
+               zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK);
+               zonep = zonelist->zones;
+               zone = *zonep;
+               class_idx = zone_idx(zone);
+
+               sum += zone->nr_cache_pages;
+               for (zone = pgdat->node_zones; zone < pgdat->node_zones + MAX_NR_ZONES; zone++) {
+                       int free = zone->free_pages - zone->watermarks[class_idx].high;
+                       if (free <= 0)
+                               continue;
+                       sum += free;
+               }
+       }
+
+       return sum;
+}
+
+#if CONFIG_HIGHMEM
+unsigned int nr_free_highpages (void)
+{
+       pg_data_t *pgdat;
+       unsigned int pages = 0;
+
+       for_each_pgdat(pgdat)
+               pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
+
+       return pages;
+}
+
+unsigned int freeable_lowmem(void)
+{
+       unsigned int pages = 0;
+       pg_data_t *pgdat;
+
+       for_each_pgdat(pgdat) {
+               pages += pgdat->node_zones[ZONE_DMA].free_pages;
+               pages += pgdat->node_zones[ZONE_DMA].nr_active_pages;
+               pages += pgdat->node_zones[ZONE_DMA].nr_inactive_pages;
+               pages += pgdat->node_zones[ZONE_NORMAL].free_pages;
+               pages += pgdat->node_zones[ZONE_NORMAL].nr_active_pages;
+               pages += pgdat->node_zones[ZONE_NORMAL].nr_inactive_pages;
+       }
+
+       return pages;
+}
+#endif
+
+#define K(x) ((x) << (PAGE_SHIFT-10))
+
+/*
+ * Show free area list (used inside shift_scroll-lock stuff)
+ * We also calculate the percentage fragmentation. We do this by counting the
+ * memory on each free list with the exception of the first item on the list.
+ */
+void show_free_areas_core(pg_data_t *pgdat)
+{
+       unsigned int order;
+       unsigned type;
+       pg_data_t *tmpdat = pgdat;
+
+       printk("Free pages:      %6dkB (%6dkB HighMem)\n",
+               K(nr_free_pages()),
+               K(nr_free_highpages()));
+
+       while (tmpdat) {
+               zone_t *zone;
+               for (zone = tmpdat->node_zones;
+                               zone < tmpdat->node_zones + MAX_NR_ZONES; zone++)
+                       printk("Zone:%s freepages:%6lukB\n", 
+                                       zone->name,
+                                       K(zone->free_pages));
+                       
+               tmpdat = tmpdat->node_next;
+       }
+
+       printk("( Active: %d, inactive: %d, free: %d )\n",
+              nr_active_pages,
+              nr_inactive_pages,
+              nr_free_pages());
+
+       for (type = 0; type < MAX_NR_ZONES; type++) {
+               struct list_head *head, *curr;
+               zone_t *zone = pgdat->node_zones + type;
+               unsigned long nr, total, flags;
+
+               total = 0;
+               if (zone->size) {
+                       spin_lock_irqsave(&zone->lock, flags);
+                       for (order = 0; order < MAX_ORDER; order++) {
+                               head = &(zone->free_area + order)->free_list;
+                               curr = head;
+                               nr = 0;
+                               for (;;) {
+                                       if ((curr = curr->next) == head)
+                                               break;
+                                       nr++;
+                               }
+                               total += nr * (1 << order);
+                               printk("%lu*%lukB ", nr, K(1UL) << order);
+                       }
+                       spin_unlock_irqrestore(&zone->lock, flags);
+               }
+               printk("= %lukB)\n", K(total));
+       }
+
+#ifdef SWAP_CACHE_INFO
+       show_swap_cache_info();
+#endif 
+}
+
+void show_free_areas(void)
+{
+       show_free_areas_core(pgdat_list);
+}
+
+/*
+ * Builds allocation fallback zone lists.
+ */
+static inline void build_zonelists(pg_data_t *pgdat)
+{
+       int i, j, k;
+
+       for (i = 0; i <= GFP_ZONEMASK; i++) {
+               zonelist_t *zonelist;
+               zone_t *zone;
+
+               zonelist = pgdat->node_zonelists + i;
+               memset(zonelist, 0, sizeof(*zonelist));
+
+               j = 0;
+               k = ZONE_NORMAL;
+               if (i & __GFP_HIGHMEM)
+                       k = ZONE_HIGHMEM;
+               if (i & __GFP_DMA)
+                       k = ZONE_DMA;
+
+               switch (k) {
+                       default:
+                               BUG();
+                       /*
+                        * fallthrough:
+                        */
+                       case ZONE_HIGHMEM:
+                               zone = pgdat->node_zones + ZONE_HIGHMEM;
+                               if (zone->size) {
+#ifndef CONFIG_HIGHMEM
+                                       BUG();
+#endif
+                                       zonelist->zones[j++] = zone;
+                               }
+                       case ZONE_NORMAL:
+                               zone = pgdat->node_zones + ZONE_NORMAL;
+                               if (zone->size)
+                                       zonelist->zones[j++] = zone;
+                       case ZONE_DMA:
+                               zone = pgdat->node_zones + ZONE_DMA;
+                               if (zone->size)
+                                       zonelist->zones[j++] = zone;
+               }
+               zonelist->zones[j++] = NULL;
+       } 
+}
+
+/*
+ * Helper functions to size the waitqueue hash table.
+ * Essentially these want to choose hash table sizes sufficiently
+ * large so that collisions trying to wait on pages are rare.
+ * But in fact, the number of active page waitqueues on typical
+ * systems is ridiculously low, less than 200. So this is even
+ * conservative, even though it seems large.
+ *
+ * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to
+ * waitqueues, i.e. the size of the waitq table given the number of pages.
+ */
+#define PAGES_PER_WAITQUEUE    256
+
+static inline unsigned long wait_table_size(unsigned long pages)
+{
+       unsigned long size = 1;
+
+       pages /= PAGES_PER_WAITQUEUE;
+
+       while (size < pages)
+               size <<= 1;
+
+       /*
+        * Once we have dozens or even hundreds of threads sleeping
+        * on IO we've got bigger problems than wait queue collision.
+        * Limit the size of the wait table to a reasonable size.
+        */
+       size = min(size, 4096UL);
+
+       return size;
+}
+
+/*
+ * This is an integer logarithm so that shifts can be used later
+ * to extract the more random high bits from the multiplicative
+ * hash function before the remainder is taken.
+ */
+static inline unsigned long wait_table_bits(unsigned long size)
+{
+       return ffz(~size);
+}
+
+#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
+
+/*
+ * Set up the zone data structures:
+ *   - mark all pages reserved
+ *   - mark all memory queues empty
+ *   - clear the memory bitmaps
+ */
+void __init free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap,
+       unsigned long *zones_size, unsigned long zone_start_paddr, 
+       unsigned long *zholes_size, struct page *lmem_map)
+{
+       unsigned long i, j;
+       unsigned long map_size;
+       unsigned long totalpages, offset, realtotalpages;
+       const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);
+
+       if (zone_start_paddr & ~PAGE_MASK)
+               BUG();
+
+       totalpages = 0;
+       for (i = 0; i < MAX_NR_ZONES; i++) {
+               unsigned long size = zones_size[i];
+               totalpages += size;
+       }
+       realtotalpages = totalpages;
+       if (zholes_size)
+               for (i = 0; i < MAX_NR_ZONES; i++)
+                       realtotalpages -= zholes_size[i];
+                       
+       printk("On node %d totalpages: %lu\n", nid, realtotalpages);
+
+       /*
+        * Some architectures (with lots of mem and discontiguous memory
+        * maps) have to search for a good mem_map area:
+        * For discontigmem, the conceptual mem map array starts from 
+        * PAGE_OFFSET, we need to align the actual array onto a mem map 
+        * boundary, so that MAP_NR works.
+        */
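+       /*
+        * Editor's worked example (hypothetical numbers): assuming
+        * MAP_ALIGN() rounds an offset up to a multiple of
+        * sizeof(struct page), as its use below suggests, then with a
+        * 64-byte struct page an alloc_bootmem_node() result of
+        * PAGE_OFFSET + 0x12345 would be realigned to PAGE_OFFSET +
+        * 0x12380, so that (lmem_map - mem_map) arithmetic and MAP_NR()
+        * land on whole struct page boundaries.
+        */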
+       map_size = (totalpages + 1)*sizeof(struct page);
+       if (lmem_map == (struct page *)0) {
+               lmem_map = (struct page *) alloc_bootmem_node(pgdat, map_size);
+               lmem_map = (struct page *)(PAGE_OFFSET + 
+                       MAP_ALIGN((unsigned long)lmem_map - PAGE_OFFSET));
+       }
+       *gmap = pgdat->node_mem_map = lmem_map;
+       pgdat->node_size = totalpages;
+       pgdat->node_start_paddr = zone_start_paddr;
+       pgdat->node_start_mapnr = (lmem_map - mem_map);
+       pgdat->nr_zones = 0;
+
+       offset = lmem_map - mem_map;    
+       for (j = 0; j < MAX_NR_ZONES; j++) {
+               zone_t *zone = pgdat->node_zones + j;
+               unsigned long mask;
+               unsigned long size, realsize;
+               int idx;
+
+               zone_table[nid * MAX_NR_ZONES + j] = zone;
+               realsize = size = zones_size[j];
+               if (zholes_size)
+                       realsize -= zholes_size[j];
+
+               printk("zone(%lu): %lu pages.\n", j, size);
+               zone->size = size;
+               zone->realsize = realsize;
+               zone->name = zone_names[j];
+               zone->lock = SPIN_LOCK_UNLOCKED;
+               zone->zone_pgdat = pgdat;
+               zone->free_pages = 0;
+               zone->need_balance = 0;
+               zone->nr_active_pages = zone->nr_inactive_pages = 0;
+
+               if (!size)
+                       continue;
+
+               /*
+                * The per-page waitqueue mechanism uses hashed waitqueues
+                * per zone.
+                */
+               zone->wait_table_size = wait_table_size(size);
+               zone->wait_table_shift =
+                       BITS_PER_LONG - wait_table_bits(zone->wait_table_size);
+               zone->wait_table = (wait_queue_head_t *)
+                       alloc_bootmem_node(pgdat, zone->wait_table_size
+                                               * sizeof(wait_queue_head_t));
+
+               for(i = 0; i < zone->wait_table_size; ++i)
+                       init_waitqueue_head(zone->wait_table + i);
+
+               pgdat->nr_zones = j+1;
+
+               mask = (realsize / zone_balance_ratio[j]);
+               if (mask < zone_balance_min[j])
+                       mask = zone_balance_min[j];
+               else if (mask > zone_balance_max[j])
+                       mask = zone_balance_max[j];
+               zone->watermarks[j].min = mask;
+               zone->watermarks[j].low = mask*2;
+               zone->watermarks[j].high = mask*3;
+               /* now set the watermarks of the lower zones in the "j" classzone */
+               for (idx = j-1; idx >= 0; idx--) {
+                       zone_t * lower_zone = pgdat->node_zones + idx;
+                       unsigned long lower_zone_reserve;
+                       if (!lower_zone->size)
+                               continue;
+
+                       mask = lower_zone->watermarks[idx].min;
+                       lower_zone->watermarks[j].min = mask;
+                       lower_zone->watermarks[j].low = mask*2;
+                       lower_zone->watermarks[j].high = mask*3;
+
+                       /* now the subtler part: add the lower-zone reserve for this classzone */
+                       lower_zone_reserve = realsize / lower_zone_reserve_ratio[idx];
+                       lower_zone->watermarks[j].min += lower_zone_reserve;
+                       lower_zone->watermarks[j].low += lower_zone_reserve;
+                       lower_zone->watermarks[j].high += lower_zone_reserve;
+
+                       realsize += lower_zone->realsize;
+               }
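+
+               /*
+                * Editor's worked example with made-up numbers (the
+                * zone_balance_* and lower_zone_reserve_ratio defaults live
+                * earlier in this file; the min/max clamping above is ignored
+                * here for simplicity).  If ZONE_NORMAL has realsize 224000
+                * pages and zone_balance_ratio[ZONE_NORMAL] == 128, then
+                * mask == 1750, giving watermarks min/low/high of
+                * 1750/3500/5250.  For ZONE_DMA beneath it, with
+                * lower_zone_reserve_ratio[0] == 256, an extra
+                * 224000/256 == 875 pages are added to DMA's watermarks in
+                * the ZONE_NORMAL classzone, keeping that many DMA pages out
+                * of reach of plain GFP_KERNEL allocations.
+                */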
+
+               zone->zone_mem_map = mem_map + offset;
+               zone->zone_start_mapnr = offset;
+               zone->zone_start_paddr = zone_start_paddr;
+
+               if ((zone_start_paddr >> PAGE_SHIFT) & (zone_required_alignment-1))
+                       printk("BUG: wrong zone alignment, it will crash\n");
+
+               /*
+                * Initially all pages are reserved - free ones are freed
+                * up by free_all_bootmem() once the early boot process is
+                * done. Non-atomic initialization, single-pass.
+                */
+               for (i = 0; i < size; i++) {
+                       struct page *page = mem_map + offset + i;
+                       set_page_zone(page, nid * MAX_NR_ZONES + j);
+                       set_page_count(page, 0);
+                       SetPageReserved(page);
+                       INIT_LIST_HEAD(&page->list);
+                       if (j != ZONE_HIGHMEM)
+                               set_page_address(page, __va(zone_start_paddr));
+                       zone_start_paddr += PAGE_SIZE;
+               }
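+
+               /*
+                * Editor's worked example (illustrative): with PAGE_OFFSET at
+                * 0xC0000000, a non-highmem zone starting at physical
+                * 0x01000000 gets its first page's virtual address set to
+                * __va(0x01000000) == 0xC1000000 by the loop above; highmem
+                * pages are skipped because they have no permanent kernel
+                * mapping.
+                */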
+
+               offset += size;
+               for (i = 0; ; i++) {
+                       unsigned long bitmap_size;
+
+                       INIT_LIST_HEAD(&zone->free_area[i].free_list);
+                       if (i == MAX_ORDER-1) {
+                               zone->free_area[i].map = NULL;
+                               break;
+                       }
+
+                       /*
+                        * Page buddy system uses "index >> (i+1)",
+                        * where "index" is at most "size-1".
+                        *
+                        * The extra "+3" is to round down to byte
+                        * size (8 bits per byte assumption). Thus
+                        * we get "(size-1) >> (i+4)" as the last byte
+                        * we can access.
+                        *
+                        * The "+1" is because we want to round the
+                        * byte allocation up rather than down. So
+                        * we should have had a "+7" before we shifted
+                        * down by three. Also, we have to add one as
+                        * we actually _use_ the last bit (it's [0,n]
+                        * inclusive, not [0,n[).
+                        *
+                        * So we actually had +7+1 before we shift
+                        * down by 3. But (n+8) >> 3 == (n >> 3) + 1
+                        * (modulo overflows, which we do not have).
+                        *
+                        * Finally, we LONG_ALIGN because all bitmap
+                        * operations are on longs.
+                        */
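+                       /*
+                        * Editor's worked example: for a zone of 1048576
+                        * pages at order i == 0, the last byte index is
+                        * (1048575 >> 4) == 65535, so bitmap_size becomes
+                        * LONG_ALIGN(65536) == 65536 bytes; at order i == 1
+                        * it halves to 32768, and so on up to MAX_ORDER-2.
+                        */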
+                       bitmap_size = (size-1) >> (i+4);
+                       bitmap_size = LONG_ALIGN(bitmap_size+1);
+                       zone->free_area[i].map = 
+                         (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size);
+               }
+       }
+       build_zonelists(pgdat);
+}
+
+void __init free_area_init(unsigned long *zones_size)
+{
+       free_area_init_core(0, &contig_page_data, &mem_map, zones_size, 0, 0, 0);
+}
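+
+/*
+ * Editor's sketch (illustrative only; real callers live in each
+ * architecture's paging_init()): a UMA setup for a machine with 256 MiB
+ * of RAM and a 16 MiB DMA zone might fill zones_size like this before
+ * calling free_area_init().  The function name and numbers are invented.
+ */
+#if 0  /* illustrative sketch only */
+static void __init example_paging_init(void)
+{
+       unsigned long zones_size[MAX_NR_ZONES] = { 0, 0, 0 };
+
+       zones_size[ZONE_DMA]    = 16 << (20 - PAGE_SHIFT);     /*  16 MiB */
+       zones_size[ZONE_NORMAL] = 240 << (20 - PAGE_SHIFT);    /* 240 MiB */
+       /* no highmem on this example machine */
+
+       free_area_init(zones_size);
+}
+#endif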
+
+static int __init setup_mem_frac(char *str)
+{
+       int j = 0;
+
+       while (get_option(&str, &zone_balance_ratio[j++]) == 2);
+       printk("setup_mem_frac: ");
+       for (j = 0; j < MAX_NR_ZONES; j++) printk("%d  ", zone_balance_ratio[j]);
+       printk("\n");
+       return 1;
+}
+
+__setup("memfrac=", setup_mem_frac);
+
+static int __init setup_lower_zone_reserve(char *str)
+{
+       int j = 0;
+
+       while (get_option(&str, &lower_zone_reserve_ratio[j++]) == 2);
+       printk("setup_lower_zone_reserve: ");
+       for (j = 0; j < MAX_NR_ZONES-1; j++) printk("%d  ", lower_zone_reserve_ratio[j]);
+       printk("\n");
+       return 1;
+}
+
+__setup("lower_zone_reserve=", setup_lower_zone_reserve);