--- /dev/null
+The xenstore ring is a datastructure stored within a single 4KiB page
+shared between the xenstore server and the guest. The ring contains
+two queues of bytes -- one in each direction -- and some signalling
+information. The [xenstore protocol](xenstore.txt) is layered on top of
+the byte streams.
+
+The xenstore ring datastructure
+===============================
+
+The following table describes the ring structure where
+ - offsets and lengths are in bytes;
+ - "Input" is used to describe the data sent to the server; and
+ - "Output" is used to describe the data sent to the domain.
+
+Offset Length Description
+-----------------------------------------------------------------
+0 1024 Input data
+1024 1024 Output data
+2048 4 Input consumer offset
+2052 4 Input producer offset
+2056 4 Output consumer offset
+2060 4 Output producer offset
+2064 4 Server feature bitmap
+2068 4 Connection state
+
+The Input data and Output data are circular buffers. Each buffer is
+associated with a pair of free-running offsets labelled "consumer" and
+"producer".
+
+A "producer" offset is the offset in the byte stream of the next byte
+to be written modulo 2^32. A "consumer" offset is the offset in the byte
+stream of the next byte to be read modulo 2^32. Implementations must
+take care to handle wraparound properly when performing arithmetic with
+these values.
+
+The byte at offset 'x' in the byte stream will be stored at offset
+'x modulo 1024' in the circular buffer.
+
+Implementations may only overwrite previously-written data if it has
+been marked as 'consumed' by the relevant consumer pointer.
+
+When the guest domain is created, there is no outstanding Input or Output
+data. However
+
+ - guests must not assume that producer or consumer pointers start
+ at zero; and
+ - guests must not assume that unused bytes in either the Input or
+ Output data buffers has any particular value.
+
+A xenstore ring is always associated with an event channel. Whenever the
+ring structure is updated the event channel must be signalled. The
+guest and server are free to inspect the contents of the ring at any
+time, not only in response to an event channel event. This implies that
+updates must be ordered carefully to ensure consistency.
+
+The xenstore server may decide to advertise some features via the
+"Server feature bitmap". The server can start advertising features
+at any time by setting bits but it will never stop advertising features
+i.e. bits will never be cleared. The guest is not permitted to write to
+the server feature bitmap. The server features are offered to the guest;
+it is up to the guest whether to use them or not. The guest should ignore
+any unknown feature bits.
+
+The following features are defined:
+
+Mask Description
+-----------------------------------------------------------------
+1 Ring reconnection (see the ring reconnection feature below)
+
+The "Connection state" field is used to request a ring close and reconnect.
+The "Connection state" field only contains valid data if the server has
+advertised the ring reconnection feature. If the feature has been advertised
+then the "Connection state" may take the following values:
+
+Value Description
+-----------------------------------------------------------------
+0 Ring is connected
+1 Ring close and reconnect is in progress (see the "ring
+ reconnection feature" described below)
+
+The ring reconnection feature
+=============================
+
+The ring reconnection feature allows the guest to ask the server to
+reset the ring to a valid initial state i.e. one in which the Input
+and Output queues contain no data and there are no outstanding requests,
+watches or transactions.
+
+The ring reconnection feature is only available if the 'Ring reconnection'
+feature bit has been set by the server in the "Server feature bitmap".
+If a server supports ring reconnection, it will guarantee to advertise
+the feature before producing or consuming any data from the Input or Output
+queues.
+
+Assuming the server has advertised the feature, the guest can initiate
+a reconnection by setting the the Connection state to 1 ("Ring close
+and reconnect is in progress") and signalling the event channel.
+The guest must now ignore all fields except the Connection state and
+wait for it to be set to 0 ("Ring is connected")
+
+The server will guarantee to
+
+ - drop any partially read or written higher-level
+ [xenstore protocol](xenstore.txt) packets it may have;
+ - empty the Input and Output queues in the xenstore ring;
+ - discard any in-flight requests
+ - discard any watches associated with the connection
+ - discard any transactions associated with the connection
+ - set the Connection state to 0 ("Ring is connected"); and
+ - signal the event channel.
+
+From the point of view of the guest, the connection has been reset on a
+packet boundary.
+
+Note that only the guest may set the Connection state to 1 and only the
+server may set it back to 0.
static evtchn_port_t event; /* Event-channel to dom0 */
static char payload[XENSTORE_PAYLOAD_MAX + 1]; /* Unmarshalling area */
+static void ring_wait(void)
+{
+ struct shared_info *shinfo = get_shared_info();
+ struct sched_poll poll;
+
+ memset(&poll, 0, sizeof(poll));
+ set_xen_guest_handle(poll.ports, &event);
+ poll.nr_ports = 1;
+
+ while ( !test_and_clear_bit(event, shinfo->evtchn_pending) )
+ hypercall_sched_op(SCHEDOP_poll, &poll);
+}
+
/* Connect our xenbus client to the backend.
* Call once, before any other xenbus actions. */
void xenbus_setup(void)
void xenbus_shutdown(void)
{
struct shared_info *shinfo = get_shared_info();
+ evtchn_send_t send;
ASSERT(rings != NULL);
- /* We zero out the whole ring -- the backend can handle this, and it's
- * not going to surprise any frontends since it's equivalent to never
- * having used the rings. */
- memset(rings, 0, sizeof *rings);
-
+ if (rings->server_features & XENSTORE_SERVER_FEATURE_RECONNECTION) {
+ rings->connection = XENSTORE_RECONNECT;
+ send.port = event;
+ hypercall_event_channel_op(EVTCHNOP_send, &send);
+ while (*(volatile uint32_t*)&rings->connection == XENSTORE_RECONNECT)
+ ring_wait ();
+ } else {
+ /* If the backend reads the state while we're erasing it then the
+ * ring state will become corrupted, preventing guest frontends from
+ * connecting. This is rare. To help diagnose the failure, we fill
+ * the ring with XS_INVALID packets. */
+ memset(rings->req, 0xff, XENSTORE_RING_SIZE);
+ memset(rings->rsp, 0xff, XENSTORE_RING_SIZE);
+ rings->req_cons = rings->req_prod = 0;
+ rings->rsp_cons = rings->rsp_prod = 0;
+ }
/* Clear the event-channel state too. */
memset(shinfo->vcpu_info, 0, sizeof(shinfo->vcpu_info));
memset(shinfo->evtchn_pending, 0, sizeof(shinfo->evtchn_pending));
rings = NULL;
}
-static void ring_wait(void)
-{
- struct shared_info *shinfo = get_shared_info();
- struct sched_poll poll;
-
- memset(&poll, 0, sizeof(poll));
- set_xen_guest_handle(poll.ports, &event);
- poll.nr_ports = 1;
-
- while ( !test_and_clear_bit(event, shinfo->evtchn_pending) )
- hypercall_sched_op(SCHEDOP_poll, &poll);
-}
-
/* Helper functions: copy data in and out of the ring */
static void ring_write(const char *data, uint32_t len)
{
exception Eagain
exception Noent
exception Invalid
+exception Reconnect
+
+let _ =
+ Callback.register_exception "Xb.Reconnect" Reconnect
type backend_mmap =
{
let init_partial_in () = NoHdr
(Partial.header_size (), String.make (Partial.header_size()) '\000')
+let reconnect t = match t.backend with
+ | Fd _ ->
+ (* should never happen, so close the connection *)
+ raise End_of_file
+ | Xenmmap backend ->
+ Xs_ring.close backend.mmap;
+ backend.eventchn_notify ();
+ (* Clear our old connection state *)
+ Queue.clear t.pkt_in;
+ Queue.clear t.pkt_out;
+ t.partial_in <- init_partial_in ();
+ t.partial_out <- ""
+
let queue con pkt = Queue.push pkt con.pkt_out
let read_fd back con s len =
| Fd backfd -> write_fd backfd con s len
| Xenmmap backmmap -> write_mmap backmmap con s len
+(* NB: can throw Reconnect *)
let output con =
(* get the output string from a string_of(packet) or partial_out *)
let s = if String.length con.partial_out > 0 then
(* after sending one packet, partial is empty *)
con.partial_out = ""
+(* NB: can throw Reconnect *)
let input con =
let newpacket = ref false in
let to_read =
let open_fd fd = newcon (Fd { fd = fd; })
let open_mmap mmap notifyfct =
+ (* Advertise XENSTORE_SERVER_FEATURE_RECONNECTION *)
+ Xs_ring.set_server_features mmap (Xs_ring.Server_features.singleton Xs_ring.Server_feature.Reconnection);
newcon (Xenmmap {
mmap = mmap;
eventchn_notify = notifyfct;
| Resume
| Set_target
| Restrict
- | Invalid (* Not a valid wire operation *)
+ | Invalid
val operation_c_mapping : operation array
val size : int
val array_search : 'a -> 'a array -> int
exception Eagain
exception Noent
exception Invalid
+exception Reconnect
type backend_mmap = {
mmap : Xenmmap.mmap_interface;
eventchn_notify : unit -> unit;
mutable partial_out : string;
}
val init_partial_in : unit -> partial_buf
+val reconnect : t -> unit
val queue : t -> Packet.t -> unit
val read_fd : backend_fd -> 'a -> string -> int -> int
val read_mmap : backend_mmap -> 'a -> string -> int -> int
* GNU Lesser General Public License for more details.
*)
+module Server_feature = struct
+ type t =
+ | Reconnection
+end
+
+module Server_features = Set.Make(struct
+ type t = Server_feature.t
+ let compare = compare
+end)
+
external read: Xenmmap.mmap_interface -> string -> int -> int = "ml_interface_read"
external write: Xenmmap.mmap_interface -> string -> int -> int = "ml_interface_write"
+
+external _internal_set_server_features: Xenmmap.mmap_interface -> int -> unit = "ml_interface_set_server_features" "noalloc"
+external _internal_get_server_features: Xenmmap.mmap_interface -> int = "ml_interface_get_server_features" "noalloc"
+
+
+let get_server_features mmap =
+ (* NB only one feature currently defined above *)
+ let x = _internal_get_server_features mmap in
+ if x = 0
+ then Server_features.empty
+ else Server_features.singleton Server_feature.Reconnection
+
+let set_server_features mmap set =
+ (* NB only one feature currently defined above *)
+ let x = if set = Server_features.empty then 0 else 1 in
+ _internal_set_server_features mmap x
+
+external close: Xenmmap.mmap_interface -> unit = "ml_interface_close" "noalloc"
#define GET_C_STRUCT(a) ((struct mmap_interface *) a)
-static int xs_ring_read(struct mmap_interface *interface,
- char *buffer, int len)
+CAMLprim value ml_interface_read(value ml_interface,
+ value ml_buffer,
+ value ml_len)
{
+ CAMLparam3(ml_interface, ml_buffer, ml_len);
+ CAMLlocal1(ml_result);
+
+ struct mmap_interface *interface = GET_C_STRUCT(ml_interface);
+ char *buffer = String_val(ml_buffer);
+ int len = Int_val(ml_len);
+ int result;
+
struct xenstore_domain_interface *intf = interface->addr;
XENSTORE_RING_IDX cons, prod; /* offsets only */
int to_read;
+ uint32_t connection;
cons = *(volatile uint32_t*)&intf->req_cons;
prod = *(volatile uint32_t*)&intf->req_prod;
+ connection = *(volatile uint32*)&intf->connection;
+
+ if (connection != XENSTORE_CONNECTED)
+ caml_raise_constant(*caml_named_value("Xb.Reconnect"));
+
xen_mb();
if ((prod - cons) > XENSTORE_RING_SIZE)
- return -1;
+ caml_failwith("bad connection");
- if (prod == cons)
- return 0;
+ if (prod == cons) {
+ result = 0;
+ goto exit;
+ }
cons = MASK_XENSTORE_IDX(cons);
prod = MASK_XENSTORE_IDX(prod);
if (prod > cons)
memcpy(buffer, intf->req + cons, len);
xen_mb();
intf->req_cons += len;
- return len;
+ result = len;
+exit:
+ ml_result = Val_int(result);
+ CAMLreturn(ml_result);
}
-static int xs_ring_write(struct mmap_interface *interface,
- char *buffer, int len)
+CAMLprim value ml_interface_write(value ml_interface,
+ value ml_buffer,
+ value ml_len)
{
+ CAMLparam3(ml_interface, ml_buffer, ml_len);
+ CAMLlocal1(ml_result);
+
+ struct mmap_interface *interface = GET_C_STRUCT(ml_interface);
+ char *buffer = String_val(ml_buffer);
+ int len = Int_val(ml_len);
+ int result;
+
struct xenstore_domain_interface *intf = interface->addr;
XENSTORE_RING_IDX cons, prod;
int can_write;
+ uint32_t connection;
cons = *(volatile uint32_t*)&intf->rsp_cons;
prod = *(volatile uint32_t*)&intf->rsp_prod;
+ connection = *(volatile uint32*)&intf->connection;
+
+ if (connection != XENSTORE_CONNECTED)
+ caml_raise_constant(*caml_named_value("Xb.Reconnect"));
+
xen_mb();
- if ( (prod - cons) >= XENSTORE_RING_SIZE )
- return 0;
+ if ( (prod - cons) >= XENSTORE_RING_SIZE ) {
+ result = 0;
+ goto exit;
+ }
if (MASK_XENSTORE_IDX(prod) >= MASK_XENSTORE_IDX(cons))
can_write = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
else
memcpy(intf->rsp + MASK_XENSTORE_IDX(prod), buffer, len);
xen_mb();
intf->rsp_prod += len;
- return len;
+ result = len;
+exit:
+ ml_result = Val_int(result);
+ CAMLreturn(ml_result);
}
-CAMLprim value ml_interface_read(value interface, value buffer, value len)
+CAMLprim value ml_interface_set_server_features(value interface, value v)
{
- CAMLparam3(interface, buffer, len);
- CAMLlocal1(result);
- int res;
+ CAMLparam2(interface, v);
+ struct xenstore_domain_interface *intf = GET_C_STRUCT(interface)->addr;
- res = xs_ring_read(GET_C_STRUCT(interface),
- String_val(buffer), Int_val(len));
- if (res == -1)
- caml_failwith("bad connection");
- result = Val_int(res);
- CAMLreturn(result);
+ intf->server_features = Int_val(v);
+
+ CAMLreturn(Val_unit);
+}
+
+CAMLprim value ml_interface_get_server_features(value interface)
+{
+ CAMLparam1(interface);
+ struct xenstore_domain_interface *intf = GET_C_STRUCT(interface)->addr;
+
+ CAMLreturn(Val_int (intf->server_features));
}
-CAMLprim value ml_interface_write(value interface, value buffer, value len)
+CAMLprim value ml_interface_close(value interface)
{
- CAMLparam3(interface, buffer, len);
- CAMLlocal1(result);
- int res;
-
- res = xs_ring_write(GET_C_STRUCT(interface),
- String_val(buffer), Int_val(len));
- result = Val_int(res);
- CAMLreturn(result);
+ CAMLparam1(interface);
+ struct xenstore_domain_interface *intf = GET_C_STRUCT(interface)->addr;
+ int i;
+
+ intf->req_cons = intf->req_prod = intf->rsp_cons = intf->rsp_prod = 0;
+ /* Ensure the unused space is full of invalid xenstore packets. */
+ for (i = 0; i < XENSTORE_RING_SIZE; i++) {
+ intf->req[i] = 0xff; /* XS_INVALID = 0xffff */
+ intf->rsp[i] = 0xff;
+ }
+ xen_mb ();
+ intf->connection = XENSTORE_CONNECTED;
+ CAMLreturn(Val_unit);
}
|None -> ()
| Some domain -> Domain.mark_as_bad domain
+let initial_next_tid = 1
+
+let reconnect con =
+ Xenbus.Xb.reconnect con.xb;
+ (* dom is the same *)
+ Hashtbl.clear con.transactions;
+ con.next_tid <- initial_next_tid;
+ Hashtbl.clear con.watches;
+ (* anonid is the same *)
+ con.nb_watches <- 0;
+ con.stat_nb_ops <- 0;
+ (* perm is the same *)
+ ()
+
let get_path con =
Printf.sprintf "/local/domain/%i/" (match con.dom with None -> 0 | Some d -> Domain.get_id d)
xb = xbcon;
dom = dom;
transactions = Hashtbl.create 5;
- next_tid = 1;
+ next_tid = initial_next_tid;
watches = Hashtbl.create 8;
nb_watches = 0;
anonid = id;
let newpacket =
try
Connection.do_input con
- with Failure exp ->
+ with Xenbus.Xb.Reconnect ->
+ info "%s requests a reconnect" (Connection.get_domstr con);
+ Connection.reconnect con;
+ info "%s reconnection complete" (Connection.get_domstr con);
+ false
+ | Failure exp ->
error "caught exception %s" exp;
error "got a bad client %s" (sprintf "%-8s" (Connection.get_domstr con));
Connection.mark_as_bad con;
(Xenbus.Xb.Op.to_string ty) (sanitize_data data);*)
write_answer_log ~ty ~tid ~con ~data;
);
- ignore (Connection.do_output con)
+ try
+ ignore (Connection.do_output con)
+ with Xenbus.Xb.Reconnect ->
+ info "%s requests a reconnect" (Connection.get_domstr con);
+ Connection.reconnect con;
+ info "%s reconnection complete" (Connection.get_domstr con)
)
XS_RESUME,
XS_SET_TARGET,
XS_RESTRICT,
- XS_RESET_WATCHES
+ XS_RESET_WATCHES,
+
+ XS_INVALID = 0xffff /* Guaranteed to remain an invalid type */
};
#define XS_WRITE_NONE "NONE"
char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */
XENSTORE_RING_IDX req_cons, req_prod;
XENSTORE_RING_IDX rsp_cons, rsp_prod;
+ uint32_t server_features; /* Bitmap of features supported by the server */
+ uint32_t connection;
};
/* Violating this is very bad. See docs/misc/xenstore.txt. */
#define XENSTORE_ABS_PATH_MAX 3072
#define XENSTORE_REL_PATH_MAX 2048
+/* The ability to reconnect a ring */
+#define XENSTORE_SERVER_FEATURE_RECONNECTION 1
+
+/* Valid values for the connection field */
+#define XENSTORE_CONNECTED 0 /* the steady-state */
+#define XENSTORE_RECONNECT 1 /* guest has initiated a reconnect */
+
#endif /* _XS_WIRE_H */
/*