#define MAX_BATCH_SIZE 1024
+#define DEBUG 0 /* Set to 1 to compile in the DPRINTF() debug output. */
+
+#if DEBUG
+#define DPRINTF(_f, _a...) printf ( _f , ## _a ) /* DEBUG != 0: forwards format and arguments to printf(). */
+#else
+#define DPRINTF(_f, _a...) ((void)0) /* DEBUG == 0: expands to a no-op; the arguments are discarded. */
+#endif
+
+
/* This may allow us to create a 'quiet' command-line option, if necessary. */
#define verbose_printf(_f, _a...) \
do { \
goto out;
}
- printf("batch %d\n",j);
+ DPRINTF("batch %d\n",j);
if (j == 0)
break; // our work here is done
pfn = region_pfn_type[i] & ~PGT_type_mask;
-//if(n>=nr_pfns || ((region_pfn_type[i] & PGT_type_mask) == L2TAB) ) printf("pfn=%08lx mfn=%x\n",region_pfn_type[i],pfn_to_mfn_table[pfn]);
-
-
-//if(pfn_type[i])printf("^pfn=%d %08lx\n",pfn,pfn_type[i]);
-
if ((region_pfn_type[i]>>29) == 7)
continue;
mfn = pfn_to_mfn_table[pfn];
-//if(region_pfn_type[i])printf("i=%d pfn=%d mfn=%d type=%lx\n",i,pfn,mfn,region_pfn_type[i]);
-
ppage = (unsigned long*) (region_base + i*PAGE_SIZE);
if ( (*readerfn)(readerst, ppage, PAGE_SIZE) )
{
xpfn = ppage[k] >> PAGE_SHIFT;
-/*printf("L1 i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n",
- i,pfn,mfn,k,ppage[k],xpfn);*/
-
if ( xpfn >= nr_pfns )
{
ERROR("Frame number in type %d page table is out of range. i=%d k=%d pfn=0x%x nr_pfns=%d",region_pfn_type[i]>>29,i,k,xpfn,nr_pfns);
goto out;
}
-#if 0
- if ( (region_pfn_type[xpfn] != NONE) && (ppage[k] & _PAGE_RW) )
- {
- ERROR("Write access requested for a restricted frame");
- goto out;
- }
-#endif
+
ppage[k] &= (PAGE_SIZE - 1) & ~(_PAGE_GLOBAL | _PAGE_PAT);
ppage[k] |= pfn_to_mfn_table[xpfn] << PAGE_SHIFT;
}
{
xpfn = ppage[k] >> PAGE_SHIFT;
-/*printf("L2 i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n",
- i,pfn,mfn,k,ppage[k],xpfn);*/
-
if ( xpfn >= nr_pfns )
{
ERROR("Frame number in type %d page table is out of range. i=%d k=%d pfn=%d nr_pfns=%d",region_pfn_type[i]>>29,i,k,xpfn,nr_pfns);
n+=j; // crude stats
}
-printf("RECEIVED ALL PAGES\n");
+
+ DPRINTF("Received all pages\n");
mfn_mapper_close( region_mapper );
p_srec->resume_info.flags = 0;
unmap_pfn(pm_handle, p_srec);
-printf("new shared info is %lx\n", shared_info_frame);
-
/* Uncanonicalise each GDT frame number. */
if ( ctxt.gdt_ents > 8192 )
{
op.u.builddomain.ctxt = &ctxt;
rc = do_dom0_op(xc_handle, &op);
-printf("NORMAL EXIT RESTORE\n");
+ DPRINTF("Everything OK!\n");
+
out:
-printf("EXIT RESTORE\n");
if ( mmu != NULL )
free(mmu);
#define BATCH_SIZE 1024 /* 1024 pages (4MB) at a time */
+#define DEBUG 0 /* Set to 1 to compile in the DPRINTF() debug output. */
+
+#if DEBUG
+#define DPRINTF(_f, _a...) printf ( _f , ## _a ) /* DEBUG != 0: forwards format and arguments to printf(). */
+#else
+#define DPRINTF(_f, _a...) ((void)0) /* DEBUG == 0: expands to a no-op; the arguments are discarded. */
+#endif
+
/* This may allow us to create a 'quiet' command-line option, if necessary. */
#define verbose_printf(_f, _a...) \
do { \
int rc = 1, i, j, k, n, last_iter, iter = 0;
unsigned long mfn;
int verbose = flags & XCFLAGS_VERBOSE;
- int live = 1; //flags & XCFLAGS_LIVE; // XXXXXXXXXXXXXXXXXXX
+ int live = flags & XCFLAGS_LIVE;
int sent_last_iter, sent_this_iter, max_iters;
/* Remember if we stopped the guest, so we can restart it on exit. */
printf("Sleep for 1ms\n");
}
-#if 1
/* A cheesy test to see whether the domain contains valid state. */
if ( ctxt.pt_base == 0 )
{
ERROR("Domain is not in a valid Linux guest OS state");
goto out;
}
-#endif
-
/* Map the suspend-record MFN to pin it. The page must be owned by
domid for this to succeed. */
goto out;
}
- for(i=0;i<(nr_pfns+1023)/1024 ;i++)
- printf("LF: %d %x\n",i,live_pfn_to_mfn_frame_list[i]);
+ /* Canonicalise the pfn-to-mfn table frame-number list. */
+ memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE );
+ for ( i = 0; i < nr_pfns; i += 1024 )
+ {
+ if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) )
+ {
+ ERROR("Frame # in pfn-to-mfn frame list is not in pseudophys");
+ goto out;
+ }
+ }
- /* At this point, we can start the domain again if we're doign a
+ /* At this point, we can start the domain again if we're doing a
live suspend */
if( live )
{
-#if 1
if ( xc_shadow_control( xc_handle, domid,
DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
NULL, 0 ) < 0 )
ERROR("Couldn't enable shadow mode");
goto out;
}
-#endif
+
if ( xc_domain_start( xc_handle, domid ) < 0 )
{
ERROR("Couldn't restart domain");
goto out;
}
-//exit(-1);
+
last_iter = 0;
sent_last_iter = 1<<20; // 4GB's worth of pages
- max_iters = 8; // limit us to 9 time round loop
+ max_iters = 9; // limit us to 10 times round the loop
}
else
last_iter = 1;
/*
* Quick belt and braces sanity check.
*/
-
+#if DEBUG
for ( i = 0; i < nr_pfns; i++ )
{
mfn = live_pfn_to_mfn_table[i];
printf("i=0x%x mfn=%x live_mfn_to_pfn_table=%x\n",
i,mfn,live_mfn_to_pfn_table[mfn]);
}
-
- /* Canonicalise the pfn-to-mfn table frame-number list. */
- memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE );
- for ( i = 0; i < nr_pfns; i += 1024 )
- {
- if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) )
- {
- ERROR("Frame # in pfn-to-mfn frame list is not in pseudophys");
- goto out;
- }
- }
+#endif
/* Map the shared info frame */
live_shinfo = mfn_mapper_map_single(xc_handle, domid,
if( pfn_type[batch] == 0x80000004 )
{
- //printf("Skip netbuf pfn %lx. mfn %lx\n",n,pfn_type[batch]);
+ DPRINTF("Skip netbuf pfn %lx. mfn %lx\n",n,pfn_type[batch]);
continue;
}
-//if(iter>1) printf("pfn=%x mfn=%x\n",n,pfn_type[batch]);
+ if(iter>1) { DPRINTF("pfn=%x mfn=%x\n",n,pfn_type[batch]); }
batch++;
}
-
- for( j = 0; j < batch; j++ )
- {
-
- if( (pfn_type[j] &0xfffff) == 0x0000004 )
- {
- printf("XXXXXXXXSkip netbuf entry %d mfn %lx\n",j,pfn_type[j]);
- }
-
-
- }
-
- printf("batch %d:%d (n=%d)\n",iter,batch,n);
+ DPRINTF("batch %d:%d (n=%d)\n",iter,batch,n);
if(batch == 0) goto skip; // vanishingly unlikely...
{
if((pfn_type[j]>>29) == 7)
{
- //printf("type fail: page %i mfn %08lx\n",j,pfn_type[j]);
+ DPRINTF("type fail: page %i mfn %08lx\n",j,pfn_type[j]);
continue;
}
-//if((pfn_type[j] & PGT_type_mask) == L2TAB) printf("L2 pfn=%08lx mfn=%lx\n",pfn_type[j],live_mfn_to_pfn_table[pfn_type[j]&~PGT_type_mask]);
/* canonicalise mfn->pfn */
pfn_type[j] = (pfn_type[j] & PGT_type_mask) |
if((pfn_type[j]>>29) == 7)
{
- //printf("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]);
+ DPRINTF("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]);
continue;
}
if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
{
+ // I don't think this should ever happen
+
printf("FNI %d : [%08lx,%d] pte=%08lx, mfn=%08lx, pfn=%08lx [mfn]=%08lx\n",
j, pfn_type[j], k,
page[k], mfn, live_mfn_to_pfn_table[mfn],
(live_mfn_to_pfn_table[mfn]<nr_pfns)?
- live_pfn_to_mfn_table[live_mfn_to_pfn_table[mfn]]: 0xdeadbeef);
- pfn = 0; // be suspicious
+ live_pfn_to_mfn_table[live_mfn_to_pfn_table[mfn]]: 0xdeadbeef);
+
+ pfn = 0; // be suspicious, very suspicious
-// ERROR("Frame number in pagetable page is invalid");
-// goto out;
+ //goto out; // let's try our luck
}
page[k] &= PAGE_SIZE - 1;
page[k] |= pfn << PAGE_SHIFT;
- /*
- printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n",
- pfn_type[j]>>29,
- j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
- */
+#if DEBUG
+ printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n",
+ pfn_type[j]>>29,
+ j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
+#endif
} /* end of page table rewrite for loop */
if ( live )
{
- if ( sent_this_iter < (sent_last_iter * 0.95) && iter < max_iters )
- {
- // we seem to be doing OK, keep going
- }
- else
+ if ( ( sent_this_iter > (sent_last_iter * 0.95) ) ||
+ (iter >= max_iters) || (sent_this_iter < 10) )
{
printf("Start last iteration\n");
last_iter = 1;
goto out;
}
-#if 0
- if(last_iter) memset(to_send, 0xff, (nr_pfns+7)/8 );
-#endif
-
sent_last_iter = sent_this_iter;
}
} /* end of while 1 */
-printf("All memory is saved\n");
+ DPRINTF("All memory is saved\n");
/* Success! */
rc = 0;
PERROR("Could not get info on domain");
goto out;
}
-printf("A\n");
+
/* Canonicalise the suspend-record frame number. */
if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) )
{
ERROR("State record is not in range of pseudophys map");
goto out;
}
-printf("B\n");
+
/* Canonicalise each GDT frame number. */
for ( i = 0; i < ctxt.gdt_ents; i += 512 )
{
goto out;
}
}
-printf("C\n");
+
/* Canonicalise the page table base pointer. */
if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) )
{
goto out;
}
ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT;
-printf("D\n");
+
if ( (*writerfn)(writerst, &ctxt, sizeof(ctxt)) ||
(*writerfn)(writerst, live_shinfo, PAGE_SIZE) )
{
goto out;
}
munmap(live_shinfo, PAGE_SIZE);
-printf("E\n");
+
out:
/* Restart the domain if we had to stop it to save its state. */
if ( we_stopped_it )
int xc_domain_stop_sync( int xc_handle, domid_t domid )
{
dom0_op_t op;
+ int i;
+
- while (1)
+ op.cmd = DOM0_STOPDOMAIN;
+ op.u.stopdomain.domain = (domid_t)domid;
+ if ( do_dom0_op(xc_handle, &op) != 0 )
{
- op.cmd = DOM0_STOPDOMAIN;
- op.u.stopdomain.domain = (domid_t)domid;
- if ( do_dom0_op(xc_handle, &op) != 0 )
- {
- PERROR("Stopping target domain failed");
- goto out;
- }
+ PERROR("Stopping target domain failed");
+ goto out;
+ }
+
+ usleep(100); // 100us
- usleep(1000); // 1ms
- printf("Sleep for 1ms\n");
+ for(i=0;;i++)
+ {
+ /* Progress indicator: "Sleep." on the first retry, then one "." for
+    each further retry.  The braces make the else binding explicit, and
+    the flush pushes each mark out immediately -- without it the marks sit
+    in the stdout buffer until a newline is eventually printed. */
+ if (i>0)
+ {
+     if (i==1) printf("Sleep.");
+     else printf(".");
+     fflush(stdout);
+ }
op.cmd = DOM0_GETDOMAININFO;
op.u.getdomaininfo.domain = (domid_t)domid;
if ( op.u.getdomaininfo.state == DOMSTATE_STOPPED )
{
- printf("Domain %lld stopped\n",domid);
+ printf("\nDomain %lld stopped\n",domid);
return 0;
}
-
+
+ usleep(1000);
}
out:
u64 dom;
char *state_file;
- int progress = 1, live = 0;
+ int progress = 1, live = -1;
unsigned int flags = 0;
static char *kwd_list[] = { "dom", "state_file", "progress", "live", NULL };
&dom, &state_file, &progress, &live) )
return NULL;
- if (progress) flags |= XCFLAGS_VERBOSE;
- if (live) flags |= XCFLAGS_LIVE;
+ if (progress) flags |= XCFLAGS_VERBOSE;
+ if (live > 0) flags |= XCFLAGS_LIVE; /* any positive value is an explicit request for live mode (as before, when every non-zero value was accepted); -1 means the caller did not specify it */
if ( strncmp(state_file,"tcp:", strlen("tcp:")) == 0 )
{
return 0;
}
+ if (live == -1) flags |= XCFLAGS_LIVE; // default to live for tcp
+
strncpy( server, state_file+strlen("tcp://"), max_namelen);
server[max_namelen-1]='\0';
if ( (port_s = strchr(server,':')) != NULL )
nodev ?= n
debug ?= n
+trace ?= n
TARGET := $(BASEDIR)/xen
HDRS := $(wildcard $(BASEDIR)/include/xen/*.h)
CFLAGS += -DNO_DEVICES_IN_XEN
endif
+ifeq ($(trace),y)
+CFLAGS += -DTRACE_BUFFER
+endif
+
%.o: %.c $(HDRS) Makefile
$(CC) $(CFLAGS) -c $< -o $@
#include <xen/shadow.h>
#include <hypervisor-ifs/sched_ctl.h>
+
+#define TRC_DOM0OP_START_BASE 0x00020000
+#define TRC_DOM0OP_FINISH_BASE 0x00030000
+
+
extern unsigned int alloc_new_dom_mem(struct task_struct *, unsigned int);
/* Basically used to protect the domain-id space. */
return -EACCES;
}
+ TRACE_5D( TRC_DOM0OP_START_BASE + op->cmd,
+ 0, op->u.dummy[0], op->u.dummy[1], op->u.dummy[2], op->u.dummy[3] );
+
switch ( op->cmd )
{
}
+ TRACE_5D( TRC_DOM0OP_FINISH_BASE + op->cmd, ret,
+ op->u.dummy[0], op->u.dummy[1], op->u.dummy[2], op->u.dummy[3] );
+
+
return ret;
}
#include <xen/console.h>
#include <xen/net_headers.h>
#include <xen/serial.h>
+#include <xen/shadow.h>
kmem_cache_t *task_struct_cachep;
set_bit(PF_PRIVILEGED, &new_dom->flags);
+ shadow_mode_init();
+
/*
* We're going to setup domain0 using the module(s) that we stashed safely
* above our MAX_DIRECTMAP_ADDRESS in boot/boot.S. The second module, if
#include <xen/shadow.h>
#include <asm/domain_page.h>
#include <asm/page.h>
+#include <xen/event.h>
+#include <xen/trace.h>
/********
********/
+static spinlock_t cpu_stall_lock;
+
static inline void free_shadow_page( struct mm_struct *m,
struct pfn_info *pfn_info )
{
}
+void shadow_mode_init(void) /* Initialise the shadow-mode lock state:
+   sets up cpu_stall_lock, the spinlock that shadow_mode_control()
+   acquires, so this has to run before that lock is first taken. */
+{
+ spin_lock_init( &cpu_stall_lock );
+}
+
int shadow_mode_enable( struct task_struct *p, unsigned int mode )
{
struct mm_struct *m = &p->mm;
}
static int shadow_mode_table_op( struct task_struct *p,
- dom0_shadow_control_t *sc )
+ dom0_shadow_control_t *sc )
{
- unsigned int op = sc->op;
+ unsigned int op = sc->op;
struct mm_struct *m = &p->mm;
- int rc = 0;
+ int rc = 0;
// since Dom0 did the hypercall, we should be running with it's page
// tables right now. Calling flush on yourself would be really
break;
case DOM0_SHADOW_CONTROL_OP_CLEAN:
+ {
+ int i,j,zero=1;
+
+ __scan_shadow_table( m, op );
+
+ if( p->tot_pages > sc->pages ||
+ !sc->dirty_bitmap || !p->mm.shadow_dirty_bitmap )
{
- int i;
-
- __scan_shadow_table( m, op );
-
- if( p->tot_pages > sc->pages ||
- !sc->dirty_bitmap || !p->mm.shadow_dirty_bitmap )
- {
- rc = -EINVAL;
- goto out;
- }
-
- sc->pages = p->tot_pages;
-
+ rc = -EINVAL;
+ goto out;
+ }
+
+ sc->pages = p->tot_pages;
+
#define chunk (8*1024) // do this in 1KB chunks for L1 cache
-
- for(i=0;i<p->tot_pages;i+=chunk)
+
+ for(i=0;i<p->tot_pages;i+=chunk)
+ {
+ int bytes = (( ((p->tot_pages-i) > (chunk))?
+ (chunk):(p->tot_pages-i) ) + 7) / 8;
+
+ copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
+ p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
+ bytes );
+
+ for(j=0; zero && j<bytes/sizeof(unsigned long);j++)
{
- int bytes = (( ((p->tot_pages-i) > (chunk))?
- (chunk):(p->tot_pages-i) ) + 7) / 8;
-
- copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
- p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
- bytes );
+ /* Scan the chunk that was just copied out: index from this chunk's
+    base, i.e. the same i/(8*sizeof(unsigned long)) offset used by the
+    copy_to_user() and memset() calls.  Indexing from 0 would re-read
+    the first chunk, which has already been cleared by the time later
+    chunks are processed, and so would miss dirty bits beyond it. */
+ if( p->mm.shadow_dirty_bitmap[(i/(8*sizeof(unsigned long))) + j] != 0 )
+     zero = 0;
+ }
- memset( p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
- 0, bytes);
- }
+ memset( p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
+ 0, bytes);
+ }
- break;
+ if (zero)
+ {
+ /* might as well stop the domain as an optimization. */
+ if ( p->state != TASK_STOPPED )
+ send_guest_virq(p, VIRQ_STOP);
}
+
+ break;
+ }
}
return rc;
}
-
int shadow_mode_control( struct task_struct *p, dom0_shadow_control_t *sc )
{
- int we_paused = 0;
- unsigned int cmd = sc->op;
- int rc = 0;
-
+ unsigned int cmd = sc->op;
+ int rc = 0, cpu;
+
// don't call if already shadowed...
- // sychronously stop domain
- if( 0 && !(p->state & TASK_STOPPED) && !(p->state & TASK_PAUSED))
- {
- printk("about to pause domain\n");
- sched_pause_sync(p);
- printk("paused domain\n");
- we_paused = 1;
+ /* The following is pretty hideous because we don't have a way of
+ synchronously pausing a domain. If it's assigned to the current CPU,
+ we don't have to worry -- it can't possibly actually be running.
+ If it's on another CPU, for the moment, we do something really gross:
+ we cause the other CPU to spin regardless of what domain it is running.
+
+ I know this is really grim, but it only lasts a few 10's of
+ microseconds. It needs fixing as soon as the last of the Linux-isms
+ get removed from the task structure...
+
+ Oh, and let's hope someone doesn't repin the CPU while we're here.
+ Also, pray someone else doesn't do this in another domain.
+ At least there's only one dom0 at the moment...
+ */
+printk("SMC\n");
+ spin_lock( &cpu_stall_lock );
+ cpu = p->processor;
+printk("got %d %d\n",cpu, current->processor );
+ if ( cpu != current->processor )
+ {
+printk("CPU %d %d\n",cpu, current->processor );
+ static void cpu_stall(void * data)
+ {
+ if ( current->processor == (int) data )
+ {
+ printk("Stall %d\n",(int)data);
+ spin_lock( &cpu_stall_lock );
+ spin_unlock( &cpu_stall_lock );
+ }
+ }
+
+ smp_call_function(cpu_stall, (void*)cpu, 1, 0); // don't wait!
}
if ( p->mm.shadow_mode && cmd == DOM0_SHADOW_CONTROL_OP_OFF )
}
else
{
- if ( we_paused ) wake_up(p);
- return -EINVAL;
+ rc = -EINVAL;
}
- if ( we_paused ) wake_up(p);
+ spin_unlock( &cpu_stall_lock );
+printk("SMC-\n");
return rc;
}
unsigned long interface_version; /* DOM0_INTERFACE_VERSION */
union
{
+ unsigned long dummy[4];
dom0_createdomain_t createdomain;
dom0_startdomain_t startdomain;
dom0_stopdomain_t stopdomain;
#define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
#define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START+(SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT))))
+extern void shadow_mode_init(void);
extern int shadow_mode_control( struct task_struct *p, dom0_shadow_control_t *sc );
extern int shadow_fault( unsigned long va, long error_code );
extern void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte,