#define BATCH_SIZE 1024 /* 1024 pages (4MB) at a time */
#define DEBUG 0
+#define DDEBUG 0
#if DEBUG
#define DPRINTF(_f, _a...) printf ( _f , ## _a )
#else
#define DPRINTF(_f, _a...) ((void)0)
#endif
+#if DDEBUG
+#define DDPRINTF(_f, _a...) printf ( _f , ## _a )
+#else
+#define DDPRINTF(_f, _a...) ((void)0)
+#endif
+
+
+
/* This may allow us to create a 'quiet' command-line option, if necessary. */
#define verbose_printf(_f, _a...) \
do { \
(nr % (sizeof(unsigned long)*8) ) ) & 1;
}
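
The two lines above are only the tail of the test_bit helper; its start falls outside this hunk. A minimal sketch of the whole helper, in the same style as the clear_bit/set_bit helpers added just below (making no claim about the exact original), would be:

    inline int test_bit ( int nr, volatile void * addr)
    {
        /* pick the unsigned long holding bit 'nr', shift it down, mask to one bit */
        return ( ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] >>
                 (nr % (sizeof(unsigned long)*8) ) ) & 1;
    }
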
+inline void clear_bit ( int nr, volatile void * addr)
+{
+ ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] &=
+ ~(1 << (nr % (sizeof(unsigned long)*8) ) );
+}
+
+inline void set_bit ( int nr, volatile void * addr)
+{
+ ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] |=
+ (1 << (nr % (sizeof(unsigned long)*8) ) );
+}
+
int xc_linux_save(int xc_handle,
u64 domid,
unsigned long mfn;
int verbose = flags & XCFLAGS_VERBOSE;
int live = flags & XCFLAGS_LIVE;
+ int debug = flags & XCFLAGS_DEBUG;
int sent_last_iter, sent_this_iter, max_iters;
/* Remember if we stopped the guest, so we can restart it on exit. */
/* A table containing the type of each PFN (/not/ MFN!). */
unsigned long *pfn_type = NULL;
+ unsigned long *pfn_batch = NULL;
/* A temporary mapping, and a copy, of one frame of guest memory. */
unsigned long page[1024];
unsigned long nr_pfns;
/* bitmap of pages left to send */
- unsigned long *to_send;
+ unsigned long *to_send, *to_fix;
+
+//live=0;
if ( mlock(&ctxt, sizeof(ctxt) ) )
{
int sz = (nr_pfns/8) + 8; // includes slop at end of array
to_send = malloc( sz );
+ to_fix = calloc( 1, sz );
- if (!to_send)
+ if (!to_send || !to_fix)
{
ERROR("Couldn't allocate to_send array");
goto out;
/* We want zeroed memory so use calloc rather than malloc. */
pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
+ pfn_batch = calloc(BATCH_SIZE, sizeof(unsigned long));
- if ( (pfn_type == NULL) )
+ if ( (pfn_type == NULL) || (pfn_batch == NULL) )
{
errno = ENOMEM;
goto out;
for( batch = 0; batch < BATCH_SIZE && n < nr_pfns ; n++ )
{
- if ( !test_bit(n, to_send ) ) continue;
+ if(0 && debug)
+ fprintf(stderr,"%d pfn= %08lx mfn= %08lx %d [mfn]= %08lx\n",
+ iter, n, live_pfn_to_mfn_table[n],
+ test_bit(n,to_send),
+ live_mfn_to_pfn_table[live_pfn_to_mfn_table[n]&0xFFFFF]);
+
+
+ if ( !test_bit(n, to_send ) &&
+ !( last_iter && test_bit(n, to_fix ) ) ) continue;
+
+ pfn_batch[batch] = n;
pfn_type[batch] = live_pfn_to_mfn_table[n];
if( pfn_type[batch] == 0x80000004 )
{
- DPRINTF("Skip netbuf pfn %lx. mfn %lx\n",n,pfn_type[batch]);
+ set_bit( n, to_fix );
+ if( iter>1 )
+ DDPRINTF("Urk! netbuf race: iter %d, pfn %lx. mfn %lx\n",
+ iter,n,pfn_type[batch]);
continue;
}
- if(iter>1) { DPRINTF("pfn=%x mfn=%x\n",n,pfn_type[batch]); }
-
+ if ( last_iter && test_bit(n, to_fix ) && !test_bit(n, to_send ))
+ {
+ DPRINTF("Fix! iter %d, pfn %lx. mfn %lx\n",
+ iter,n,pfn_type[batch]);
+ }
+
+ clear_bit( n, to_fix );
+
batch++;
}
- DPRINTF("batch %d:%d (n=%d)\n",iter,batch,n);
+ DDPRINTF("batch %d:%d (n=%d)\n",iter,batch,n);
if(batch == 0) goto skip; // vanishingly unlikely...
{
if((pfn_type[j]>>29) == 7)
{
- DPRINTF("type fail: page %i mfn %08lx\n",j,pfn_type[j]);
+ DDPRINTF("type fail: page %i mfn %08lx\n",j,pfn_type[j]);
continue;
}
+ if(0 && debug)
+ fprintf(stderr,"%d pfn= %08lx mfn= %08lx [mfn]= %08lx sum= %08lx\n",
+ iter,
+ (pfn_type[j] & PGT_type_mask) | pfn_batch[j],
+ pfn_type[j],
+ live_mfn_to_pfn_table[pfn_type[j]&(~PGT_type_mask)],
+ csum_page(region_base + (PAGE_SIZE*j))
+ );
+
/* canonicalise mfn->pfn */
pfn_type[j] = (pfn_type[j] & PGT_type_mask) |
- live_mfn_to_pfn_table[pfn_type[j]&~PGT_type_mask];
+ pfn_batch[j];
+ //live_mfn_to_pfn_table[pfn_type[j]&~PGT_type_mask];
+
}
-
+
if ( (*writerfn)(writerst, &batch, sizeof(int) ) )
{
if((pfn_type[j]>>29) == 7)
{
- DPRINTF("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]);
+ DDPRINTF("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]);
continue;
}
} /* end of it's a PT page */
else
{ /* normal page */
+
if ( (*writerfn)(writerst, region_base + (PAGE_SIZE*j), PAGE_SIZE) )
{
ERROR("Error when writing to state file (5)");
verbose_printf("\b\b\b\b100%% (%d pages)\n", sent_this_iter );
+ if ( debug && last_iter )
+ {
+ int minusone = -1;
+ memset( to_send, 0xff, nr_pfns/8 );
+ debug = 0;
+ printf("Entering debug resend-all mode\n");
+
+ /* send "-1" to put receiver into debug mode */
+ if ( (*writerfn)(writerst, &minusone, sizeof(int)) )
+ {
+ ERROR("Error when writing to state file (6)");
+ goto out;
+ }
+
+ continue;
+ }
+
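Since every batch on the wire is prefixed by an int page count, -1 is safe to use as an in-band marker. A hypothetical receiver-side check, not part of this patch (read_exact and io_fd are assumed names for the read helper and the input descriptor), might look like:

    int batch;
    if ( read_exact(io_fd, &batch, sizeof(int)) )   /* assumed: nonzero on error */
        goto err;
    if ( batch == -1 )
    {
        debug = 1;      /* writer's marker: switch into debug/verify mode */
        continue;
    }
    if ( batch == 0 || batch > BATCH_SIZE )
        goto err;       /* a real batch count is 1..BATCH_SIZE */
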
if ( last_iter )
break;
if ( ( sent_this_iter > (sent_last_iter * 0.95) ) ||
(iter >= max_iters) || (sent_this_iter < 10) )
{
- printf("Start last iteration\n");
+ DPRINTF("Start last iteration\n");
last_iter = 1;
xc_domain_stop_sync( xc_handle, domid );
}
sent_last_iter = sent_this_iter;
+
}
if ( pfn_type != NULL )
free(pfn_type);
+
+ DPRINTF("Save exit rc=%d\n",rc);
return !!rc;
********/
-static spinlock_t cpu_stall_lock;
+
+/**
+
+FIXME:
+
+1. Flush needs to avoid blowing away the L2 page that another CPU may be using!
+
+fix using cpu_raise_softirq
+
+have a flag to count in, (after switching to init's PTs)
+spinlock, reload cr3_shadow, unlock
+
+**/
static inline void free_shadow_page( struct mm_struct *m,
struct pfn_info *pfn_info )
for (i=0;i<ENTRIES_PER_L1_PAGETABLE;i++)
{
- if ( spl1e[i] & _PAGE_RW )
+ if ( (spl1e[i] & _PAGE_PRESENT ) && (spl1e[i] & _PAGE_RW) )
{
work++;
spl1e[i] &= ~_PAGE_RW;
unmap_domain_mem( spl1e );
}
}
+ break;
+
}
return work;
}
void shadow_mode_init(void)
{
- spin_lock_init( &cpu_stall_lock );
}
int shadow_mode_enable( struct task_struct *p, unsigned int mode )
// allocate space for first lot of extra nodes
m->shadow_ht_extras = kmalloc( sizeof(void*) +
- (shadow_ht_extra_size *
- sizeof(struct shadow_status)),
- GFP_KERNEL );
+ (shadow_ht_extra_size *
+ sizeof(struct shadow_status)),
+ GFP_KERNEL );
if( ! m->shadow_ht_extras )
goto nomem;
__shadow_mk_pagetable( m );
return 0;
- nomem:
+nomem:
return -ENOMEM;
}
}
static int shadow_mode_table_op( struct task_struct *p,
- dom0_shadow_control_t *sc )
+ dom0_shadow_control_t *sc )
{
unsigned int op = sc->op;
struct mm_struct *m = &p->mm;
// tables right now. Calling flush on yourself would be really
// stupid.
+ ASSERT(spin_is_locked(&p->mm.shadow_lock));
+
if ( m == &current->mm )
{
printk("Don't try and flush your own page tables!\n");
case DOM0_SHADOW_CONTROL_OP_CLEAN:
{
- int i,j,zero=1;
+ int i,j,zero=1;
- __scan_shadow_table( m, op );
+ __scan_shadow_table( m, op );
+ // __free_shadow_table( m );
- if( p->tot_pages > sc->pages ||
- !sc->dirty_bitmap || !p->mm.shadow_dirty_bitmap )
- {
- rc = -EINVAL;
- goto out;
- }
+ if( p->tot_pages > sc->pages ||
+ !sc->dirty_bitmap || !p->mm.shadow_dirty_bitmap )
+ {
+ rc = -EINVAL;
+ goto out;
+ }
- sc->pages = p->tot_pages;
+ sc->pages = p->tot_pages;
#define chunk (8*1024) // do this in 1KB chunks for L1 cache
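As a sanity check on the arithmetic here: chunk is 8*1024 = 8192 pages per pass, so each pass of the loop below copies 8192/8 = 1024 bytes (1KB) of the dirty bitmap, the working set the comment wants to keep in the L1 cache while it is copied out and then cleared.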
- for(i=0;i<p->tot_pages;i+=chunk)
- {
- int bytes = (( ((p->tot_pages-i) > (chunk))?
- (chunk):(p->tot_pages-i) ) + 7) / 8;
+ for(i=0;i<p->tot_pages;i+=chunk)
+ {
+ int bytes = (( ((p->tot_pages-i) > (chunk))?
+ (chunk):(p->tot_pages-i) ) + 7) / 8;
- copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
- p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
- bytes );
+ copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
+ p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
+ bytes );
- for(j=0; zero && j<bytes/sizeof(unsigned long);j++)
- {
- if( p->mm.shadow_dirty_bitmap[j] != 0 )
- zero = 0;
- }
-
- memset( p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
- 0, bytes);
- }
+ for(j=0; zero && j<bytes/sizeof(unsigned long);j++)
+ {
+ if( p->mm.shadow_dirty_bitmap[j] != 0 )
+ zero = 0;
+ }
- if (zero)
- {
- /* might as well stop the domain as an optimization. */
- if ( p->state != TASK_STOPPED )
- send_guest_virq(p, VIRQ_STOP);
- }
-
- break;
+ memset( p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
+ 0, bytes);
+ }
+
+ if (zero)
+ {
+ /* might as well stop the domain as an optimization. */
+ if ( p->state != TASK_STOPPED )
+ send_guest_virq(p, VIRQ_STOP);
+ }
+
+ break;
}
}
int shadow_mode_control( struct task_struct *p, dom0_shadow_control_t *sc )
{
unsigned int cmd = sc->op;
- int rc = 0, cpu;
-
- // don't call if already shadowed...
-
- /* The following is pretty hideous because we don't have a way of
- synchronously pausing a domain. If it's assigned to the curernt CPU,
- we don't have to worry -- it can't possibly actually be running.
- If its on another CPU, for the moment, we do something really gross:
- we cause the other CPU to spin regardless of what domain it is running.
-
- I know this is really grim, but it only lasts a few 10's of
- microseconds. It needs fixing as soon as the last of the Linux-isms
- get removed from the task structure...
-
- Oh, and let's hope someone doesn't repin the CPU while we're here.
- Also, prey someone else doesn't do this in another domain.
- At least there's only one dom0 at the moment...
-
- */
+ int rc = 0;
-printk("XXX\n");
spin_lock(&p->mm.shadow_lock);
-printk("SMC irq=%d\n",local_irq_is_enabled());
- spin_lock( &cpu_stall_lock );
- cpu = p->processor;
-printk("got target cpu=%d this cpu=%d\n",cpu, current->processor );
- if ( cpu != current->processor )
- {
- static void cpu_stall(void * data)
- {
- if ( current->processor == (int) data )
- {
- printk("Stall cpu=%d is locked %d irq=%d\n",(int)data,spin_is_locked(&cpu_stall_lock),local_irq_is_enabled());
- spin_lock( &cpu_stall_lock );
- printk("release\n");
- spin_unlock( &cpu_stall_lock );
- }
- }
-printk("before\n");
- smp_call_function(cpu_stall, (void*)cpu, 1, 0); // don't wait!
-printk("after\n");
- }
-
if ( p->mm.shadow_mode && cmd == DOM0_SHADOW_CONTROL_OP_OFF )
{
shadow_mode_disable(p);
}
else if ( p->mm.shadow_mode && cmd >= DOM0_SHADOW_CONTROL_OP_FLUSH && cmd<=DOM0_SHADOW_CONTROL_OP_CLEAN )
{
-printk("+");
rc = shadow_mode_table_op(p, sc);
-printk("=");
}
else
{
rc = -EINVAL;
}
- spin_unlock( &cpu_stall_lock );
-printk("SMC- %d\n",rc);
-
+ flush_tlb_cpu(p->processor);
+
spin_unlock(&p->mm.shadow_lock);
return rc;
unsigned long gpte, spte;
struct mm_struct *m = &current->mm;
- // we know interrupts are always on entry to the page fault handler
-
SH_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code );
check_pagetable( current, current->mm.pagetable, "pre-sf" );
return 0;
}
- spin_lock(&current->mm.shadow_lock);
// take the lock and reread gpte
+ while( unlikely(!spin_trylock(&current->mm.shadow_lock)) )
+ {
+ extern volatile unsigned long flush_cpumask;
+ if ( test_and_clear_bit(smp_processor_id(), &flush_cpumask) )
+ local_flush_tlb();
+ rep_nop();
+ }
+
+ ASSERT(spin_is_locked(&current->mm.shadow_lock));
+
if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
{
SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
extern int shadow_mode_control( struct task_struct *p, dom0_shadow_control_t *sc );
extern int shadow_fault( unsigned long va, long error_code );
extern void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte,
- unsigned long *prev_spfn_ptr,
- l1_pgentry_t **prev_spl1e_ptr );
+ unsigned long *prev_spfn_ptr,
+ l1_pgentry_t **prev_spl1e_ptr );
extern void shadow_l2_normal_pt_update( unsigned long pa, unsigned long gpte );
extern void unshadow_table( unsigned long gpfn, unsigned int type );
extern int shadow_mode_enable( struct task_struct *p, unsigned int mode );
extern void shadow_mode_disable( struct task_struct *p );
extern unsigned long shadow_l2_table(
- struct mm_struct *m, unsigned long gpfn );
+ struct mm_struct *m, unsigned long gpfn );
#define SHADOW_DEBUG 0
#define SHADOW_HASH_DEBUG 0
#ifndef NDEBUG
#define SH_LOG(_f, _a...) \
- printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \
- current->domain , __LINE__ , ## _a )
+printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \
+ current->domain , __LINE__ , ## _a )
#else
#define SH_LOG(_f, _a...)
#endif
#if SHADOW_DEBUG
#define SH_VLOG(_f, _a...) \
- printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \
- current->domain , __LINE__ , ## _a )
+ printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \
+ current->domain , __LINE__ , ## _a )
#else
#define SH_VLOG(_f, _a...)
#endif
#if 0
#define SH_VVLOG(_f, _a...) \
- printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \
- current->domain , __LINE__ , ## _a )
+ printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \
+ current->domain , __LINE__ , ## _a )
#else
#define SH_VVLOG(_f, _a...)
#endif
/************************************************************************/
-static inline void mark_dirty( struct mm_struct *m, unsigned int mfn )
+ static inline void __mark_dirty( struct mm_struct *m, unsigned int mfn )
{
- unsigned int pfn;
+ unsigned int pfn;
- pfn = machine_to_phys_mapping[mfn];
+ ASSERT(spin_is_locked(&m->shadow_lock));
+
+ //printk("%08x %08lx\n", mfn, machine_to_phys_mapping[mfn] );
- /* We use values with the top bit set to mark MFNs that aren't
- really part of the domain's psuedo-physical memory map e.g.
- the shared info frame. Nothing to do here...
- */
- if ( unlikely(pfn & 0x80000000U) ) return;
+ pfn = machine_to_phys_mapping[mfn];
- ASSERT(m->shadow_dirty_bitmap);
- if( likely(pfn<m->shadow_dirty_bitmap_size) )
- {
- /* use setbit to be smp guest safe. Since the same page is likely to
- get marked dirty many times, examine the bit first before doing the
- expensive lock-prefixed opertion */
+ /* We use values with the top bit set to mark MFNs that aren't
+ really part of the domain's pseudo-physical memory map e.g.
+ the shared info frame. Nothing to do here...
+ */
+ if ( unlikely(pfn & 0x80000000U) ) return;
- if (! test_bit( pfn, m->shadow_dirty_bitmap ) )
- set_bit( pfn, m->shadow_dirty_bitmap );
- }
- else
- {
- extern void show_traceX(void);
- SH_LOG("mark_dirty OOR! mfn=%x pfn=%x max=%x (mm %p)",
- mfn, pfn, m->shadow_dirty_bitmap_size, m );
- SH_LOG("dom=%lld caf=%08x taf=%08x\n",
- frame_table[mfn].u.domain->domain,
- frame_table[mfn].count_and_flags,
- frame_table[mfn].type_and_flags );
- //show_traceX();
- }
+ ASSERT(m->shadow_dirty_bitmap);
+ if( likely(pfn<m->shadow_dirty_bitmap_size) )
+ {
+ /* These updates occur with mm.shadow_lock held */
+ __set_bit( pfn, m->shadow_dirty_bitmap );
+ }
+ else
+ {
+ extern void show_traceX(void);
+ SH_LOG("mark_dirty OOR! mfn=%x pfn=%x max=%x (mm %p)",
+ mfn, pfn, m->shadow_dirty_bitmap_size, m );
+ SH_LOG("dom=%lld caf=%08x taf=%08x\n",
+ frame_table[mfn].u.domain->domain,
+ frame_table[mfn].count_and_flags,
+ frame_table[mfn].type_and_flags );
+ //show_traceX();
+ }
}
+
+static inline void mark_dirty( struct mm_struct *m, unsigned int mfn )
+{
+ ASSERT(local_irq_is_enabled());
+ //if(spin_is_locked(&m->shadow_lock)) printk("+");
+ spin_lock(&m->shadow_lock);
+ __mark_dirty( m, mfn );
+ spin_unlock(&m->shadow_lock);
+}
+
+
/************************************************************************/
static inline void l1pte_write_fault( struct mm_struct *m,
- unsigned long *gpte_p, unsigned long *spte_p )
+ unsigned long *gpte_p, unsigned long *spte_p )
{
unsigned long gpte = *gpte_p;
unsigned long spte = *spte_p;
switch( m->shadow_mode )
{
case SHM_test:
- spte = gpte;
- gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
- spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
- break;
+ spte = gpte;
+ gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
+ spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
+ break;
case SHM_logdirty:
- spte = gpte;
- gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
- spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
- mark_dirty( m, (gpte >> PAGE_SHIFT) );
- break;
+ spte = gpte;
+ gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
+ spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
+ __mark_dirty( m, (gpte >> PAGE_SHIFT) );
+ break;
}
*gpte_p = gpte;
}
static inline void l1pte_read_fault( struct mm_struct *m,
- unsigned long *gpte_p, unsigned long *spte_p )
+ unsigned long *gpte_p, unsigned long *spte_p )
{
unsigned long gpte = *gpte_p;
unsigned long spte = *spte_p;
switch( m->shadow_mode )
{
case SHM_test:
- spte = gpte;
- gpte |= _PAGE_ACCESSED;
- spte |= _PAGE_ACCESSED;
- if ( ! (gpte & _PAGE_DIRTY ) )
- spte &= ~ _PAGE_RW;
- break;
+ spte = gpte;
+ gpte |= _PAGE_ACCESSED;
+ spte |= _PAGE_ACCESSED;
+ if ( ! (gpte & _PAGE_DIRTY ) )
+ spte &= ~ _PAGE_RW;
+ break;
case SHM_logdirty:
- spte = gpte;
- gpte |= _PAGE_ACCESSED;
- spte |= _PAGE_ACCESSED;
- spte &= ~ _PAGE_RW;
- break;
+ spte = gpte;
+ gpte |= _PAGE_ACCESSED;
+ spte |= _PAGE_ACCESSED;
+ spte &= ~ _PAGE_RW;
+ break;
}
*gpte_p = gpte;
}
static inline void l1pte_no_fault( struct mm_struct *m,
- unsigned long *gpte_p, unsigned long *spte_p )
+ unsigned long *gpte_p, unsigned long *spte_p )
{
unsigned long gpte = *gpte_p;
unsigned long spte = *spte_p;
switch( m->shadow_mode )
{
case SHM_test:
- spte = 0;
- if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
- (_PAGE_PRESENT|_PAGE_ACCESSED) )
- {
- spte = gpte;
- if ( ! (gpte & _PAGE_DIRTY ) )
- spte &= ~ _PAGE_RW;
- }
- break;
+ spte = 0;
+ if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
+ (_PAGE_PRESENT|_PAGE_ACCESSED) )
+ {
+ spte = gpte;
+ if ( ! (gpte & _PAGE_DIRTY ) )
+ spte &= ~ _PAGE_RW;
+ }
+ break;
case SHM_logdirty:
- spte = 0;
- if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
- (_PAGE_PRESENT|_PAGE_ACCESSED) )
- {
- spte = gpte;
- spte &= ~ _PAGE_RW;
- }
-
- break;
+ spte = 0;
+ if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
+ (_PAGE_PRESENT|_PAGE_ACCESSED) )
+ {
+ spte = gpte;
+ spte &= ~ _PAGE_RW;
+ }
+
+ break;
}
*gpte_p = gpte;
}
static inline void l2pde_general( struct mm_struct *m,
- unsigned long *gpde_p, unsigned long *spde_p,
- unsigned long sl1pfn)
+ unsigned long *gpde_p, unsigned long *spde_p,
+ unsigned long sl1pfn)
{
unsigned long gpde = *gpde_p;
unsigned long spde = *spde_p;
- spde = 0;
+ spde = 0;
- if ( sl1pfn )
- {
- spde = (gpde & ~PAGE_MASK) | (sl1pfn<<PAGE_SHIFT) |
- _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY;
- gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY;
-
- if ( unlikely( (sl1pfn<<PAGE_SHIFT) == (gpde & PAGE_MASK) ) )
- {
- // detect linear map, and keep pointing at guest
- SH_VLOG("4c: linear mapping ( %08lx )",sl1pfn);
- spde = gpde & ~_PAGE_RW;
- }
+ if ( sl1pfn )
+ {
+ spde = (gpde & ~PAGE_MASK) | (sl1pfn<<PAGE_SHIFT) |
+ _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY;
+ gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY;
+
+ if ( unlikely( (sl1pfn<<PAGE_SHIFT) == (gpde & PAGE_MASK) ) )
+ {
+ // detect linear map, and keep pointing at guest
+ SH_VLOG("4c: linear mapping ( %08lx )",sl1pfn);
+ spde = gpde & ~_PAGE_RW;
}
+ }
*gpde_p = gpde;
*spde_p = spde;
#if SHADOW_HASH_DEBUG
static void shadow_audit(struct mm_struct *m, int print)
{
- int live=0, free=0, j=0, abs;
- struct shadow_status *a;
+ int live=0, free=0, j=0, abs;
+ struct shadow_status *a;
for(j=0;j<shadow_ht_buckets;j++)
{
a = &m->shadow_ht[j];
- if(a->pfn){live++; ASSERT(a->spfn_and_flags&PSH_pfn_mask);}
- ASSERT((a->pfn&0xf0000000)==0);
- ASSERT(a->pfn<0x00100000);
- a=a->next;
+ if(a->pfn){live++; ASSERT(a->spfn_and_flags&PSH_pfn_mask);}
+ ASSERT((a->pfn&0xf0000000)==0);
+ ASSERT(a->pfn<0x00100000);
+ a=a->next;
while(a && live<9999)
- {
- live++;
- if(a->pfn == 0 || a->spfn_and_flags == 0)
- {
- printk("XXX live=%d pfn=%08lx sp=%08lx next=%p\n",
- live, a->pfn, a->spfn_and_flags, a->next);
- BUG();
- }
- ASSERT(a->pfn);
- ASSERT((a->pfn&0xf0000000)==0);
- ASSERT(a->pfn<0x00100000);
- ASSERT(a->spfn_and_flags&PSH_pfn_mask);
- a=a->next;
- }
- ASSERT(live<9999);
+ {
+ live++;
+ if(a->pfn == 0 || a->spfn_and_flags == 0)
+ {
+ printk("XXX live=%d pfn=%08lx sp=%08lx next=%p\n",
+ live, a->pfn, a->spfn_and_flags, a->next);
+ BUG();
+ }
+ ASSERT(a->pfn);
+ ASSERT((a->pfn&0xf0000000)==0);
+ ASSERT(a->pfn<0x00100000);
+ ASSERT(a->spfn_and_flags&PSH_pfn_mask);
+ a=a->next;
}
+ ASSERT(live<9999);
+ }
a = m->shadow_ht_free;
while(a) { free++; a=a->next; }
if(print) printk("Xlive=%d free=%d\n",live,free);
- abs=(perfc_value(shadow_l1_pages)+perfc_value(shadow_l2_pages))-live;
- if( abs < -1 || abs > 1 )
- {
- printk("live=%d free=%d l1=%d l2=%d\n",live,free,
- perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages) );
- BUG();
+ abs=(perfc_value(shadow_l1_pages)+perfc_value(shadow_l2_pages))-live;
+ if( abs < -1 || abs > 1 )
+ {
+ printk("live=%d free=%d l1=%d l2=%d\n",live,free,
+ perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages) );
+ BUG();
}
}
static inline struct shadow_status* hash_bucket( struct mm_struct *m,
- unsigned int gpfn )
+ unsigned int gpfn )
{
return &(m->shadow_ht[gpfn % shadow_ht_buckets]);
}
static inline unsigned long __shadow_status( struct mm_struct *m,
- unsigned int gpfn )
+ unsigned int gpfn )
{
- struct shadow_status **ob, *b, *B = hash_bucket( m, gpfn );
+ struct shadow_status **ob, *b, *B = hash_bucket( m, gpfn );
b = B;
ob = NULL;
- SH_VVLOG("lookup gpfn=%08x bucket=%p", gpfn, b );
- shadow_audit(m,0); // if in debug mode
+ SH_VVLOG("lookup gpfn=%08x bucket=%p", gpfn, b );
+ shadow_audit(m,0); // if in debug mode
- do
+ do
+ {
+ if ( b->pfn == gpfn )
{
- if ( b->pfn == gpfn )
- {
- unsigned long t;
- struct shadow_status *x;
-
- // swap with head
- t=B->pfn; B->pfn=b->pfn; b->pfn=t;
- t=B->spfn_and_flags; B->spfn_and_flags=b->spfn_and_flags;
- b->spfn_and_flags=t;
-
- if(ob)
- { // pull to front
- *ob=b->next;
- x=B->next;
- B->next=b;
- b->next=x;
- }
- return B->spfn_and_flags;
- }
+ unsigned long t;
+ struct shadow_status *x;
+
+ // swap with head
+ t=B->pfn; B->pfn=b->pfn; b->pfn=t;
+ t=B->spfn_and_flags; B->spfn_and_flags=b->spfn_and_flags;
+ b->spfn_and_flags=t;
+
+ if(ob)
+ { // pull to front
+ *ob=b->next;
+ x=B->next;
+ B->next=b;
+ b->next=x;
+ }
+ return B->spfn_and_flags;
+ }
#if SHADOW_HASH_DEBUG
- else
- {
- if(b!=B)ASSERT(b->pfn);
- }
-#endif
- ob=&b->next;
- b=b->next;
+ else
+ {
+ if(b!=B)ASSERT(b->pfn);
}
- while (b);
+#endif
+ ob=&b->next;
+ b=b->next;
+ }
+ while (b);
- return 0;
+ return 0;
}
/* we can make this locking more fine grained e.g. per shadow page if it
anyway its probably not worth being too clever. */
static inline unsigned long get_shadow_status( struct mm_struct *m,
- unsigned int gpfn )
+ unsigned int gpfn )
{
- unsigned long res;
-
- /* If we get here, we know that this domain is running in shadow mode.
- We also know that some sort of update has happened to the underlying
- page table page: either a PTE has been updated, or the page has
- changed type. If we're in log dirty mode, we should set the approrpiate
- bit in the dirty bitmap.
- NB: the VA update path doesn't use this so needs to be handled
- independnetly.
- */
-
- if( m->shadow_mode == SHM_logdirty )
- mark_dirty( m, gpfn );
+ unsigned long res;
+
+ /* If we get here, we know that this domain is running in shadow mode.
+ We also know that some sort of update has happened to the underlying
+ page table page: either a PTE has been updated, or the page has
+ changed type. If we're in log dirty mode, we should set the appropriate
+ bit in the dirty bitmap.
+ NB: the VA update path doesn't use this so needs to be handled
+ independently.
+ */
+
+ ASSERT(local_irq_is_enabled());
+ //if(spin_is_locked(&m->shadow_lock)) printk("*");
+ spin_lock(&m->shadow_lock);
+
+ if( m->shadow_mode == SHM_logdirty )
+ __mark_dirty( m, gpfn );
- spin_lock(&m->shadow_lock);
- res = __shadow_status( m, gpfn );
- if (!res) spin_unlock(&m->shadow_lock);
- return res;
+ res = __shadow_status( m, gpfn );
+ if (!res) spin_unlock(&m->shadow_lock);
+ return res;
}
static inline void put_shadow_status( struct mm_struct *m )
{
- spin_unlock(&m->shadow_lock);
+ spin_unlock(&m->shadow_lock);
}
static inline void delete_shadow_status( struct mm_struct *m,
- unsigned int gpfn )
+ unsigned int gpfn )
{
- struct shadow_status *b, *B, **ob;
+ struct shadow_status *b, *B, **ob;
- B = b = hash_bucket( m, gpfn );
+ ASSERT(spin_is_locked(&m->shadow_lock));
- SH_VVLOG("delete gpfn=%08x bucket=%p", gpfn, b );
- shadow_audit(m,0);
- ASSERT(gpfn);
+ B = b = hash_bucket( m, gpfn );
+
+ SH_VVLOG("delete gpfn=%08x bucket=%p", gpfn, b );
+ shadow_audit(m,0);
+ ASSERT(gpfn);
- if( b->pfn == gpfn )
+ if( b->pfn == gpfn )
{
- if (b->next)
- {
- struct shadow_status *D=b->next;
- b->spfn_and_flags = b->next->spfn_and_flags;
- b->pfn = b->next->pfn;
-
- b->next = b->next->next;
- D->next = m->shadow_ht_free;
- D->pfn = 0;
- D->spfn_and_flags = 0;
- m->shadow_ht_free = D;
- }
- else
- {
- b->pfn = 0;
- b->spfn_and_flags = 0;
- }
+ if (b->next)
+ {
+ struct shadow_status *D=b->next;
+ b->spfn_and_flags = b->next->spfn_and_flags;
+ b->pfn = b->next->pfn;
+
+ b->next = b->next->next;
+ D->next = m->shadow_ht_free;
+ D->pfn = 0;
+ D->spfn_and_flags = 0;
+ m->shadow_ht_free = D;
+ }
+ else
+ {
+ b->pfn = 0;
+ b->spfn_and_flags = 0;
+ }
#if SHADOW_HASH_DEBUG
- if( __shadow_status(m,gpfn) ) BUG();
- shadow_audit(m,0);
+ if( __shadow_status(m,gpfn) ) BUG();
+ shadow_audit(m,0);
#endif
- return;
+ return;
}
- ob = &b->next;
- b=b->next;
+ ob = &b->next;
+ b=b->next;
- do
+ do
+ {
+ if ( b->pfn == gpfn )
{
- if ( b->pfn == gpfn )
- {
- b->pfn = 0;
- b->spfn_and_flags = 0;
+ b->pfn = 0;
+ b->spfn_and_flags = 0;
- // b is in the list
- *ob=b->next;
- b->next = m->shadow_ht_free;
- m->shadow_ht_free = b;
+ // b is in the list
+ *ob=b->next;
+ b->next = m->shadow_ht_free;
+ m->shadow_ht_free = b;
#if SHADOW_HASH_DEBUG
- if( __shadow_status(m,gpfn) ) BUG();
+ if( __shadow_status(m,gpfn) ) BUG();
#endif
- shadow_audit(m,0);
- return;
- }
-
- ob = &b->next;
- b=b->next;
+ shadow_audit(m,0);
+ return;
}
- while (b);
- // if we got here, it wasn't in the list
+ ob = &b->next;
+ b=b->next;
+ }
+ while (b);
+
+ // if we got here, it wasn't in the list
BUG();
}
static inline void set_shadow_status( struct mm_struct *m,
- unsigned int gpfn, unsigned long s )
+ unsigned int gpfn, unsigned long s )
{
- struct shadow_status *b, *B, *extra, **fptr;
+ struct shadow_status *b, *B, *extra, **fptr;
int i;
- B = b = hash_bucket( m, gpfn );
+ ASSERT(spin_is_locked(&m->shadow_lock));
+
+ B = b = hash_bucket( m, gpfn );
ASSERT(gpfn);
//ASSERT(s);
shadow_audit(m,0);
- do
+ do
+ {
+ if ( b->pfn == gpfn )
{
- if ( b->pfn == gpfn )
- {
- b->spfn_and_flags = s;
- shadow_audit(m,0);
- return;
- }
-
- b=b->next;
+ b->spfn_and_flags = s;
+ shadow_audit(m,0);
+ return;
}
- while (b);
- // if we got here, this is an insert rather than update
+ b=b->next;
+ }
+ while (b);
+
+ // if we got here, this is an insert rather than update
ASSERT( s ); // deletes must have succeeded by here
if ( B->pfn == 0 )
- {
- // we can use this head
- ASSERT( B->next == 0 );
- B->pfn = gpfn;
- B->spfn_and_flags = s;
- shadow_audit(m,0);
- return;
- }
+ {
+ // we can use this head
+ ASSERT( B->next == 0 );
+ B->pfn = gpfn;
+ B->spfn_and_flags = s;
+ shadow_audit(m,0);
+ return;
+ }
if( unlikely(m->shadow_ht_free == NULL) )
{
- SH_LOG("allocate more shadow hashtable blocks");
+ SH_LOG("allocate more shadow hashtable blocks");
- // we need to allocate more space
- extra = kmalloc( sizeof(void*) + (shadow_ht_extra_size *
- sizeof(struct shadow_status)), GFP_KERNEL );
+ // we need to allocate more space
+ extra = kmalloc( sizeof(void*) + (shadow_ht_extra_size *
+ sizeof(struct shadow_status)), GFP_KERNEL );
- if( ! extra ) BUG(); // should be more graceful here....
+ if( ! extra ) BUG(); // should be more graceful here....
- memset( extra, 0, sizeof(void*) + (shadow_ht_extra_size *
- sizeof(struct shadow_status)) );
+ memset( extra, 0, sizeof(void*) + (shadow_ht_extra_size *
+ sizeof(struct shadow_status)) );
- m->shadow_extras_count++;
+ m->shadow_extras_count++;
- // add extras to free list
- fptr = &m->shadow_ht_free;
- for ( i=0; i<shadow_ht_extra_size; i++ )
- {
- *fptr = &extra[i];
- fptr = &(extra[i].next);
- }
- *fptr = NULL;
+ // add extras to free list
+ fptr = &m->shadow_ht_free;
+ for ( i=0; i<shadow_ht_extra_size; i++ )
+ {
+ *fptr = &extra[i];
+ fptr = &(extra[i].next);
+ }
+ *fptr = NULL;
- *((struct shadow_status ** ) &extra[shadow_ht_extra_size]) =
- m->shadow_ht_extras;
- m->shadow_ht_extras = extra;
+ *((struct shadow_status ** ) &extra[shadow_ht_extra_size]) =
+ m->shadow_ht_extras;
+ m->shadow_ht_extras = extra;
}
- // should really put this in B to go right to front
- b = m->shadow_ht_free;
+ // should really put this in B to go right to front
+ b = m->shadow_ht_free;
m->shadow_ht_free = b->next;
b->spfn_and_flags = s;
- b->pfn = gpfn;
- b->next = B->next;
- B->next = b;
+ b->pfn = gpfn;
+ b->next = B->next;
+ B->next = b;
- shadow_audit(m,0);
+ shadow_audit(m,0);
- return;
+ return;
}
static inline void __shadow_mk_pagetable( struct mm_struct *mm )
{
- unsigned long gpfn, spfn=0;
+ unsigned long gpfn, spfn=0;
- gpfn = pagetable_val(mm->pagetable) >> PAGE_SHIFT;
+ gpfn = pagetable_val(mm->pagetable) >> PAGE_SHIFT;
- if ( unlikely((spfn=__shadow_status(mm, gpfn)) == 0 ) )
- {
- spfn = shadow_l2_table(mm, gpfn );
- }
- mm->shadow_table = mk_pagetable(spfn<<PAGE_SHIFT);
+ if ( unlikely((spfn=__shadow_status(mm, gpfn)) == 0 ) )
+ {
+ spfn = shadow_l2_table(mm, gpfn );
+ }
+ mm->shadow_table = mk_pagetable(spfn<<PAGE_SHIFT);
}
static inline void shadow_mk_pagetable( struct mm_struct *mm )
{
- SH_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )",
- pagetable_val(mm->pagetable), mm->shadow_mode );
+ SH_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )",
+ pagetable_val(mm->pagetable), mm->shadow_mode );
- if ( unlikely(mm->shadow_mode) )
- {
+ if ( unlikely(mm->shadow_mode) )
+ {
+ ASSERT(local_irq_is_enabled());
spin_lock(&mm->shadow_lock);
- __shadow_mk_pagetable( mm );
+ __shadow_mk_pagetable( mm );
spin_unlock(&mm->shadow_lock);
- }
+ }
- SH_VVLOG("leaving shadow_mk_pagetable( gptbase=%08lx, mode=%d ) sh=%08lx",
- pagetable_val(mm->pagetable), mm->shadow_mode,
- pagetable_val(mm->shadow_table) );
+ SH_VVLOG("leaving shadow_mk_pagetable( gptbase=%08lx, mode=%d ) sh=%08lx",
+ pagetable_val(mm->pagetable), mm->shadow_mode,
+ pagetable_val(mm->shadow_table) );
}
#endif /* XEN_SHADOW_H */
+
+