From 6213b696ba656fbc262f00e9846bf4f10a5967cf Mon Sep 17 00:00:00 2001 From: "kaf24@freefall.cl.cam.ac.uk" Date: Thu, 7 Oct 2004 16:30:38 +0000 Subject: [PATCH] bitkeeper revision 1.1159.1.216 (41656f2ek7HkbBXpAt8AAbtJEyjlTg) Grant-table interface redone. --- .../arch/xen/kernel/gnttab.c | 135 ++--- .../include/asm-xen/gnttab.h | 4 +- .../include/asm-xen/hypervisor.h | 111 ++-- xen/arch/x86/domain.c | 2 +- xen/arch/x86/memory.c | 42 +- xen/common/grant_table.c | 479 ++++++++++-------- xen/common/kernel.c | 12 +- xen/common/page_alloc.c | 4 +- xen/include/asm-x86/mm.h | 31 +- xen/include/hypervisor-ifs/grant_table.h | 155 ++++-- xen/include/xen/grant_table.h | 59 ++- 11 files changed, 532 insertions(+), 502 deletions(-) diff --git a/linux-2.6.8.1-xen-sparse/arch/xen/kernel/gnttab.c b/linux-2.6.8.1-xen-sparse/arch/xen/kernel/gnttab.c index c76df4b1e4..af892aeb2a 100644 --- a/linux-2.6.8.1-xen-sparse/arch/xen/kernel/gnttab.c +++ b/linux-2.6.8.1-xen-sparse/arch/xen/kernel/gnttab.c @@ -33,22 +33,9 @@ EXPORT_SYMBOL(gnttab_end_foreign_access); EXPORT_SYMBOL(gnttab_grant_foreign_transfer); EXPORT_SYMBOL(gnttab_end_foreign_transfer); -struct gntent_auxinfo { - u16 write_pin, read_pin; /* reference counts */ - u16 inuse; - grant_ref_t next; /* hash chain */ -}; - #define NR_GRANT_REFS 512 - -static struct gntent_auxinfo auxtab[NR_GRANT_REFS]; +static grant_ref_t gnttab_free_list[NR_GRANT_REFS]; static grant_ref_t gnttab_free_head; -static spinlock_t gnttab_lock; - -#define HASH_INVALID (0xFFFFU) -#define GNTTAB_HASH_SZ 512 -#define GNTTAB_HASH(_f) ((_f) & (GNTTAB_HASH_SZ-1)) -static grant_ref_t gnttab_hash[GNTTAB_HASH_SZ]; static grant_entry_t *shared; @@ -56,14 +43,14 @@ static grant_entry_t *shared; * Lock-free grant-entry allocator */ -static inline grant_ref_t +static inline int get_free_entry( void) { grant_ref_t fh, nfh = gnttab_free_head; - do { fh = nfh; } + do { if ( unlikely((fh = nfh) == NR_GRANT_REFS) ) return -1; } while ( unlikely((nfh = 
cmpxchg(&gnttab_free_head, fh, - auxtab[fh].next)) != fh) ); + gnttab_free_list[fh])) != fh) ); return fh; } @@ -72,109 +59,55 @@ put_free_entry( grant_ref_t ref) { grant_ref_t fh, nfh = gnttab_free_head; - do { auxtab[ref].next = fh = nfh; wmb(); } + do { gnttab_free_list[ref] = fh = nfh; wmb(); } while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh, ref)) != fh) ); } /* - * Public interface functions + * Public grant-issuing interface functions */ -grant_ref_t +int gnttab_grant_foreign_access( domid_t domid, unsigned long frame, int readonly) { - unsigned long flags; - grant_ref_t ref; - - spin_lock_irqsave(&gnttab_lock, flags); - - for ( ref = gnttab_hash[GNTTAB_HASH(frame)]; - ref != HASH_INVALID; - ref = auxtab[ref].next ) - { - if ( auxtab[ref].inuse && (shared[ref].frame == frame) ) - { - if ( readonly ) - auxtab[ref].read_pin++; - else if ( auxtab[ref].write_pin++ == 0 ) - clear_bit(_GTF_readonly, (unsigned long *)&shared[ref].flags); - goto done; - } - } - - ref = get_free_entry(); - auxtab[ref].inuse = 1; - auxtab[ref].read_pin = !!readonly; - auxtab[ref].write_pin = !readonly; - auxtab[ref].next = gnttab_hash[GNTTAB_HASH(frame)]; - gnttab_hash[GNTTAB_HASH(frame)] = ref; + int ref; + + if ( unlikely((ref = get_free_entry()) == -1) ) + return -ENOSPC; shared[ref].frame = frame; shared[ref].domid = domid; wmb(); shared[ref].flags = GTF_permit_access | (readonly ? 
GTF_readonly : 0); - done: - spin_unlock_irqrestore(&gnttab_lock, flags); - return 0; + return ref; } void gnttab_end_foreign_access( grant_ref_t ref, int readonly) { - unsigned long flags, frame = shared[ref].frame; - grant_ref_t *pref; - u16 sflags, nsflags; - - spin_lock_irqsave(&gnttab_lock, flags); + u16 flags, nflags; - if ( readonly ) - { - if ( (auxtab[ref].read_pin-- == 0) && (auxtab[ref].write_pin == 0) ) - goto delete; - } - else if ( auxtab[ref].write_pin-- == 0 ) - { - if ( auxtab[ref].read_pin == 0 ) - goto delete; - nsflags = shared[ref].flags; - do { - if ( (sflags = nsflags) & GTF_writing ) - printk(KERN_ALERT "WARNING: g.e. still in use for writing!\n"); - } - while ( (nsflags = cmpxchg(&shared[ref].flags, sflags, - sflags | GTF_readonly)) != sflags ); - } - - goto out; - - delete: - nsflags = shared[ref].flags; + nflags = shared[ref].flags; do { - if ( (sflags = nsflags) & (GTF_reading|GTF_writing) ) + if ( (flags = nflags) & (GTF_reading|GTF_writing) ) printk(KERN_ALERT "WARNING: g.e. 
still in use!\n"); } - while ( (nsflags = cmpxchg(&shared[ref].flags, sflags, 0)) != sflags ); + while ( (nflags = cmpxchg(&shared[ref].flags, flags, 0)) != flags ); - pref = &gnttab_hash[GNTTAB_HASH(frame)]; - while ( *pref != ref ) - pref = &auxtab[*pref].next; - *pref = auxtab[ref].next; - - auxtab[ref].inuse = 0; put_free_entry(ref); - - out: - spin_unlock_irqrestore(&gnttab_lock, flags); } -grant_ref_t +int gnttab_grant_foreign_transfer( domid_t domid) { - grant_ref_t ref = get_free_entry(); + int ref; + + if ( unlikely((ref = get_free_entry()) == -1) ) + return -ENOSPC; shared[ref].frame = 0; shared[ref].domid = domid; @@ -210,23 +143,19 @@ gnttab_end_foreign_transfer( void __init gnttab_init(void) { - int i; - gnttab_op_t gntop; - unsigned long frame; - - spin_lock_init(&gnttab_lock); + gnttab_setup_table_t setup; + unsigned long frame; + int i; - for ( i = 0; i < GNTTAB_HASH_SZ; i++ ) - { - gnttab_hash[i] = HASH_INVALID; - auxtab[i].next = i+1; - } + for ( i = 0; i < NR_GRANT_REFS; i++ ) + gnttab_free_list[i] = i + 1; - gntop.cmd = GNTTABOP_setup_table; - gntop.u.setup_table.dom = DOMID_SELF; - gntop.u.setup_table.nr_frames = 1; - gntop.u.setup_table.frame_list = &frame; - if ( HYPERVISOR_grant_table_op(&gntop) != 0 ) + setup.dom = DOMID_SELF; + setup.nr_frames = 1; + setup.frame_list = &frame; + if ( HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0 ) + BUG(); + if ( setup.status != 0 ) BUG(); set_fixmap_ma(FIX_GNTTAB, frame << PAGE_SHIFT); diff --git a/linux-2.6.8.1-xen-sparse/include/asm-xen/gnttab.h b/linux-2.6.8.1-xen-sparse/include/asm-xen/gnttab.h index f7e6ed3f78..6e52923b17 100644 --- a/linux-2.6.8.1-xen-sparse/include/asm-xen/gnttab.h +++ b/linux-2.6.8.1-xen-sparse/include/asm-xen/gnttab.h @@ -16,7 +16,7 @@ #include #include -grant_ref_t +int gnttab_grant_foreign_access( domid_t domid, unsigned long frame, int readonly); @@ -24,7 +24,7 @@ void gnttab_end_foreign_access( grant_ref_t ref, int readonly); -grant_ref_t +int 
gnttab_grant_foreign_transfer( domid_t domid); diff --git a/linux-2.6.8.1-xen-sparse/include/asm-xen/hypervisor.h b/linux-2.6.8.1-xen-sparse/include/asm-xen/hypervisor.h index b2f964be2a..f625361729 100644 --- a/linux-2.6.8.1-xen-sparse/include/asm-xen/hypervisor.h +++ b/linux-2.6.8.1-xen-sparse/include/asm-xen/hypervisor.h @@ -181,7 +181,9 @@ void deallocate_lowmem_region(unsigned long vstart, unsigned long pages); * Assembler stubs for hyper-calls. */ -static inline int HYPERVISOR_set_trap_table(trap_info_t *table) +static inline int +HYPERVISOR_set_trap_table( + trap_info_t *table) { int ret; __asm__ __volatile__ ( @@ -192,8 +194,9 @@ static inline int HYPERVISOR_set_trap_table(trap_info_t *table) return ret; } -static inline int HYPERVISOR_mmu_update(mmu_update_t *req, int count, - int *success_count) +static inline int +HYPERVISOR_mmu_update( + mmu_update_t *req, int count, int *success_count) { int ret; __asm__ __volatile__ ( @@ -204,7 +207,9 @@ static inline int HYPERVISOR_mmu_update(mmu_update_t *req, int count, return ret; } -static inline int HYPERVISOR_set_gdt(unsigned long *frame_list, int entries) +static inline int +HYPERVISOR_set_gdt( + unsigned long *frame_list, int entries) { int ret; __asm__ __volatile__ ( @@ -216,7 +221,9 @@ static inline int HYPERVISOR_set_gdt(unsigned long *frame_list, int entries) return ret; } -static inline int HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp) +static inline int +HYPERVISOR_stack_switch( + unsigned long ss, unsigned long esp) { int ret; __asm__ __volatile__ ( @@ -227,7 +234,8 @@ static inline int HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp) return ret; } -static inline int HYPERVISOR_set_callbacks( +static inline int +HYPERVISOR_set_callbacks( unsigned long event_selector, unsigned long event_address, unsigned long failsafe_selector, unsigned long failsafe_address) { @@ -241,7 +249,9 @@ static inline int HYPERVISOR_set_callbacks( return ret; } -static inline int 
HYPERVISOR_fpu_taskswitch(void) +static inline int +HYPERVISOR_fpu_taskswitch( + void) { int ret; __asm__ __volatile__ ( @@ -251,7 +261,9 @@ static inline int HYPERVISOR_fpu_taskswitch(void) return ret; } -static inline int HYPERVISOR_yield(void) +static inline int +HYPERVISOR_yield( + void) { int ret; __asm__ __volatile__ ( @@ -262,7 +274,9 @@ static inline int HYPERVISOR_yield(void) return ret; } -static inline int HYPERVISOR_block(void) +static inline int +HYPERVISOR_block( + void) { int ret; __asm__ __volatile__ ( @@ -273,7 +287,9 @@ static inline int HYPERVISOR_block(void) return ret; } -static inline int HYPERVISOR_shutdown(void) +static inline int +HYPERVISOR_shutdown( + void) { int ret; __asm__ __volatile__ ( @@ -285,7 +301,9 @@ static inline int HYPERVISOR_shutdown(void) return ret; } -static inline int HYPERVISOR_reboot(void) +static inline int +HYPERVISOR_reboot( + void) { int ret; __asm__ __volatile__ ( @@ -297,7 +315,9 @@ static inline int HYPERVISOR_reboot(void) return ret; } -static inline int HYPERVISOR_suspend(unsigned long srec) +static inline int +HYPERVISOR_suspend( + unsigned long srec) { int ret; /* NB. On suspend, control software expects a suspend record in %esi. 
*/ @@ -310,7 +330,9 @@ static inline int HYPERVISOR_suspend(unsigned long srec) return ret; } -static inline long HYPERVISOR_set_timer_op(u64 timeout) +static inline long +HYPERVISOR_set_timer_op( + u64 timeout) { int ret; unsigned long timeout_hi = (unsigned long)(timeout>>32); @@ -323,7 +345,9 @@ static inline long HYPERVISOR_set_timer_op(u64 timeout) return ret; } -static inline int HYPERVISOR_dom0_op(dom0_op_t *dom0_op) +static inline int +HYPERVISOR_dom0_op( + dom0_op_t *dom0_op) { int ret; dom0_op->interface_version = DOM0_INTERFACE_VERSION; @@ -335,7 +359,9 @@ static inline int HYPERVISOR_dom0_op(dom0_op_t *dom0_op) return ret; } -static inline int HYPERVISOR_set_debugreg(int reg, unsigned long value) +static inline int +HYPERVISOR_set_debugreg( + int reg, unsigned long value) { int ret; __asm__ __volatile__ ( @@ -346,7 +372,9 @@ static inline int HYPERVISOR_set_debugreg(int reg, unsigned long value) return ret; } -static inline unsigned long HYPERVISOR_get_debugreg(int reg) +static inline unsigned long +HYPERVISOR_get_debugreg( + int reg) { unsigned long ret; __asm__ __volatile__ ( @@ -357,7 +385,8 @@ static inline unsigned long HYPERVISOR_get_debugreg(int reg) return ret; } -static inline int HYPERVISOR_update_descriptor( +static inline int +HYPERVISOR_update_descriptor( unsigned long ma, unsigned long word1, unsigned long word2) { int ret; @@ -369,7 +398,9 @@ static inline int HYPERVISOR_update_descriptor( return ret; } -static inline int HYPERVISOR_set_fast_trap(int idx) +static inline int +HYPERVISOR_set_fast_trap( + int idx) { int ret; __asm__ __volatile__ ( @@ -380,10 +411,10 @@ static inline int HYPERVISOR_set_fast_trap(int idx) return ret; } -static inline int HYPERVISOR_dom_mem_op(unsigned int op, - unsigned long *extent_list, - unsigned long nr_extents, - unsigned int extent_order) +static inline int +HYPERVISOR_dom_mem_op( + unsigned int op, unsigned long *extent_list, + unsigned long nr_extents, unsigned int extent_order) { int ret; __asm__ 
__volatile__ ( @@ -396,7 +427,9 @@ static inline int HYPERVISOR_dom_mem_op(unsigned int op, return ret; } -static inline int HYPERVISOR_multicall(void *call_list, int nr_calls) +static inline int +HYPERVISOR_multicall( + void *call_list, int nr_calls) { int ret; __asm__ __volatile__ ( @@ -407,7 +440,8 @@ static inline int HYPERVISOR_multicall(void *call_list, int nr_calls) return ret; } -static inline int HYPERVISOR_update_va_mapping( +static inline int +HYPERVISOR_update_va_mapping( unsigned long page_nr, pte_t new_val, unsigned long flags) { int ret; @@ -426,7 +460,9 @@ static inline int HYPERVISOR_update_va_mapping( return ret; } -static inline int HYPERVISOR_event_channel_op(void *op) +static inline int +HYPERVISOR_event_channel_op( + void *op) { int ret; __asm__ __volatile__ ( @@ -437,7 +473,9 @@ static inline int HYPERVISOR_event_channel_op(void *op) return ret; } -static inline int HYPERVISOR_xen_version(int cmd) +static inline int +HYPERVISOR_xen_version( + int cmd) { int ret; __asm__ __volatile__ ( @@ -448,7 +486,9 @@ static inline int HYPERVISOR_xen_version(int cmd) return ret; } -static inline int HYPERVISOR_console_io(int cmd, int count, char *str) +static inline int +HYPERVISOR_console_io( + int cmd, int count, char *str) { int ret; __asm__ __volatile__ ( @@ -459,7 +499,9 @@ static inline int HYPERVISOR_console_io(int cmd, int count, char *str) return ret; } -static inline int HYPERVISOR_physdev_op(void *physdev_op) +static inline int +HYPERVISOR_physdev_op( + void *physdev_op) { int ret; __asm__ __volatile__ ( @@ -470,18 +512,21 @@ static inline int HYPERVISOR_physdev_op(void *physdev_op) return ret; } -static inline int HYPERVISOR_grant_table_op(void *gnttab_op) +static inline int +HYPERVISOR_grant_table_op( + unsigned int cmd, void *uop, unsigned int count) { int ret; __asm__ __volatile__ ( TRAP_INSTR : "=a" (ret) : "0" (__HYPERVISOR_grant_table_op), - "b" (gnttab_op) : "memory" ); + "b" (cmd), "c" (count), "d" (uop) : "memory" ); return ret; } 
-static inline int HYPERVISOR_update_va_mapping_otherdomain( +static inline int +HYPERVISOR_update_va_mapping_otherdomain( unsigned long page_nr, pte_t new_val, unsigned long flags, domid_t domid) { int ret; @@ -494,7 +539,9 @@ static inline int HYPERVISOR_update_va_mapping_otherdomain( return ret; } -static inline int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type) +static inline int +HYPERVISOR_vm_assist( + unsigned int cmd, unsigned int type) { int ret; __asm__ __volatile__ ( diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 634771f1d1..ef77bb728b 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -708,7 +708,7 @@ int construct_dom0(struct domain *p, page = &frame_table[mfn]; page->u.inuse.domain = p; page->u.inuse.type_info = 0; - page->count_info = PGC_always_set | PGC_allocated | 1; + page->count_info = PGC_allocated | 1; list_add_tail(&page->list, &p->page_list); p->tot_pages++; p->max_pages++; } diff --git a/xen/arch/x86/memory.c b/xen/arch/x86/memory.c index 42a1c9fab1..ad6059e185 100644 --- a/xen/arch/x86/memory.c +++ b/xen/arch/x86/memory.c @@ -164,9 +164,6 @@ void arch_init_memory(void) memset(percpu_info, 0, sizeof(percpu_info)); - for ( mfn = 0; mfn < max_page; mfn++ ) - frame_table[mfn].count_info |= PGC_always_set; - /* Initialise to a magic of 0x55555555 so easier to spot bugs later. */ memset(machine_to_phys_mapping, 0x55, 4<<20); @@ -193,9 +190,9 @@ void arch_init_memory(void) mfn < virt_to_phys(&machine_to_phys_mapping[1<<20])>>PAGE_SHIFT; mfn++ ) { - frame_table[mfn].count_info |= PGC_allocated | 1; - frame_table[mfn].u.inuse.type_info = PGT_gdt_page | 1; /* non-RW */ - frame_table[mfn].u.inuse.domain = dom_xen; + frame_table[mfn].count_info = PGC_allocated | 1; + frame_table[mfn].u.inuse.type_info = PGT_gdt_page | 1; /* non-RW */ + frame_table[mfn].u.inuse.domain = dom_xen; } } @@ -403,8 +400,6 @@ get_page_from_l1e( if ( unlikely(!pfn_is_ram(pfn)) ) { - /* SPECIAL CASE 1. Mapping an I/O page. 
*/ - /* Revert to caller privileges if FD == DOMID_IO. */ if ( d == dom_io ) d = current; @@ -420,33 +415,7 @@ get_page_from_l1e( } if ( unlikely(!get_page_from_pagenr(pfn, d)) ) - { - /* SPECIAL CASE 2. Mapping a foreign page via a grant table. */ - - int rc; - struct domain *e; - u32 count_info; - /* - * Yuk! Amazingly this is the simplest way to get a guaranteed atomic - * snapshot of a 64-bit value on IA32. x86/64 solves this of course! - * Basically it's a no-op CMPXCHG, to get us the current contents. - * No need for LOCK prefix -- we know that count_info is never zero - * because it contains PGC_always_set. - */ - ASSERT(test_bit(_PGC_always_set, &page->count_info)); - __asm__ __volatile__( - "cmpxchg8b %2" - : "=d" (e), "=a" (count_info), - "=m" (*(volatile u64 *)(&page->count_info)) - : "0" (0), "1" (0), "c" (0), "b" (0) ); - if ( unlikely((count_info & PGC_count_mask) == 0) || - unlikely(e == NULL) || unlikely(!get_domain(e)) ) - return 0; - rc = gnttab_try_map( - e, d, pfn, (l1v & _PAGE_RW) ? GNTTAB_MAP_RW : GNTTAB_MAP_RO); - put_domain(e); - return rc; - } + return 0; if ( l1v & _PAGE_RW ) { @@ -510,8 +479,7 @@ static void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d) * mappings and which unmappings are counted via the grant entry, but * really it doesn't matter as privileged domains have carte blanche. */ - if ( likely(gnttab_try_map(e, d, pfn, (l1v & _PAGE_RW) ? - GNTTAB_UNMAP_RW : GNTTAB_UNMAP_RO)) ) + if ( likely(gnttab_check_unmap(e, d, pfn, !(l1v & _PAGE_RW))) ) return; /* Assume this mapping was made via MMUEXT_SET_FOREIGNDOM... */ } diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c index aa682048a6..c40f0a5ed8 100644 --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c @@ -27,65 +27,50 @@ #define PIN_FAIL(_rc, _f, _a...) 
\ do { \ DPRINTK( _f, ## _a ); \ - rc = -(_rc); \ - goto out; \ + rc = (_rc); \ + goto fail; \ } while ( 0 ) -static inline void -check_tlb_flush( - active_grant_entry_t *a) +static inline int +get_maptrack_handle( + grant_table_t *t) { - if ( unlikely(NEED_FLUSH(tlbflush_time[smp_processor_id()], - a->tlbflush_timestamp)) ) - { - perfc_incr(need_flush_tlb_flush); - local_flush_tlb(); - } + unsigned int h; + if ( unlikely((h = t->maptrack_head) == NR_MAPTRACK_ENTRIES) ) + return -1; + t->maptrack_head = t->maptrack[h].ref_and_flags >> MAPTRACK_REF_SHIFT; + return h; } -static void -make_entry_mappable( - grant_table_t *t, active_grant_entry_t *a) +static inline void +put_maptrack_handle( + grant_table_t *t, int handle) { - u16 *ph = &t->maphash[GNT_MAPHASH(a->frame)]; - a->next = *ph; - *ph = a - t->active; + t->maptrack[handle].ref_and_flags = t->maptrack_head << MAPTRACK_REF_SHIFT; + t->maptrack_head = handle; } static void -make_entry_unmappable( - grant_table_t *t, active_grant_entry_t *a) -{ - active_grant_entry_t *p; - u16 *ph = &t->maphash[GNT_MAPHASH(a->frame)]; - while ( (p = &t->active[*ph]) != a ) - ph = &p->next; - *ph = a->next; - a->next = GNT_MAPHASH_INVALID; - check_tlb_flush(a); -} - -static long -gnttab_update_pin_status( - gnttab_update_pin_status_t *uop) +__gnttab_map_grant_ref( + gnttab_map_grant_ref_t *uop) { domid_t dom, sdom; grant_ref_t ref; - u16 pin_flags; struct domain *ld, *rd; - u16 sflags; + u16 flags, sflags; + int handle; active_grant_entry_t *act; grant_entry_t *sha; - long rc = 0; + s16 rc = 0; unsigned long frame; /* - * We bound the number of times we retry CMPXCHG on memory locations - * that we share with a guest OS. The reason is that the guest can modify - * that location at a higher rate than we can read-modify-CMPXCHG, so - * the guest could cause us to livelock. 
There are a few cases - * where it is valid for the guest to race our updates (e.g., to change - * the GTF_readonly flag), so we allow a few retries before failing. + * We bound the number of times we retry CMPXCHG on memory locations that + * we share with a guest OS. The reason is that the guest can modify that + * location at a higher rate than we can read-modify-CMPXCHG, so the guest + * could cause us to livelock. There are a few cases where it is valid for + * the guest to race our updates (e.g., to change the GTF_readonly flag), + * so we allow a few retries before failing. */ int retries = 0; @@ -94,21 +79,18 @@ gnttab_update_pin_status( /* Bitwise-OR avoids short-circuiting which screws control flow. */ if ( unlikely(__get_user(dom, &uop->dom) | __get_user(ref, &uop->ref) | - __get_user(pin_flags, &uop->pin_flags)) ) + __get_user(flags, &uop->flags)) ) { - DPRINTK("Fault while reading gnttab_update_pin_status_t.\n"); - return -EFAULT; + DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n"); + return; /* don't set status */ } - pin_flags &= (GNTPIN_dev_accessible | - GNTPIN_host_accessible | - GNTPIN_readonly); - if ( unlikely(ref >= NR_GRANT_ENTRIES) || - unlikely(pin_flags == GNTPIN_readonly) ) + unlikely((flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0) ) { - DPRINTK("Bad ref (%d) or flags (%x).\n", ref, pin_flags); - return -EINVAL; + DPRINTK("Bad ref (%d) or flags (%x).\n", ref, flags); + (void)__put_user(GNTST_bad_gntref, &uop->handle); + return; } if ( unlikely((rd = find_domain_by_id(dom)) == NULL) || @@ -117,19 +99,25 @@ gnttab_update_pin_status( if ( rd != NULL ) put_domain(rd); DPRINTK("Could not find domain %d\n", dom); - return -ESRCH; + (void)__put_user(GNTST_bad_domain, &uop->handle); + return; + } + + if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) ) + { + put_domain(rd); + DPRINTK("No more map handles available\n"); + (void)__put_user(GNTST_no_device_space, &uop->handle); + return; } act = 
&rd->grant_table->active[ref]; sha = &rd->grant_table->shared[ref]; spin_lock(&rd->grant_table->lock); - - if ( act->status == 0 ) + + if ( act->pin == 0 ) { - if ( unlikely(pin_flags == 0) ) - goto out; - /* CASE 1: Activating a previously inactive entry. */ sflags = sha->flags; @@ -141,7 +129,7 @@ gnttab_update_pin_status( if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) || unlikely(sdom != ld->domain) ) - PIN_FAIL(EINVAL, + PIN_FAIL(GNTST_general_error, "Bad flags (%x) or dom (%d). (NB. expected dom %d)\n", sflags, sdom, ld->domain); @@ -150,11 +138,11 @@ gnttab_update_pin_status( prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags; new_scombo = scombo | GTF_reading; - if ( !(pin_flags & GNTPIN_readonly) ) + if ( !(flags & GNTMAP_readonly) ) { new_scombo |= GTF_writing; if ( unlikely(sflags & GTF_readonly) ) - PIN_FAIL(EINVAL, + PIN_FAIL(GNTST_general_error, "Attempt to write-pin a r/o grant entry.\n"); } @@ -162,7 +150,7 @@ gnttab_update_pin_status( if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo, new_scombo)) ) - PIN_FAIL(EINVAL, + PIN_FAIL(GNTST_general_error, "Fault while modifying shared flags and domid.\n"); /* Did the combined update work (did we see what we expected?). */ @@ -170,7 +158,7 @@ gnttab_update_pin_status( break; if ( retries++ == 4 ) - PIN_FAIL(EINVAL, + PIN_FAIL(GNTST_general_error, "Shared grant entry is unstable.\n"); /* Didn't see what we expected. Split out the seen flags & dom. */ @@ -182,140 +170,226 @@ gnttab_update_pin_status( /* rmb(); */ /* not on x86 */ frame = sha->frame; if ( unlikely(!pfn_is_ram(frame)) || - unlikely(!((pin_flags & GNTPIN_readonly) ? + unlikely(!((flags & GNTMAP_readonly) ? 
get_page(&frame_table[frame], rd) : get_page_and_type(&frame_table[frame], rd, PGT_writable_page))) ) { clear_bit(_GTF_writing, &sha->flags); clear_bit(_GTF_reading, &sha->flags); - PIN_FAIL(EINVAL, + PIN_FAIL(GNTST_general_error, "Could not pin the granted frame!\n"); } - act->status = pin_flags; - act->domid = sdom; - act->frame = frame; - - make_entry_mappable(rd->grant_table, act); - } - else if ( pin_flags == 0 ) - { - /* CASE 2: Deactivating a previously active entry. */ - - if ( unlikely((act->status & - (GNTPIN_wmap_mask|GNTPIN_rmap_mask)) != 0) ) - PIN_FAIL(EINVAL, - "Attempt to deactiv a mapped g.e. (%x)\n", act->status); - - frame = act->frame; - if ( !(act->status & GNTPIN_readonly) ) - put_page_type(&frame_table[frame]); - put_page(&frame_table[frame]); - - act->status = 0; - make_entry_unmappable(rd->grant_table, act); - - clear_bit(_GTF_writing, &sha->flags); - clear_bit(_GTF_reading, &sha->flags); + if ( flags & GNTMAP_device_map ) + act->pin += (flags & GNTMAP_readonly) ? + GNTPIN_devr_inc : GNTPIN_devw_inc; + if ( flags & GNTMAP_host_map ) + act->pin += (flags & GNTMAP_readonly) ? + GNTPIN_hstr_inc : GNTPIN_hstw_inc; + act->domid = sdom; + act->frame = frame; } else { - /* CASE 3: Active modications to an already active entry. */ + /* CASE 2: Active modications to an already active entry. */ /* - * Check mapping counts up front, as necessary. - * After this compound check, the operation cannot fail. + * A cheesy check for possible pin-count overflow. + * A more accurate check cannot be done with a single comparison. */ - if ( ((pin_flags & (GNTPIN_readonly|GNTPIN_host_accessible)) != - GNTPIN_host_accessible) && - (unlikely((act->status & GNTPIN_wmap_mask) != 0) || - (((pin_flags & GNTPIN_host_accessible) == 0) && - unlikely((act->status & GNTPIN_rmap_mask) != 0))) ) - PIN_FAIL(EINVAL, - "Attempt to reduce pinning of a mapped g.e. (%x,%x)\n", - pin_flags, act->status); - - /* Check for changes to host accessibility. 
*/ - if ( pin_flags & GNTPIN_host_accessible ) - { - if ( !(act->status & GNTPIN_host_accessible) ) - make_entry_mappable(rd->grant_table, act); - } - else if ( act->status & GNTPIN_host_accessible ) - make_entry_unmappable(rd->grant_table, act); + if ( (act->pin & 0x80808080U) != 0 ) + PIN_FAIL(ENOSPC, "Risk of counter overflow %08x\n", act->pin); - /* Check for changes to write accessibility. */ - if ( pin_flags & GNTPIN_readonly ) + if ( !(flags & GNTMAP_readonly) && + !((sflags = sha->flags) & GTF_writing) ) { - if ( !(act->status & GNTPIN_readonly) ) - { - put_page_type(&frame_table[act->frame]); - check_tlb_flush(act); - clear_bit(_GTF_writing, &sha->flags); - } - } - else if ( act->status & GNTPIN_readonly ) - { - sflags = sha->flags; - for ( ; ; ) { u16 prev_sflags; if ( unlikely(sflags & GTF_readonly) ) - PIN_FAIL(EINVAL, + PIN_FAIL(GNTST_general_error, "Attempt to write-pin a r/o grant entry.\n"); - if ( unlikely(!get_page_type(&frame_table[act->frame], - PGT_writable_page)) ) - PIN_FAIL(EINVAL, - "Attempt to write-pin a unwritable page.\n"); - prev_sflags = sflags; /* NB. prev_sflags is updated in place to seen value. */ if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags, prev_sflags | GTF_writing)) ) - PIN_FAIL(EINVAL, + PIN_FAIL(GNTST_general_error, "Fault while modifying shared flags.\n"); if ( likely(prev_sflags == sflags) ) break; if ( retries++ == 4 ) - PIN_FAIL(EINVAL, + PIN_FAIL(GNTST_general_error, "Shared grant entry is unstable.\n"); sflags = prev_sflags; } + + if ( unlikely(!get_page_type(&frame_table[act->frame], + PGT_writable_page)) ) + { + clear_bit(_GTF_writing, &sha->flags); + PIN_FAIL(GNTST_general_error, + "Attempt to write-pin a unwritable page.\n"); + } } - /* Update status word -- this includes device accessibility. */ - act->status &= ~(GNTPIN_dev_accessible | - GNTPIN_host_accessible | - GNTPIN_readonly); - act->status |= pin_flags; + if ( flags & GNTMAP_device_map ) + act->pin += (flags & GNTMAP_readonly) ? 
+ GNTPIN_devr_inc : GNTPIN_devw_inc; + if ( flags & GNTMAP_host_map ) + act->pin += (flags & GNTMAP_readonly) ? + GNTPIN_hstr_inc : GNTPIN_hstw_inc; } + ld->grant_table->maptrack[handle].domid = dom; + ld->grant_table->maptrack[handle].ref_and_flags = + (ref << MAPTRACK_REF_SHIFT) | (flags & MAPTRACK_GNTMAP_MASK); + /* Unchecked and unconditional. */ - (void)__put_user(act->frame, &uop->dev_bus_addr); - (void)__put_user(act->frame, &uop->host_phys_addr); + (void)__put_user(handle, &uop->handle); + (void)__put_user(act->frame, &uop->dev_bus_addr); - out: spin_unlock(&rd->grant_table->lock); put_domain(rd); - return rc; + return; + + fail: + (void)__put_user(rc, &uop->handle); + spin_unlock(&rd->grant_table->lock); + put_domain(rd); + put_maptrack_handle(ld->grant_table, handle); +} + +static long +gnttab_map_grant_ref( + gnttab_map_grant_ref_t *uop, unsigned int count) +{ + int i; + for ( i = 0; i < count; i++ ) + __gnttab_map_grant_ref(&uop[i]); + return 0; +} + +static void +__gnttab_unmap_grant_ref( + gnttab_unmap_grant_ref_t *uop) +{ + domid_t dom; + grant_ref_t ref; + u16 handle; + struct domain *ld, *rd; + + active_grant_entry_t *act; + grant_entry_t *sha; + grant_mapping_t *map; + s16 rc = 0; + unsigned long frame, virt; + + ld = current; + + /* Bitwise-OR avoids short-circuiting which screws control flow. 
*/ + if ( unlikely(__get_user(virt, &uop->host_virt_addr) | + __get_user(frame, &uop->dev_bus_addr) | + __get_user(handle, &uop->handle)) ) + { + DPRINTK("Fault while reading gnttab_unmap_grant_ref_t.\n"); + return; /* don't set status */ + } + + map = &ld->grant_table->maptrack[handle]; + + if ( unlikely(handle >= NR_MAPTRACK_ENTRIES) || + unlikely(!(map->ref_and_flags & MAPTRACK_GNTMAP_MASK)) ) + { + DPRINTK("Bad handle (%d).\n", handle); + (void)__put_user(GNTST_bad_handle, &uop->status); + return; + } + + dom = map->domid; + ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT; + + if ( unlikely((rd = find_domain_by_id(dom)) == NULL) || + unlikely(ld == rd) ) + { + if ( rd != NULL ) + put_domain(rd); + DPRINTK("Could not find domain %d\n", dom); + (void)__put_user(GNTST_bad_domain, &uop->status); + return; + } + + act = &rd->grant_table->active[ref]; + sha = &rd->grant_table->shared[ref]; + + spin_lock(&rd->grant_table->lock); + + if ( frame != 0 ) + { + if ( unlikely(frame != act->frame) ) + PIN_FAIL(GNTST_general_error, + "Bad frame number doesn't match gntref.\n"); + if ( map->ref_and_flags & GNTMAP_device_map ) + act->pin -= (map->ref_and_flags & GNTMAP_readonly) ? + GNTPIN_devr_inc : GNTPIN_devw_inc; + } + else + { + frame = act->frame; + } + + if ( (virt != 0) && (map->ref_and_flags & GNTMAP_host_map) ) + { + act->pin -= (map->ref_and_flags & GNTMAP_readonly) ? 
+ GNTPIN_hstr_inc : GNTPIN_hstw_inc; + } + + if ( ((act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) == 0) && + !(map->ref_and_flags & GNTMAP_readonly) ) + { + put_page_type(&frame_table[frame]); + clear_bit(_GTF_writing, &sha->flags); + } + + if ( act->pin == 0 ) + { + put_page(&frame_table[frame]); + clear_bit(_GTF_reading, &sha->flags); + } + + fail: + (void)__put_user(rc, &uop->status); + spin_unlock(&rd->grant_table->lock); + put_domain(rd); +} + +static long +gnttab_unmap_grant_ref( + gnttab_unmap_grant_ref_t *uop, unsigned int count) +{ + int i; + for ( i = 0; i < count; i++ ) + __gnttab_unmap_grant_ref(&uop[i]); + return 0; } static long gnttab_setup_table( - gnttab_setup_table_t *uop) + gnttab_setup_table_t *uop, unsigned int count) { gnttab_setup_table_t op; struct domain *d; - if ( unlikely(__copy_from_user(&op, uop, sizeof(op)) != 0) ) + if ( count != 1 ) + return -EINVAL; + + if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) ) { DPRINTK("Fault while reading gnttab_setup_table_t.\n"); return -EFAULT; @@ -324,29 +398,33 @@ gnttab_setup_table( if ( unlikely(op.nr_frames > 1) ) { DPRINTK("Xen only supports one grant-table frame per domain.\n"); - return -EINVAL; + (void)put_user(GNTST_general_error, &uop->status); + return 0; } if ( op.dom == DOMID_SELF ) + { op.dom = current->domain; + } + else if ( unlikely(!IS_PRIV(current)) ) + { + (void)put_user(GNTST_permission_denied, &uop->status); + return 0; + } if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) ) { DPRINTK("Bad domid %d.\n", op.dom); - return -ESRCH; + (void)put_user(GNTST_bad_domain, &uop->status); + return 0; } if ( op.nr_frames == 1 ) { ASSERT(d->grant_table != NULL); - - if ( unlikely(put_user(virt_to_phys(d->grant_table) >> PAGE_SHIFT, - &op.frame_list[0])) ) - { - DPRINTK("Fault while writing frame list.\n"); - put_domain(d); - return -EFAULT; - } + (void)put_user(GNTST_okay, &uop->status); + (void)put_user(virt_to_phys(d->grant_table) >> PAGE_SHIFT, + &uop->frame_list[0]); } 
put_domain(d); @@ -355,22 +433,29 @@ gnttab_setup_table( long do_grant_table_op( - gnttab_op_t *uop) + unsigned int cmd, void *uop, unsigned int count) { long rc; - u32 cmd; - if ( unlikely(!access_ok(VERIFY_WRITE, uop, sizeof(*uop))) || - unlikely(__get_user(cmd, &uop->cmd)) ) - return -EFAULT; + if ( count > 512 ) + return -EINVAL; switch ( cmd ) { - case GNTTABOP_update_pin_status: - rc = gnttab_update_pin_status(&uop->u.update_pin_status); + case GNTTABOP_map_grant_ref: + if ( unlikely(!access_ok(VERIFY_WRITE, uop, + count * sizeof(gnttab_map_grant_ref_t))) ) + return -EFAULT; + rc = gnttab_map_grant_ref((gnttab_map_grant_ref_t *)uop, count); + break; + case GNTTABOP_unmap_grant_ref: + if ( unlikely(!access_ok(VERIFY_WRITE, uop, + count * sizeof(gnttab_unmap_grant_ref_t))) ) + return -EFAULT; + rc = gnttab_unmap_grant_ref((gnttab_unmap_grant_ref_t *)uop, count); break; case GNTTABOP_setup_table: - rc = gnttab_setup_table(&uop->u.setup_table); + rc = gnttab_setup_table((gnttab_setup_table_t *)uop, count); break; default: rc = -ENOSYS; @@ -381,66 +466,10 @@ do_grant_table_op( } int -gnttab_try_map( - struct domain *rd, struct domain *ld, unsigned long frame, int op) +gnttab_check_unmap( + struct domain *rd, struct domain *ld, unsigned long frame, int readonly) { - grant_table_t *t; - active_grant_entry_t *a; - u16 *ph, h; - - if ( unlikely((t = rd->grant_table) == NULL) ) - return 0; - - spin_lock(&t->lock); - - ph = &t->maphash[GNT_MAPHASH(frame)]; - while ( (h = *ph) != GNT_MAPHASH_INVALID ) - { - if ( (a = &t->active[*ph])->frame != frame ) - goto found; - ph = &a->next; - } - - fail: - spin_unlock(&t->lock); return 0; - - found: - if ( !(a->status & GNTPIN_host_accessible) ) - goto fail; - - switch ( op ) - { - case GNTTAB_MAP_RO: - if ( (a->status & GNTPIN_rmap_mask) == GNTPIN_rmap_mask ) - goto fail; - a->status += 1 << GNTPIN_rmap_shift; - break; - - case GNTTAB_MAP_RW: - if ( (a->status & GNTPIN_wmap_mask) == GNTPIN_wmap_mask ) - goto fail; - a->status += 
1 << GNTPIN_wmap_shift; - break; - - case GNTTAB_UNMAP_RO: - if ( (a->status & GNTPIN_rmap_mask) == 0 ) - goto fail; - a->status -= 1 << GNTPIN_rmap_shift; - break; - - case GNTTAB_UNMAP_RW: - if ( (a->status & GNTPIN_wmap_mask) == 0 ) - goto fail; - a->status -= 1 << GNTPIN_wmap_shift; - break; - - default: - BUG(); - } - - spin_unlock(&t->lock); - return 1; } int @@ -529,21 +558,24 @@ grant_table_create( grant_table_t *t; int i; - if ( (t = xmalloc(sizeof(grant_table_t))) == NULL ) + if ( (t = xmalloc(sizeof(*t))) == NULL ) goto no_mem; /* Simple stuff. */ - t->shared = NULL; - t->active = NULL; + memset(t, 0, sizeof(*t)); spin_lock_init(&t->lock); - for ( i = 0; i < GNT_MAPHASH_SZ; i++ ) - t->maphash[i] = GNT_MAPHASH_INVALID; /* Active grant-table page. */ if ( (t->active = xmalloc(sizeof(active_grant_entry_t) * NR_GRANT_ENTRIES)) == NULL ) goto no_mem; + if ( (t->maptrack = (void *)alloc_xenheap_page()) == NULL ) + goto no_mem; + memset(t->maptrack, 0, PAGE_SIZE); + for ( i = 0; i < NR_MAPTRACK_ENTRIES; i++ ) + t->maptrack[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT; + /* Set up shared grant-table page. */ if ( (t->shared = (void *)alloc_xenheap_page()) == NULL ) goto no_mem; @@ -560,6 +592,8 @@ grant_table_create( { if ( t->active != NULL ) xfree(t->active); + if ( t->maptrack != NULL ) + free_xenheap_page((unsigned long)t->maptrack); xfree(t); } return -ENOMEM; @@ -576,6 +610,7 @@ grant_table_destroy( /* Free memory relating to this grant table. */ d->grant_table = NULL; free_xenheap_page((unsigned long)t->shared); + free_xenheap_page((unsigned long)t->maptrack); xfree(t->active); xfree(t); } diff --git a/xen/common/kernel.c b/xen/common/kernel.c index 7b787a5536..7ec89ae314 100644 --- a/xen/common/kernel.c +++ b/xen/common/kernel.c @@ -297,19 +297,9 @@ void cmain(multiboot_info_t *mbi) xmem_cache_init(); xmem_cache_sizes_init(max_page); - /* - * Create a domain-structure allocator. The SLAB_NO_REAP flag is essential! 
- * This is because in some situations a domain's reference count will be - * incremented by someone with no other handle on the structure -- this is - * inherently racey because the struct could be freed by the time that the - * count is incremented. By specifying 'no-reap' we ensure that, worst - * case, they increment some other domain's count, rather than corrupting - * a random field in a random structure! - * See, for example, arch/x86/memory.c:get_page_from_l1e(). - */ domain_struct_cachep = xmem_cache_create( "domain_cache", sizeof(struct domain), - 0, SLAB_HWCACHE_ALIGN | SLAB_NO_REAP, NULL, NULL); + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if ( domain_struct_cachep == NULL ) panic("No slab cache for task structs."); diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c index 7e793d4607..f9b0da6304 100644 --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -311,7 +311,7 @@ unsigned long alloc_xenheap_pages(int order) for ( i = 0; i < (1 << order); i++ ) { - pg[i].count_info = PGC_always_set; + pg[i].count_info = 0; pg[i].u.inuse.domain = NULL; pg[i].u.inuse.type_info = 0; } @@ -384,7 +384,7 @@ struct pfn_info *alloc_domheap_pages(struct domain *d, int order) } } - pg[i].count_info = PGC_always_set; + pg[i].count_info = 0; pg[i].u.inuse.domain = NULL; pg[i].u.inuse.type_info = 0; } diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h index 24a129d616..83e59a060b 100644 --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -70,33 +70,30 @@ struct pfn_info #define PGT_type_mask (7<<29) /* Bits 29-31. */ /* Has this page been validated for use as its current type? */ #define _PGT_validated 28 -#define PGT_validated (1<<_PGT_validated) +#define PGT_validated (1U<<_PGT_validated) /* Owning guest has pinned this page to its current type? */ #define _PGT_pinned 27 -#define PGT_pinned (1<<_PGT_pinned) +#define PGT_pinned (1U<<_PGT_pinned) /* The 10 most significant bits of virt address if this is a page table. 
*/ #define PGT_va_shift 17 -#define PGT_va_mask (((1<<10)-1)<page_alloc_lock); \ /* _dom holds an allocation reference */ \ - ASSERT((_pfn)->count_info == PGC_always_set); \ + ASSERT((_pfn)->count_info == 0); \ (_pfn)->count_info |= PGC_allocated | 1; \ if ( unlikely((_dom)->xenheap_pages++ == 0) ) \ get_knownalive_domain(_dom); \ diff --git a/xen/include/hypervisor-ifs/grant_table.h b/xen/include/hypervisor-ifs/grant_table.h index 17fb28d6a0..cb48c1f3b9 100644 --- a/xen/include/hypervisor-ifs/grant_table.h +++ b/xen/include/hypervisor-ifs/grant_table.h @@ -24,7 +24,10 @@ * * Introducing a valid entry into the grant table: * 1. Write ent->domid. - * 2. Write ent->frame (to zero if installing GTF_accept_transfer). + * 2. Write ent->frame: + * GTF_permit_access: Frame to which access is permitted. + * GTF_accept_transfer: Pseudo-phys frame slot being filled by new + * frame, or zero if none. * 3. Write memory barrier (WMB). * 4. Write ent->flags, inc. valid type. * @@ -49,10 +52,11 @@ * [*] If GTF_transfer_committed is set then the grant entry is 'committed'. * The guest must /not/ modify the grant entry until the address of the * transferred frame is written. It is safe for the guest to spin waiting - * for this to occur (detect by observing non-zero value in ent->frame). + * for this to occur (detect by observing GTF_transfer_completed in + * ent->flags). * * Invalidating a committed GTF_accept_transfer entry: - * 1. Wait for ent->frame != 0. + * 1. Wait for (ent->flags & GTF_transfer_completed). * * Changing a GTF_permit_access from writable to read-only: * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing. @@ -86,10 +90,10 @@ typedef struct { * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame * to this guest. Xen writes the page number to @frame. 
*/ -#define GTF_invalid (0<<0) -#define GTF_permit_access (1<<0) -#define GTF_accept_transfer (2<<0) -#define GTF_type_mask (3<<0) +#define GTF_invalid (0U<<0) +#define GTF_permit_access (1U<<0) +#define GTF_accept_transfer (2U<<0) +#define GTF_type_mask (3U<<0) /* * Subflags for GTF_permit_access. @@ -98,23 +102,26 @@ typedef struct { * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] */ #define _GTF_readonly (2) -#define GTF_readonly (1<<_GTF_readonly) +#define GTF_readonly (1U<<_GTF_readonly) #define _GTF_reading (3) -#define GTF_reading (1<<_GTF_reading) +#define GTF_reading (1U<<_GTF_reading) #define _GTF_writing (4) -#define GTF_writing (1<<_GTF_writing) +#define GTF_writing (1U<<_GTF_writing) /* * Subflags for GTF_accept_transfer: * GTF_transfer_committed: Xen sets this flag to indicate that it is committed * to transferring ownership of a page frame. When a guest sees this flag - * it must /not/ modify the grant entry until the address of the - * transferred frame is written into the entry. - * NB. It is safe for the guest to spin-wait on the frame address: - * Xen will always write the frame address in a timely manner. + * it must /not/ modify the grant entry until GTF_transfer_completed is + * set by Xen. + * GTF_transfer_completed: It is safe for the guest to spin-wait on this flag + * after reading GTF_transfer_committed. Xen will always write the frame + * address, followed by ORing this flag, in a timely manner. */ #define _GTF_transfer_committed (2) -#define GTF_transfer_committed (1<<_GTF_transfer_committed) +#define GTF_transfer_committed (1U<<_GTF_transfer_committed) +#define _GTF_transfer_completed (3) +#define GTF_transfer_completed (1U<<_GTF_transfer_completed) /*********************************** @@ -127,28 +134,56 @@ typedef struct { typedef u16 grant_ref_t; /* - * GNTTABOP_update_pin_status: Change the pin status of of 's grant entry - * with reference . 
+ * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access + * by devices and/or host CPUs. If successful, <handle> is a tracking number + * that must be presented later to destroy the mapping(s). On error, <handle> + * is a negative status code. * NOTES: - * 1. If GNTPIN_dev_accessible is specified then <dev_bus_addr> is the address + * 1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address * via which I/O devices may access the granted frame. - * 2. If GNTPIN_host_accessible is specified then <host_phys_addr> is the - * physical address of the frame, which may be mapped into the caller's - * page tables. + * 2. If GNTMAP_host_map is specified then a mapping will be added at + * virtual address <host_virt_addr> in the current address space. + * 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a + * host mapping is destroyed by other means then it is *NOT* guaranteed + * to be accounted to the correct grant reference! */ -#define GNTTABOP_update_pin_status 0 +#define GNTTABOP_map_grant_ref 0 typedef struct { /* IN parameters. */ - domid_t dom; /* 0 */ - grant_ref_t ref; /* 2 */ - u16 pin_flags; /* 4 */ - u16 __pad; /* 6 */ + memory_t host_virt_addr; /* 0 */ + MEMORY_PADDING; + domid_t dom; /* 8 */ + grant_ref_t ref; /* 10 */ + u16 flags; /* 12: GNTMAP_* */ /* OUT parameters. */ - memory_t dev_bus_addr; /* 8 */ + s16 handle; /* 14: +ve: handle; -ve: GNTST_* */ + memory_t dev_bus_addr; /* 16 */ MEMORY_PADDING; - memory_t host_phys_addr; /* 12 */ +} PACKED gnttab_map_grant_ref_t; /* 24 bytes */ + +/* + * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings + * tracked by <handle>. If <host_virt_addr> or <dev_bus_addr> is zero, that + * field is ignored. If non-zero, they must refer to a device/host mapping + * that is tracked by <handle>. + * NOTES: + * 1. The call may fail in an undefined manner if either mapping is not + * tracked by <handle>. + * 2. After executing a batch of unmaps, it is guaranteed that no stale + * mappings will remain in the device or host TLBs.
+ */ +#define GNTTABOP_unmap_grant_ref 1 +typedef struct { + /* IN parameters. */ + memory_t host_virt_addr; /* 0 */ MEMORY_PADDING; -} PACKED gnttab_update_pin_status_t; /* 16 bytes */ + memory_t dev_bus_addr; /* 8 */ + MEMORY_PADDING; + u16 handle; /* 16 */ + /* OUT parameters. */ + s16 status; /* 18: GNTST_* */ + u32 __pad; +} PACKED gnttab_unmap_grant_ref_t; /* 24 bytes */ /* * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least @@ -159,38 +194,58 @@ typedef struct { * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. * 3. Xen may not support more than a single grant-table page per domain. */ -#define GNTTABOP_setup_table 1 +#define GNTTABOP_setup_table 2 typedef struct { /* IN parameters. */ domid_t dom; /* 0 */ u16 nr_frames; /* 2 */ - u32 __pad; + u16 __pad; /* OUT parameters. */ + s16 status; /* 6: GNTST_* */ unsigned long *frame_list; /* 8 */ MEMORY_PADDING; } PACKED gnttab_setup_table_t; /* 16 bytes */ -typedef struct { - u32 cmd; /* GNTTABOP_* */ /* 0 */ - u32 __reserved; /* 4 */ - union { /* 8 */ - gnttab_update_pin_status_t update_pin_status; - gnttab_setup_table_t setup_table; - u8 __dummy[16]; - } PACKED u; -} PACKED gnttab_op_t; /* 24 bytes */ - /* - * Bitfield values for <pin_flags>. + * Bitfield values for gnttab_map_grant_ref_t.flags. */ - /* Pin the grant entry for access by I/O devices. */ -#define _GNTPIN_dev_accessible (0) -#define GNTPIN_dev_accessible (1<<_GNTPIN_dev_accessible) - /* Pin the grant entry for access by host CPUs. */ -#define _GNTPIN_host_accessible (1) -#define GNTPIN_host_accessible (1<<_GNTPIN_host_accessible) + /* Map the grant entry for access by I/O devices. */ +#define _GNTMAP_device_map (0) +#define GNTMAP_device_map (1<<_GNTMAP_device_map) + /* Map the grant entry for access by host CPUs. */ +#define _GNTMAP_host_map (1) +#define GNTMAP_host_map (1<<_GNTMAP_host_map) /* Accesses to the granted frame will be restricted to read-only access.
*/ -#define _GNTPIN_readonly (2) -#define GNTPIN_readonly (1<<_GNTPIN_readonly) +#define _GNTMAP_readonly (2) +#define GNTMAP_readonly (1<<_GNTMAP_readonly) + /* + * GNTMAP_host_map subflag: + * 0 => The host mapping is usable only by the guest OS. + * 1 => The host mapping is usable by guest OS + current application. + */ +#define _GNTMAP_application_map (3) +#define GNTMAP_application_map (1<<_GNTMAP_application_map) + +/* + * Values for error status returns. All errors are -ve. + */ +#define GNTST_okay (0) +#define GNTST_general_error (-1) /* General undefined error. */ +#define GNTST_bad_domain (-2) /* Unrecognised domain id. */ +#define GNTST_bad_gntref (-3) /* Unrecognised or inappropriate gntref. */ +#define GNTST_bad_handle (-3) /* Unrecognised or inappropriate handle. */ +#define GNTST_no_device_space (-4) /* Out of space in I/O MMU. */ +#define GNTST_permission_denied (-5) /* Not enough privilege for operation. */ + +#define GNTTABOP_error_msgs { \ + "okay", \ + "undefined error", \ + "unrecognised domain id", \ + "invalid grant reference", \ + "invalid mapping handle", \ + "no spare translation slot in the I/O MMU", \ + "permission denied" \ +} + #endif /* __HYPERVISOR_IFS_GRANT_TABLE_H__ */ diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h index 52c7e828d7..bbacae6e4c 100644 --- a/xen/include/xen/grant_table.h +++ b/xen/include/xen/grant_table.h @@ -30,40 +30,53 @@ /* Active grant entry - used for shadowing GTF_permit_access grants. */ typedef struct { - u32 status; /* Reference count information. */ - u32 tlbflush_timestamp; /* Flush avoidance. */ - u16 next; /* Mapping hash chain. */ + u32 pin; /* Reference count information. */ domid_t domid; /* Domain being granted access. */ unsigned long frame; /* Frame being granted. */ } active_grant_entry_t; -/* - * Bitfields in active_grant_entry_t:counts. - * NB. Some other GNTPIN_xxx definitions are in hypervisor-ifs/grant_table.h. - */ /* Count of writable host-CPU mappings.
*/ -#define GNTPIN_wmap_shift (4) -#define GNTPIN_wmap_mask (0x3FFFU << GNTPIN_wmap_shift) +#define GNTPIN_hstw_shift (0) +#define GNTPIN_hstw_inc (1 << GNTPIN_hstw_shift) +#define GNTPIN_hstw_mask (0xFFU << GNTPIN_hstw_shift) /* Count of read-only host-CPU mappings. */ -#define GNTPIN_rmap_shift (18) -#define GNTPIN_rmap_mask (0x3FFFU << GNTPIN_rmap_shift) - -#define GNT_MAPHASH_SZ (256) -#define GNT_MAPHASH(_k) ((_k) & (GNT_MAPHASH_SZ-1)) -#define GNT_MAPHASH_INVALID (0xFFFFU) +#define GNTPIN_hstr_shift (8) +#define GNTPIN_hstr_inc (1 << GNTPIN_hstr_shift) +#define GNTPIN_hstr_mask (0xFFU << GNTPIN_hstr_shift) + /* Count of writable device-bus mappings. */ +#define GNTPIN_devw_shift (16) +#define GNTPIN_devw_inc (1 << GNTPIN_devw_shift) +#define GNTPIN_devw_mask (0xFFU << GNTPIN_devw_shift) + /* Count of read-only device-bus mappings. */ +#define GNTPIN_devr_shift (24) +#define GNTPIN_devr_inc (1 << GNTPIN_devr_shift) +#define GNTPIN_devr_mask (0xFFU << GNTPIN_devr_shift) #define NR_GRANT_ENTRIES (PAGE_SIZE / sizeof(grant_entry_t)) +/* + * Tracks a mapping of another domain's grant reference. Each domain has a + * table of these, indexes into which are returned as a 'mapping handle'. + */ +typedef struct { + u16 ref_and_flags; /* 0-2: GNTMAP_* ; 3-15: grant ref */ + domid_t domid; /* granting domain */ +} grant_mapping_t; +#define MAPTRACK_GNTMAP_MASK 7 +#define MAPTRACK_REF_SHIFT 3 +#define NR_MAPTRACK_ENTRIES (PAGE_SIZE / sizeof(grant_mapping_t)) + /* Per-domain grant information. */ typedef struct { /* Shared grant table (see include/hypervisor-ifs/grant_table.h). */ grant_entry_t *shared; /* Active grant table. */ active_grant_entry_t *active; - /* Lock protecting updates to maphash and shared grant table. */ + /* Mapping tracking table. */ + grant_mapping_t *maptrack; + unsigned int maptrack_head; + /* Lock protecting updates to active and shared grant tables. */ spinlock_t lock; - /* Hash table: frame -> active grant entry. 
*/ - u16 maphash[GNT_MAPHASH_SZ]; } grant_table_t; /* Start-of-day system initialisation. */ @@ -76,13 +89,9 @@ int grant_table_create( void grant_table_destroy( struct domain *d); -/* Create/destroy host-CPU mappings via a grant-table entry. */ -#define GNTTAB_MAP_RO 0 -#define GNTTAB_MAP_RW 1 -#define GNTTAB_UNMAP_RO 2 -#define GNTTAB_UNMAP_RW 3 -int gnttab_try_map( - struct domain *rd, struct domain *ld, unsigned long frame, int op); +/* Destroy host-CPU mappings via a grant-table entry. */ +int gnttab_check_unmap( + struct domain *rd, struct domain *ld, unsigned long frame, int readonly); /* * Check that the given grant reference (rd,ref) allows 'ld' to transfer -- 2.30.2