From: kaf24@scramble.cl.cam.ac.uk Date: Tue, 23 Mar 2004 10:40:28 +0000 (+0000) Subject: bitkeeper revision 1.823 (4060141cL2UWa7gkwwnmGJlHbv0sAA) X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~18298 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=e08c905aad06e3a49269d376169be3180dcdd4e4;p=xen.git bitkeeper revision 1.823 (4060141cL2UWa7gkwwnmGJlHbv0sAA) Many files: xeno -> xen renames. mvdir --- diff --git a/.rootkeys b/.rootkeys index 933bfc3d28..a022ce035e 100644 --- a/.rootkeys +++ b/.rootkeys @@ -605,86 +605,86 @@ 3eb3c87fS7DNbg0i6yhFs28UIqAK5g xen/tools/figlet/xen.flf 3f05a939TA3SLPY7ZiScMotLjg9owQ xenolinux-2.4.25-sparse/Documentation/Configure.help 3e5a4e6589G-U42lFKs43plskXoFxQ xenolinux-2.4.25-sparse/Makefile -3e5a4e65IEPjnWPZ5w3TxS5scV8Ewg xenolinux-2.4.25-sparse/arch/xeno/Makefile -3e5a4e65n-KhsEAs-A4ULiStBp-r6w xenolinux-2.4.25-sparse/arch/xeno/boot/Makefile -3e5a4e65OV_j_DBtjzt5vej771AJsA xenolinux-2.4.25-sparse/arch/xeno/config.in -3e5a4e65TNEycLeXqPSXQJQm_xGecA xenolinux-2.4.25-sparse/arch/xeno/defconfig -3e6377f5xwPfYZkPHPrDbEq1PRN7uQ xenolinux-2.4.25-sparse/arch/xeno/drivers/balloon/Makefile -3e6377f8Me8IqtvEhb70XFgOvqQH7A xenolinux-2.4.25-sparse/arch/xeno/drivers/balloon/balloon.c -3e5a4e65iHEuC5sjFhj42XALYbLVRw xenolinux-2.4.25-sparse/arch/xeno/drivers/block/Makefile -3e5a4e65pP5spJErBW69pJxSSdK9RA xenolinux-2.4.25-sparse/arch/xeno/drivers/block/block.c -3e67f822FOPwqHiaRKbrskgWgoNL5g xenolinux-2.4.25-sparse/arch/xeno/drivers/block/block.h -3e676eb5RXnHzSHgA1BvM0B1aIm4qg xenolinux-2.4.25-sparse/arch/xeno/drivers/block/vbd.c -3e5a4e65G3e2s0ghPMgiJ-gBTUJ0uQ xenolinux-2.4.25-sparse/arch/xeno/drivers/console/Makefile -3e5a4e651TH-SXHoufurnWjgl5bfOA xenolinux-2.4.25-sparse/arch/xeno/drivers/console/console.c -3e5a4e656nfFISThfbyXQOA6HN6YHw xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/Makefile -3e5a4e65BXtftInNHUC2PjDfPhdZZA xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/core.c -3e5a4e65gfn_ltB8ujHMVFApnTTNRQ xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/vfr.c -40420a6ebRqDjufoN1WSJvolEW2Wjw xenolinux-2.4.25-sparse/arch/xeno/drivers/evtchn/Makefile -40420a73Wou6JlsZDiu6YwjYomsm7A xenolinux-2.4.25-sparse/arch/xeno/drivers/evtchn/evtchn.c -3e5a4e65gZBRBB6RsSVg1c9iahigAw xenolinux-2.4.25-sparse/arch/xeno/drivers/network/Makefile -3e5a4e65ZxKrbFetVB84JhrTyZ1YuQ xenolinux-2.4.25-sparse/arch/xeno/drivers/network/network.c -405853f2wg7JXZJNltspMwOZJklxgw xenolinux-2.4.25-sparse/arch/xeno/drivers/vnetif/Makefile -405853f6nbeazrNyEWNHBuoSg2PiPA xenolinux-2.4.25-sparse/arch/xeno/drivers/vnetif/vnetif.c -3e5a4e65lWzkiPXsZdzPt2RNnJGG1g xenolinux-2.4.25-sparse/arch/xeno/kernel/Makefile -3e5a4e65_hqfuxtGG8IUy6wRM86Ecg xenolinux-2.4.25-sparse/arch/xeno/kernel/entry.S -3e5a4e65Hy_1iUvMTPsNqGNXd9uFpg xenolinux-2.4.25-sparse/arch/xeno/kernel/head.S -3e5a4e65ibVQmwlOn0j3sVH_j_6hAg xenolinux-2.4.25-sparse/arch/xeno/kernel/hypervisor.c -3e5a4e65RMGcuA-HCn3-wNx3fFQwdg xenolinux-2.4.25-sparse/arch/xeno/kernel/i386_ksyms.c -3e5a4e65MEvZhlr070sK5JsfAQlv7Q xenolinux-2.4.25-sparse/arch/xeno/kernel/ioport.c -3e5a4e653U6cELGv528IxOLHvCq8iA xenolinux-2.4.25-sparse/arch/xeno/kernel/irq.c -3e5a4e65muT6SU3ck47IP87Q7Ti5hA xenolinux-2.4.25-sparse/arch/xeno/kernel/ldt.c -4051db84bZeRX7a_Kh6VyyDuT5FOIg xenolinux-2.4.25-sparse/arch/xeno/kernel/pci-dma.c -4051db89iiHs38tWGkoW_RukNyaBHw xenolinux-2.4.25-sparse/arch/xeno/kernel/pci-i386.c -4051db8dJYX86ZCLA-WfTW2dAyrehw xenolinux-2.4.25-sparse/arch/xeno/kernel/pci-i386.h -4051db91BenvDZEMZxQCGkQyJYoG5w xenolinux-2.4.25-sparse/arch/xeno/kernel/pci-irq.c -4051db95N9N99FjsRwi49YKUNHWI8A xenolinux-2.4.25-sparse/arch/xeno/kernel/pci-pc.c -4051db99fbdTHgCpjywPCp7vjLCe7Q xenolinux-2.4.25-sparse/arch/xeno/kernel/physirq.c -3e5a4e65IGt3WwQDNiL4h-gYWgNTWQ xenolinux-2.4.25-sparse/arch/xeno/kernel/process.c -3e5a4e66tR-qJMLj3MppcKqmvuI2XQ xenolinux-2.4.25-sparse/arch/xeno/kernel/setup.c -3e5a4e66fWSTagLGU2P8BGFGRjhDiw xenolinux-2.4.25-sparse/arch/xeno/kernel/signal.c -3e5a4e66N__lUXNwzQ-eADRzK9LXuQ xenolinux-2.4.25-sparse/arch/xeno/kernel/time.c -3e5a4e66aHCbQ_F5QZ8VeyikLmuRZQ xenolinux-2.4.25-sparse/arch/xeno/kernel/traps.c -3e5a4e66-9_NczrVMbuQkoSLyXckIw xenolinux-2.4.25-sparse/arch/xeno/lib/Makefile -3e5a4e6637ZDk0BvFEC-aFQs599-ng xenolinux-2.4.25-sparse/arch/xeno/lib/delay.c -3f68905cF5i8-NYpIhGjKmh0y8Gu5g xenolinux-2.4.25-sparse/arch/xeno/lib/xeno_proc.c -3e5a4e66croVgpcJyJuF2ycQw0HuJw xenolinux-2.4.25-sparse/arch/xeno/mm/Makefile -3e5a4e66l8Q5Tv-6B3lQIRmaVbFPzg xenolinux-2.4.25-sparse/arch/xeno/mm/fault.c -3e5a4e668SE9rixq4ahho9rNhLUUFQ xenolinux-2.4.25-sparse/arch/xeno/mm/hypervisor.c -3e5a4e661gLzzff25pJooKIIWe7IWg xenolinux-2.4.25-sparse/arch/xeno/mm/init.c -3f0bed43UUdQichXAiVNrjV-y2Kzcg xenolinux-2.4.25-sparse/arch/xeno/mm/ioremap.c -3e5a4e66qRlSTcjafidMB6ulECADvg xenolinux-2.4.25-sparse/arch/xeno/vmlinux.lds +3e5a4e65IEPjnWPZ5w3TxS5scV8Ewg xenolinux-2.4.25-sparse/arch/xen/Makefile +3e5a4e65n-KhsEAs-A4ULiStBp-r6w xenolinux-2.4.25-sparse/arch/xen/boot/Makefile +3e5a4e65OV_j_DBtjzt5vej771AJsA xenolinux-2.4.25-sparse/arch/xen/config.in +3e5a4e65TNEycLeXqPSXQJQm_xGecA xenolinux-2.4.25-sparse/arch/xen/defconfig +3e6377f5xwPfYZkPHPrDbEq1PRN7uQ xenolinux-2.4.25-sparse/arch/xen/drivers/balloon/Makefile +3e6377f8Me8IqtvEhb70XFgOvqQH7A xenolinux-2.4.25-sparse/arch/xen/drivers/balloon/balloon.c +3e5a4e65iHEuC5sjFhj42XALYbLVRw xenolinux-2.4.25-sparse/arch/xen/drivers/block/Makefile +3e5a4e65pP5spJErBW69pJxSSdK9RA xenolinux-2.4.25-sparse/arch/xen/drivers/block/block.c +3e67f822FOPwqHiaRKbrskgWgoNL5g xenolinux-2.4.25-sparse/arch/xen/drivers/block/block.h +3e676eb5RXnHzSHgA1BvM0B1aIm4qg xenolinux-2.4.25-sparse/arch/xen/drivers/block/vbd.c +3e5a4e65G3e2s0ghPMgiJ-gBTUJ0uQ xenolinux-2.4.25-sparse/arch/xen/drivers/console/Makefile +3e5a4e651TH-SXHoufurnWjgl5bfOA xenolinux-2.4.25-sparse/arch/xen/drivers/console/console.c +3e5a4e656nfFISThfbyXQOA6HN6YHw xenolinux-2.4.25-sparse/arch/xen/drivers/dom0/Makefile +3e5a4e65BXtftInNHUC2PjDfPhdZZA xenolinux-2.4.25-sparse/arch/xen/drivers/dom0/core.c +3e5a4e65gfn_ltB8ujHMVFApnTTNRQ xenolinux-2.4.25-sparse/arch/xen/drivers/dom0/vfr.c +40420a6ebRqDjufoN1WSJvolEW2Wjw xenolinux-2.4.25-sparse/arch/xen/drivers/evtchn/Makefile +40420a73Wou6JlsZDiu6YwjYomsm7A xenolinux-2.4.25-sparse/arch/xen/drivers/evtchn/evtchn.c +3e5a4e65gZBRBB6RsSVg1c9iahigAw xenolinux-2.4.25-sparse/arch/xen/drivers/network/Makefile +3e5a4e65ZxKrbFetVB84JhrTyZ1YuQ xenolinux-2.4.25-sparse/arch/xen/drivers/network/network.c +405853f2wg7JXZJNltspMwOZJklxgw xenolinux-2.4.25-sparse/arch/xen/drivers/vnetif/Makefile +405853f6nbeazrNyEWNHBuoSg2PiPA xenolinux-2.4.25-sparse/arch/xen/drivers/vnetif/vnetif.c +3e5a4e65lWzkiPXsZdzPt2RNnJGG1g xenolinux-2.4.25-sparse/arch/xen/kernel/Makefile +3e5a4e65_hqfuxtGG8IUy6wRM86Ecg xenolinux-2.4.25-sparse/arch/xen/kernel/entry.S +3e5a4e65Hy_1iUvMTPsNqGNXd9uFpg xenolinux-2.4.25-sparse/arch/xen/kernel/head.S +3e5a4e65ibVQmwlOn0j3sVH_j_6hAg xenolinux-2.4.25-sparse/arch/xen/kernel/hypervisor.c +3e5a4e65RMGcuA-HCn3-wNx3fFQwdg xenolinux-2.4.25-sparse/arch/xen/kernel/i386_ksyms.c +3e5a4e65MEvZhlr070sK5JsfAQlv7Q xenolinux-2.4.25-sparse/arch/xen/kernel/ioport.c +3e5a4e653U6cELGv528IxOLHvCq8iA xenolinux-2.4.25-sparse/arch/xen/kernel/irq.c +3e5a4e65muT6SU3ck47IP87Q7Ti5hA xenolinux-2.4.25-sparse/arch/xen/kernel/ldt.c +4051db84bZeRX7a_Kh6VyyDuT5FOIg xenolinux-2.4.25-sparse/arch/xen/kernel/pci-dma.c +4051db89iiHs38tWGkoW_RukNyaBHw xenolinux-2.4.25-sparse/arch/xen/kernel/pci-i386.c +4051db8dJYX86ZCLA-WfTW2dAyrehw xenolinux-2.4.25-sparse/arch/xen/kernel/pci-i386.h +4051db91BenvDZEMZxQCGkQyJYoG5w xenolinux-2.4.25-sparse/arch/xen/kernel/pci-irq.c +4051db95N9N99FjsRwi49YKUNHWI8A xenolinux-2.4.25-sparse/arch/xen/kernel/pci-pc.c +4051db99fbdTHgCpjywPCp7vjLCe7Q xenolinux-2.4.25-sparse/arch/xen/kernel/physirq.c +3e5a4e65IGt3WwQDNiL4h-gYWgNTWQ xenolinux-2.4.25-sparse/arch/xen/kernel/process.c +3e5a4e66tR-qJMLj3MppcKqmvuI2XQ xenolinux-2.4.25-sparse/arch/xen/kernel/setup.c +3e5a4e66fWSTagLGU2P8BGFGRjhDiw xenolinux-2.4.25-sparse/arch/xen/kernel/signal.c +3e5a4e66N__lUXNwzQ-eADRzK9LXuQ xenolinux-2.4.25-sparse/arch/xen/kernel/time.c +3e5a4e66aHCbQ_F5QZ8VeyikLmuRZQ xenolinux-2.4.25-sparse/arch/xen/kernel/traps.c +3e5a4e66-9_NczrVMbuQkoSLyXckIw xenolinux-2.4.25-sparse/arch/xen/lib/Makefile +3e5a4e6637ZDk0BvFEC-aFQs599-ng xenolinux-2.4.25-sparse/arch/xen/lib/delay.c +3f68905cF5i8-NYpIhGjKmh0y8Gu5g xenolinux-2.4.25-sparse/arch/xen/lib/xeno_proc.c +3e5a4e66croVgpcJyJuF2ycQw0HuJw xenolinux-2.4.25-sparse/arch/xen/mm/Makefile +3e5a4e66l8Q5Tv-6B3lQIRmaVbFPzg xenolinux-2.4.25-sparse/arch/xen/mm/fault.c +3e5a4e668SE9rixq4ahho9rNhLUUFQ xenolinux-2.4.25-sparse/arch/xen/mm/hypervisor.c +3e5a4e661gLzzff25pJooKIIWe7IWg xenolinux-2.4.25-sparse/arch/xen/mm/init.c +3f0bed43UUdQichXAiVNrjV-y2Kzcg xenolinux-2.4.25-sparse/arch/xen/mm/ioremap.c +3e5a4e66qRlSTcjafidMB6ulECADvg xenolinux-2.4.25-sparse/arch/xen/vmlinux.lds 3e5a4e66mrtlmV75L1tjKDg8RaM5gA xenolinux-2.4.25-sparse/drivers/block/ll_rw_blk.c 3f108aeaLcGDgQdFAANLTUEid0a05w xenolinux-2.4.25-sparse/drivers/char/mem.c 3e5a4e66rw65CxyolW9PKz4GG42RcA xenolinux-2.4.25-sparse/drivers/char/tty_io.c 3e5a4e669uzIE54VwucPYtGwXLAbzA xenolinux-2.4.25-sparse/fs/exec.c -3e5a4e66wbeCpsJgVf_U8Jde-CNcsA xenolinux-2.4.25-sparse/include/asm-xeno/bugs.h -4048c0ddxnIa2GpBAVR-mY6mNSdeJg xenolinux-2.4.25-sparse/include/asm-xeno/control_if.h -3e5a4e66HdSkvIV6SJ1evG_xmTmXHA xenolinux-2.4.25-sparse/include/asm-xeno/desc.h -4048c0e0_P2wUTiT6UqgPhn0s7yFcA xenolinux-2.4.25-sparse/include/asm-xeno/evtchn.h -3e5a4e66SYp_UpAVcF8Lc1wa3Qtgzw xenolinux-2.4.25-sparse/include/asm-xeno/fixmap.h -3e5a4e67w_DWgjIJ17Tlossu1LGujQ xenolinux-2.4.25-sparse/include/asm-xeno/highmem.h -3e5a4e67YtcyDLQsShhCfQwPSELfvA xenolinux-2.4.25-sparse/include/asm-xeno/hw_irq.h -3e5a4e677VBavzM1UZIEcH1B-RlXMA xenolinux-2.4.25-sparse/include/asm-xeno/hypervisor.h -4060044fVx7-tokvNLKBf_6qBB4lqQ xenolinux-2.4.25-sparse/include/asm-xeno/io.h -3e5a4e673p7PEOyHFm3nHkYX6HQYBg xenolinux-2.4.25-sparse/include/asm-xeno/irq.h -3ead095db_LRUXnxaqs0dA1DWhPoQQ xenolinux-2.4.25-sparse/include/asm-xeno/keyboard.h -3e5a4e678ddsQOpbSiRdy1GRcDc9WA xenolinux-2.4.25-sparse/include/asm-xeno/mmu_context.h -3f8707e7ZmZ6TxyX0ZUEfvhA2Pb_xQ xenolinux-2.4.25-sparse/include/asm-xeno/msr.h -3e7270deQqtGPSnFxcW4AvJZuTUWfg xenolinux-2.4.25-sparse/include/asm-xeno/multicall.h -3e5a4e67mnQfh-R8KcQCaVo2Oho6yg xenolinux-2.4.25-sparse/include/asm-xeno/page.h -3e5a4e67uTYU5oEnIDjxuaez8njjqg xenolinux-2.4.25-sparse/include/asm-xeno/pgalloc.h -3e5a4e67X7JyupgdYkgDX19Huj2sAw xenolinux-2.4.25-sparse/include/asm-xeno/pgtable-2level.h -3e5a4e67gr4NLGtQ5CvSLimMYZlkOA xenolinux-2.4.25-sparse/include/asm-xeno/pgtable.h -3f108af1qNv8DVSGPv4zpqIU1txCkg xenolinux-2.4.25-sparse/include/asm-xeno/proc_cmd.h -3e5a4e676uK4xErTBDH6XJREn9LSyg xenolinux-2.4.25-sparse/include/asm-xeno/processor.h -3e5a4e67AJPjW-zL7p-xWuA6IVeH1g xenolinux-2.4.25-sparse/include/asm-xeno/ptrace.h -3e5a4e68uJz-xI0IBVMD7xRLQKJDFg xenolinux-2.4.25-sparse/include/asm-xeno/segment.h -3e5a4e68Nfdh6QcOKUTGCaYkf2LmYA xenolinux-2.4.25-sparse/include/asm-xeno/smp.h -3fa8e3f0kBLeE4To2vpdi3cpJbIkbQ xenolinux-2.4.25-sparse/include/asm-xeno/suspend.h -3e5a4e68mTr0zcp9SXDbnd-XLrrfxw xenolinux-2.4.25-sparse/include/asm-xeno/system.h -3f1056a9L_kqHcFheV00KbKBzv9j5w xenolinux-2.4.25-sparse/include/asm-xeno/vga.h -3f689063nhrIRsMMZjZxMFk7iEINqQ xenolinux-2.4.25-sparse/include/asm-xeno/xeno_proc.h +3e5a4e66wbeCpsJgVf_U8Jde-CNcsA xenolinux-2.4.25-sparse/include/asm-xen/bugs.h +4048c0ddxnIa2GpBAVR-mY6mNSdeJg xenolinux-2.4.25-sparse/include/asm-xen/control_if.h +3e5a4e66HdSkvIV6SJ1evG_xmTmXHA xenolinux-2.4.25-sparse/include/asm-xen/desc.h +4048c0e0_P2wUTiT6UqgPhn0s7yFcA xenolinux-2.4.25-sparse/include/asm-xen/evtchn.h +3e5a4e66SYp_UpAVcF8Lc1wa3Qtgzw xenolinux-2.4.25-sparse/include/asm-xen/fixmap.h +3e5a4e67w_DWgjIJ17Tlossu1LGujQ xenolinux-2.4.25-sparse/include/asm-xen/highmem.h +3e5a4e67YtcyDLQsShhCfQwPSELfvA xenolinux-2.4.25-sparse/include/asm-xen/hw_irq.h +3e5a4e677VBavzM1UZIEcH1B-RlXMA xenolinux-2.4.25-sparse/include/asm-xen/hypervisor.h +4060044fVx7-tokvNLKBf_6qBB4lqQ xenolinux-2.4.25-sparse/include/asm-xen/io.h +3e5a4e673p7PEOyHFm3nHkYX6HQYBg xenolinux-2.4.25-sparse/include/asm-xen/irq.h +3ead095db_LRUXnxaqs0dA1DWhPoQQ xenolinux-2.4.25-sparse/include/asm-xen/keyboard.h +3e5a4e678ddsQOpbSiRdy1GRcDc9WA xenolinux-2.4.25-sparse/include/asm-xen/mmu_context.h +3f8707e7ZmZ6TxyX0ZUEfvhA2Pb_xQ xenolinux-2.4.25-sparse/include/asm-xen/msr.h +3e7270deQqtGPSnFxcW4AvJZuTUWfg xenolinux-2.4.25-sparse/include/asm-xen/multicall.h +3e5a4e67mnQfh-R8KcQCaVo2Oho6yg xenolinux-2.4.25-sparse/include/asm-xen/page.h +3e5a4e67uTYU5oEnIDjxuaez8njjqg xenolinux-2.4.25-sparse/include/asm-xen/pgalloc.h +3e5a4e67X7JyupgdYkgDX19Huj2sAw xenolinux-2.4.25-sparse/include/asm-xen/pgtable-2level.h +3e5a4e67gr4NLGtQ5CvSLimMYZlkOA xenolinux-2.4.25-sparse/include/asm-xen/pgtable.h +3f108af1qNv8DVSGPv4zpqIU1txCkg xenolinux-2.4.25-sparse/include/asm-xen/proc_cmd.h +3e5a4e676uK4xErTBDH6XJREn9LSyg xenolinux-2.4.25-sparse/include/asm-xen/processor.h +3e5a4e67AJPjW-zL7p-xWuA6IVeH1g xenolinux-2.4.25-sparse/include/asm-xen/ptrace.h +3e5a4e68uJz-xI0IBVMD7xRLQKJDFg xenolinux-2.4.25-sparse/include/asm-xen/segment.h +3e5a4e68Nfdh6QcOKUTGCaYkf2LmYA xenolinux-2.4.25-sparse/include/asm-xen/smp.h +3fa8e3f0kBLeE4To2vpdi3cpJbIkbQ xenolinux-2.4.25-sparse/include/asm-xen/suspend.h +3e5a4e68mTr0zcp9SXDbnd-XLrrfxw xenolinux-2.4.25-sparse/include/asm-xen/system.h +3f1056a9L_kqHcFheV00KbKBzv9j5w xenolinux-2.4.25-sparse/include/asm-xen/vga.h +3f689063nhrIRsMMZjZxMFk7iEINqQ xenolinux-2.4.25-sparse/include/asm-xen/xeno_proc.h 3f056927gMHl7mWB89rb73JahbhQIA xenolinux-2.4.25-sparse/include/linux/blk.h 3e5a4e68WLX3B8owTvktP3HHOtznPQ xenolinux-2.4.25-sparse/include/linux/major.h 401c0590D_kwJDU59X8NyvqSv_Cl2A xenolinux-2.4.25-sparse/include/linux/sched.h diff --git a/xenolinux-2.4.25-sparse/Documentation/Configure.help b/xenolinux-2.4.25-sparse/Documentation/Configure.help index 84676592c8..8fff3cae45 100644 --- a/xenolinux-2.4.25-sparse/Documentation/Configure.help +++ b/xenolinux-2.4.25-sparse/Documentation/Configure.help @@ -597,10 +597,10 @@ CONFIG_BLK_DEV_NBD If unsure, say N. XenoLinux virtual block device support -CONFIG_XENOLINUX_BLOCK +CONFIG_XEN_VBD Xen can export virtual block devices which map back to extents of blocks on the physical partitions. This option is needed for - xenolinux to make use of such devices when running as a Xen guest. + Linux to make use of such devices when running as a Xen guest. If unsure, say Y. @@ -17383,11 +17383,11 @@ Acorn partition support CONFIG_ACORN_PARTITION Support hard disks partitioned under Acorn operating systems. -Xeno partition support -CONFIG_XENO_PARTITION - Support Xeno-style partitions on physical disks. The Xen +Xen virtual-partition support +CONFIG_XEN_VBD_PARTITION + Support partition-level virtual block devices. The Xen hypervisor can export partitions on a physical disk to clients, - but access to the partition table requires special hackery. + but access to the partition table requires special trickery. This will be used if this option is enabled; otherwise, it will be possible to access exported partitions by sector number but not with useful names e.g. /dev/hda4. diff --git a/xenolinux-2.4.25-sparse/Makefile b/xenolinux-2.4.25-sparse/Makefile index b6ccfb4d95..23c647e495 100644 --- a/xenolinux-2.4.25-sparse/Makefile +++ b/xenolinux-2.4.25-sparse/Makefile @@ -10,6 +10,7 @@ KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) # This will be overriden for Xen and UML builds. SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) ARCH ?= $(SUBARCH) +ARCH := $(ARCH:xeno=xen) ## Temporary hack while users adjust to new archname KERNELPATH=kernel-$(shell echo $(KERNELRELEASE) | sed -e "s/-//g") diff --git a/xenolinux-2.4.25-sparse/arch/xen/Makefile b/xenolinux-2.4.25-sparse/arch/xen/Makefile new file mode 100644 index 0000000000..04117a9e4b --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/Makefile @@ -0,0 +1,120 @@ +# +# xen/Makefile +# +# This file is included by the global makefile so that you can add your own +# architecture-specific flags and dependencies. Remember to do have actions +# for "archclean" and "archdep" for cleaning up and making dependencies for +# this architecture +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1994 by Linus Torvalds +# +# 19990713 Artur Skawina +# Added '-march' and '-mpreferred-stack-boundary' support +# + +override EXTRAVERSION := -xen$(EXTRAVERSION) + +LD=$(CROSS_COMPILE)ld -m elf_i386 +OBJCOPY=$(CROSS_COMPILE)objcopy -O binary -R .note -R .comment -S +LDFLAGS=-e stext +LINKFLAGS =-T $(TOPDIR)/arch/xen/vmlinux.lds $(LDFLAGS) + +CFLAGS += -pipe + +check_gcc = $(shell if $(CC) $(1) -S -o /dev/null -xc /dev/null > /dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi) + +# prevent gcc from keeping the stack 16 byte aligned +CFLAGS += $(call check_gcc,-mpreferred-stack-boundary=2,) + +ifdef CONFIG_M686 +CFLAGS += -march=i686 +endif + +ifdef CONFIG_MPENTIUMIII +CFLAGS += -march=i686 +endif + +ifdef CONFIG_MPENTIUM4 +CFLAGS += -march=i686 +endif + +ifdef CONFIG_MK7 +CFLAGS += $(call check_gcc,-march=athlon,-march=i686 -malign-functions=4) +endif + +HEAD := arch/xen/kernel/head.o arch/xen/kernel/init_task.o + +SUBDIRS += arch/xen/kernel arch/xen/mm arch/xen/lib +SUBDIRS += arch/xen/drivers/console arch/xen/drivers/network +SUBDIRS += arch/xen/drivers/evtchn arch/xen/drivers/block +SUBDIRS += arch/xen/drivers/balloon arch/xen/drivers/vnetif +ifdef CONFIG_XEN_PRIVILEGED_GUEST +SUBDIRS += arch/xen/drivers/dom0 +endif + +CORE_FILES += arch/xen/kernel/kernel.o arch/xen/mm/mm.o +CORE_FILES += arch/xen/drivers/evtchn/drv.o +CORE_FILES += arch/xen/drivers/console/drv.o +CORE_FILES += arch/xen/drivers/block/drv.o +CORE_FILES += arch/xen/drivers/network/drv.o +CORE_FILES += arch/xen/drivers/vnetif/drv.o +ifdef CONFIG_XEN_PRIVILEGED_GUEST +CORE_FILES += arch/xen/drivers/dom0/drv.o +endif +CORE_FILES += arch/xen/drivers/balloon/drv.o +LIBS := $(TOPDIR)/arch/xen/lib/lib.a $(LIBS) $(TOPDIR)/arch/xen/lib/lib.a + +arch/xen/kernel: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xen/kernel + +arch/xen/mm: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xen/mm + +arch/xen/drivers/console: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xen/drivers/console + +arch/xen/drivers/network: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xen/drivers/network + +arch/xen/drivers/block: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xen/drivers/block + +arch/xen/drivers/dom0: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xen/drivers/dom0 + +arch/xen/drivers/balloon: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xen/drivers/balloon + +MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot + +vmlinux: arch/xen/vmlinux.lds + +FORCE: ; + +.PHONY: bzImage compressed clean archclean archmrproper archdep + +bzImage: vmlinux + @$(MAKEBOOT) xenolinux.gz + +install: bzImage + mkdir -p $(prefix)/boot + install -m0644 arch/$(ARCH)/boot/xenolinux.gz $(prefix)/boot/xenolinux.gz + +dist: bzImage + mkdir -p ../install/boot + install -m0644 arch/$(ARCH)/boot/xenolinux.gz ../install/boot/xenolinux.gz + +archclean: + @$(MAKEBOOT) clean + +archmrproper: + rm -f include/asm-xen/hypervisor-ifs/arch + +archdep: + rm -f include/asm-xen/hypervisor-ifs/arch + ( cd include/asm-xen/hypervisor-ifs ; rm -rf arch ; ln -sf arch-$(SUBARCH) arch) + @$(MAKEBOOT) dep diff --git a/xenolinux-2.4.25-sparse/arch/xen/boot/Makefile b/xenolinux-2.4.25-sparse/arch/xen/boot/Makefile new file mode 100644 index 0000000000..01fb131959 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/boot/Makefile @@ -0,0 +1,22 @@ +# +# arch/xen/boot/Makefile +# + +xenolinux.gz: xenolinux + gzip -f -9 < $< > $@ + +xenolinux: $(TOPDIR)/vmlinux + # Guest OS header -- first 8 bytes are identifier 'XenGuest'. + echo -e -n 'XenGuest' >$@ + # Guest OS header -- next 4 bytes are load address (0xC0000000). + echo -e -n '\000\000\000\300' >>$@ + $(OBJCOPY) $< xenolinux.body + # Guest OS header is immediately followed by raw OS image. + # Start address must be at byte 0. + cat xenolinux.body >>$@ + rm -f xenolinux.body + +dep: + +clean: + rm -f xenolinux xenolinux.gz \ No newline at end of file diff --git a/xenolinux-2.4.25-sparse/arch/xen/config.in b/xenolinux-2.4.25-sparse/arch/xen/config.in new file mode 100644 index 0000000000..ebfc77aea9 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/config.in @@ -0,0 +1,190 @@ +# +# For a description of the syntax of this configuration file, +# see Documentation/kbuild/config-language.txt. +# +mainmenu_name "Linux Kernel Configuration" + +define_bool CONFIG_XEN y + +define_bool CONFIG_X86 y +define_bool CONFIG_ISA y +define_bool CONFIG_SBUS n + +define_bool CONFIG_UID16 y + +mainmenu_option next_comment +comment 'Xenolinux' +bool 'Support for privileged operations (domain 0)' CONFIG_XEN_PRIVILEGED_GUEST +endmenu +# The IBM S/390 patch needs this. +define_bool CONFIG_NO_IDLE_HZ y + +mainmenu_option next_comment +comment 'Code maturity level options' +bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL +endmenu + +mainmenu_option next_comment +comment 'Loadable module support' +bool 'Enable loadable module support' CONFIG_MODULES +if [ "$CONFIG_MODULES" = "y" ]; then + bool ' Set version information on all module symbols' CONFIG_MODVERSIONS + bool ' Kernel module loader' CONFIG_KMOD +fi +endmenu + +mainmenu_option next_comment +comment 'Processor type and features' +choice 'Processor family' \ + "Pentium-Pro/Celeron/Pentium-II CONFIG_M686 \ + Pentium-III/Celeron(Coppermine) CONFIG_MPENTIUMIII \ + Pentium-4 CONFIG_MPENTIUM4 \ + Athlon/Duron/K7 CONFIG_MK7 \ + Opteron/Athlon64/Hammer/K8 CONFIG_MK8 \ + VIA-C3-2 CONFIG_MVIAC3_2" Pentium-Pro + + define_bool CONFIG_X86_WP_WORKS_OK y + define_bool CONFIG_X86_INVLPG y + define_bool CONFIG_X86_CMPXCHG y + define_bool CONFIG_X86_XADD y + define_bool CONFIG_X86_BSWAP y + define_bool CONFIG_X86_POPAD_OK y + define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n + define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y + + define_bool CONFIG_X86_GOOD_APIC y + define_bool CONFIG_X86_PGE y + define_bool CONFIG_X86_USE_PPRO_CHECKSUM y + define_bool CONFIG_X86_TSC y + +if [ "$CONFIG_M686" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 5 +fi +if [ "$CONFIG_MPENTIUMIII" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 5 +fi +if [ "$CONFIG_MPENTIUM4" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 7 +fi +if [ "$CONFIG_MK8" = "y" ]; then + define_bool CONFIG_MK7 y +fi +if [ "$CONFIG_MK7" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 6 + define_bool CONFIG_X86_USE_3DNOW y +fi +if [ "$CONFIG_MVIAC3_2" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 5 +fi + +if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + tristate 'BIOS Enhanced Disk Drive calls determine boot disk (EXPERIMENTAL)' CONFIG_EDD +fi + +choice 'High Memory Support' \ + "off CONFIG_NOHIGHMEM \ + 4GB CONFIG_HIGHMEM4G \ + 64GB CONFIG_HIGHMEM64G" off +if [ "$CONFIG_HIGHMEM4G" = "y" ]; then + define_bool CONFIG_HIGHMEM y +fi +if [ "$CONFIG_HIGHMEM64G" = "y" ]; then + define_bool CONFIG_HIGHMEM y + define_bool CONFIG_X86_PAE y +fi + +#bool 'Symmetric multi-processing support' CONFIG_SMP +#if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then +# define_bool CONFIG_HAVE_DEC_LOCK y +#fi +endmenu + +mainmenu_option next_comment +comment 'General setup' + +bool 'Networking support' CONFIG_NET + +bool 'PCI support' CONFIG_PCI +if [ "$CONFIG_PCI" = "y" ]; then + tristate ' 3c590/3c900 series (592/595/597) "Vortex/Boomerang" support' CONFIG_VORTEX + tristate 'Intel(R) PRO/1000 Gigabit Ethernet support' CONFIG_E1000 + if [ "$CONFIG_E1000" != "n" ]; then + bool ' Use Rx Polling (NAPI)' CONFIG_E1000_NAPI + fi +fi +source drivers/pci/Config.in + +bool 'System V IPC' CONFIG_SYSVIPC +bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT +bool 'Sysctl support' CONFIG_SYSCTL +if [ "$CONFIG_PROC_FS" = "y" ]; then + choice 'Kernel core (/proc/kcore) format' \ + "ELF CONFIG_KCORE_ELF \ + A.OUT CONFIG_KCORE_AOUT" ELF +fi +tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT +tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC +bool 'Select task to kill on out of memory condition' CONFIG_OOM_KILLER + +endmenu + +if [ "$CONFIG_NET" = "y" ]; then + source net/Config.in +fi + + +# +# Block device driver configuration +# +mainmenu_option next_comment +comment 'Block devices' +tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP +dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET +tristate 'RAM disk support' CONFIG_BLK_DEV_RAM +if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then + int ' Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096 +fi +dep_bool ' Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM +bool 'Per partition statistics in /proc/partitions' CONFIG_BLK_STATS +bool 'XenoLinux virtual block device support' CONFIG_XEN_VBD +#endmenu +define_bool CONFIG_BLK_DEV_HD n +endmenu + +source drivers/char/Config.in + +source fs/Config.in + +mainmenu_option next_comment +comment 'Console drivers' + +bool 'Xen console support' CONFIG_XEN_CONSOLE + +if [ "$CONFIG_VT" = "y" ]; then + bool 'VGA text console' CONFIG_VGA_CONSOLE + bool 'Dummy console' CONFIG_DUMMY_CONSOLE +fi +endmenu + +mainmenu_option next_comment +comment 'Kernel hacking' + +bool 'Kernel debugging' CONFIG_DEBUG_KERNEL +if [ "$CONFIG_DEBUG_KERNEL" != "n" ]; then + bool ' Debug high memory support' CONFIG_DEBUG_HIGHMEM + bool ' Debug memory allocations' CONFIG_DEBUG_SLAB + bool ' Memory mapped I/O debugging' CONFIG_DEBUG_IOVIRT + bool ' Magic SysRq key' CONFIG_MAGIC_SYSRQ + bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK + bool ' Verbose BUG() reporting (adds 70K)' CONFIG_DEBUG_BUGVERBOSE + bool ' Load all symbols for debugging' CONFIG_KALLSYMS + bool ' Compile the kernel with frame pointers' CONFIG_FRAME_POINTER +fi + +int 'Kernel messages buffer length shift (0 = default)' CONFIG_LOG_BUF_SHIFT 0 + +endmenu + +source crypto/Config.in +source lib/Config.in diff --git a/xenolinux-2.4.25-sparse/arch/xen/defconfig b/xenolinux-2.4.25-sparse/arch/xen/defconfig new file mode 100644 index 0000000000..7b1d1b1ec8 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/defconfig @@ -0,0 +1,457 @@ +# +# Automatically generated make config: don't edit +# +CONFIG_XEN=y +CONFIG_X86=y +CONFIG_ISA=y +# CONFIG_SBUS is not set +CONFIG_UID16=y + +# +# Xenolinux options +# +# support for privileged domains +CONFIG_XEN_PRIVILEGED_GUEST=y +# on-demand timer setting (taken from s390 patch set) +CONFIG_NO_IDLE_HZ=y + +# +# Code maturity level options +# +# CONFIG_EXPERIMENTAL is not set + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODVERSIONS=y +CONFIG_KMOD=y + +# +# Processor type and features +# +CONFIG_M686=y +# CONFIG_MPENTIUMIII is not set +# CONFIG_MPENTIUM4 is not set +# CONFIG_MK7 is not set +CONFIG_X86_WP_WORKS_OK=y +CONFIG_X86_INVLPG=y +CONFIG_X86_CMPXCHG=y +CONFIG_X86_XADD=y +CONFIG_X86_BSWAP=y +CONFIG_X86_POPAD_OK=y +# CONFIG_RWSEM_GENERIC_SPINLOCK is not set +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_X86_GOOD_APIC=y +CONFIG_X86_PGE=y +CONFIG_X86_USE_PPRO_CHECKSUM=y +CONFIG_X86_TSC=y +CONFIG_X86_L1_CACHE_SHIFT=5 +CONFIG_NOHIGHMEM=y +# CONFIG_HIGHMEM4G is not set +# CONFIG_HIGHMEM64G is not set + +# +# General setup +# +CONFIG_NET=y +# CONFIG_PCI is not set +# CONFIG_PCI_NAMES is not set +CONFIG_SYSVIPC=y +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +CONFIG_KCORE_ELF=y +# CONFIG_KCORE_AOUT is not set +CONFIG_BINFMT_AOUT=y +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set +# CONFIG_OOM_KILLER is not set + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +# CONFIG_NETLINK_DEV is not set +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_FILTER=y +CONFIG_UNIX=y +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +# CONFIG_IP_PNP_BOOTP is not set +# CONFIG_IP_PNP_RARP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_INET_ECN is not set +# CONFIG_SYN_COOKIES is not set + +# +# IP: Netfilter Configuration +# +CONFIG_IP_NF_CONNTRACK=y +CONFIG_IP_NF_FTP=y +# CONFIG_IP_NF_AMANDA is not set +CONFIG_IP_NF_TFTP=y +CONFIG_IP_NF_IRC=y +CONFIG_IP_NF_IPTABLES=y +# CONFIG_IP_NF_MATCH_LIMIT is not set +# CONFIG_IP_NF_MATCH_MAC is not set +# CONFIG_IP_NF_MATCH_PKTTYPE is not set +# CONFIG_IP_NF_MATCH_MARK is not set +# CONFIG_IP_NF_MATCH_MULTIPORT is not set +# CONFIG_IP_NF_MATCH_TOS is not set +# CONFIG_IP_NF_MATCH_RECENT is not set +# CONFIG_IP_NF_MATCH_ECN is not set +# CONFIG_IP_NF_MATCH_DSCP is not set +# CONFIG_IP_NF_MATCH_AH_ESP is not set +# CONFIG_IP_NF_MATCH_LENGTH is not set +# CONFIG_IP_NF_MATCH_TTL is not set +# CONFIG_IP_NF_MATCH_TCPMSS is not set +# CONFIG_IP_NF_MATCH_HELPER is not set +CONFIG_IP_NF_MATCH_STATE=y +CONFIG_IP_NF_MATCH_CONNTRACK=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_NAT=y +CONFIG_IP_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_IP_NF_TARGET_REDIRECT=y +# CONFIG_IP_NF_NAT_LOCAL is not set +CONFIG_IP_NF_NAT_IRC=y +CONFIG_IP_NF_NAT_FTP=y +CONFIG_IP_NF_NAT_TFTP=y +# CONFIG_IP_NF_MANGLE is not set +CONFIG_IP_NF_TARGET_LOG=y +CONFIG_IP_NF_TARGET_ULOG=y +# CONFIG_IP_NF_TARGET_TCPMSS is not set +# CONFIG_IP_NF_ARPTABLES is not set + +# +# IP: Virtual Server Configuration +# +# CONFIG_IP_VS is not set +# CONFIG_VLAN_8021Q is not set + +# +# +# +# CONFIG_IPX is not set +# CONFIG_ATALK is not set + +# +# Appletalk devices +# +# CONFIG_DEV_APPLETALK is not set +# CONFIG_DECNET is not set +# CONFIG_BRIDGE is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set + +# +# Block devices +# +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_NBD=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_INITRD=y +# CONFIG_BLK_STATS is not set +CONFIG_XEN_VBD=y +# CONFIG_BLK_DEV_HD is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +# CONFIG_SERIAL is not set +# CONFIG_SERIAL_EXTENDED is not set +# CONFIG_SERIAL_NONSTANDARD is not set +CONFIG_UNIX98_PTYS=y +CONFIG_UNIX98_PTY_COUNT=256 +# CONFIG_PRINTER is not set +# CONFIG_PPDEV is not set +# CONFIG_TIPAR is not set + +# +# I2C support +# +# CONFIG_I2C is not set + +# +# Mice +# +# CONFIG_BUSMOUSE is not set +CONFIG_MOUSE=y +CONFIG_PSMOUSE=y +# CONFIG_82C710_MOUSE is not set +# CONFIG_PC110_PAD is not set +# CONFIG_MK712_MOUSE is not set + +# +# Joysticks +# +# CONFIG_INPUT_GAMEPORT is not set +# CONFIG_INPUT_NS558 is not set +# CONFIG_INPUT_LIGHTNING is not set +# CONFIG_INPUT_PCIGAME is not set +# CONFIG_INPUT_CS461X is not set +# CONFIG_INPUT_EMU10K1 is not set +# CONFIG_INPUT_SERIO is not set +# CONFIG_INPUT_SERPORT is not set + +# +# Joysticks +# +# CONFIG_INPUT_ANALOG is not set +# CONFIG_INPUT_A3D is not set +# CONFIG_INPUT_ADI is not set +# CONFIG_INPUT_COBRA is not set +# CONFIG_INPUT_GF2K is not set +# CONFIG_INPUT_GRIP is not set +# CONFIG_INPUT_INTERACT is not set +# CONFIG_INPUT_TMDC is not set +# CONFIG_INPUT_SIDEWINDER is not set +# CONFIG_INPUT_IFORCE_USB is not set +# CONFIG_INPUT_IFORCE_232 is not set +# CONFIG_INPUT_WARRIOR is not set +# CONFIG_INPUT_MAGELLAN is not set +# CONFIG_INPUT_SPACEORB is not set +# CONFIG_INPUT_SPACEBALL is not set +# CONFIG_INPUT_STINGER is not set +# CONFIG_INPUT_DB9 is not set +# CONFIG_INPUT_GAMECON is not set +# CONFIG_INPUT_TURBOGRAFX is not set +# CONFIG_QIC02_TAPE is not set +# CONFIG_IPMI_HANDLER is not set +# CONFIG_IPMI_PANIC_EVENT is not set +# CONFIG_IPMI_DEVICE_INTERFACE is not set +# CONFIG_IPMI_KCS is not set +# CONFIG_IPMI_WATCHDOG is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_SCx200 is not set +# CONFIG_SCx200_GPIO is not set +# CONFIG_AMD_RNG is not set +# CONFIG_INTEL_RNG is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_AMD_PM768 is not set +# CONFIG_NVRAM is not set +# CONFIG_RTC is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_FTAPE is not set +# CONFIG_AGP is not set + +# +# Direct Rendering Manager (XFree86 DRI support) +# +# CONFIG_DRM is not set +# CONFIG_MWAVE is not set +# CONFIG_OBMOUSE is not set + +# +# File systems +# +# CONFIG_QUOTA is not set +# CONFIG_QFMT_V2 is not set +CONFIG_AUTOFS_FS=y +CONFIG_AUTOFS4_FS=y +# CONFIG_REISERFS_FS is not set +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +# CONFIG_ADFS_FS is not set +# CONFIG_ADFS_FS_RW is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BEFS_DEBUG is not set +# CONFIG_BFS_FS is not set +CONFIG_EXT3_FS=y +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FAT_FS=y +CONFIG_MSDOS_FS=y +CONFIG_UMSDOS_FS=y +CONFIG_VFAT_FS=y +# CONFIG_EFS_FS is not set +# CONFIG_JFFS_FS is not set +# CONFIG_JFFS2_FS is not set +# CONFIG_CRAMFS is not set +CONFIG_TMPFS=y +CONFIG_RAMFS=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +# CONFIG_JFS_FS is not set +# CONFIG_JFS_DEBUG is not set +# CONFIG_JFS_STATISTICS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_NTFS_FS is not set +# CONFIG_NTFS_RW is not set +# CONFIG_HPFS_FS is not set +CONFIG_PROC_FS=y +# CONFIG_DEVFS_FS is not set +# CONFIG_DEVFS_MOUNT is not set +# CONFIG_DEVFS_DEBUG is not set +CONFIG_DEVPTS_FS=y +# CONFIG_QNX4FS_FS is not set +# CONFIG_QNX4FS_RW is not set +# CONFIG_ROMFS_FS is not set +CONFIG_EXT2_FS=y +# CONFIG_SYSV_FS is not set +# CONFIG_UDF_FS is not set +# CONFIG_UDF_RW is not set +# CONFIG_UFS_FS is not set +# CONFIG_UFS_FS_WRITE is not set +# CONFIG_XFS_FS is not set +# CONFIG_XFS_QUOTA is not set +# CONFIG_XFS_RT is not set +# CONFIG_XFS_TRACE is not set +# CONFIG_XFS_DEBUG is not set + +# +# Network File Systems +# +# CONFIG_CODA_FS is not set +# CONFIG_INTERMEZZO_FS is not set +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +# CONFIG_NFS_DIRECTIO is not set +CONFIG_ROOT_NFS=y +CONFIG_NFSD=y +CONFIG_NFSD_V3=y +# CONFIG_NFSD_TCP is not set +CONFIG_SUNRPC=y +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +# CONFIG_SMB_FS is not set +# CONFIG_NCP_FS is not set +# CONFIG_NCPFS_PACKET_SIGNING is not set +# CONFIG_NCPFS_IOCTL_LOCKING is not set +# CONFIG_NCPFS_STRONG is not set +# CONFIG_NCPFS_NFS_NS is not set +# CONFIG_NCPFS_OS2_NS is not set +# CONFIG_NCPFS_SMALLDOS is not set +# CONFIG_NCPFS_NLS is not set +# CONFIG_NCPFS_EXTRAS is not set +CONFIG_ZISOFS_FS=y + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +# CONFIG_MAC_PARTITION is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_BSD_DISKLABEL is not set +# CONFIG_MINIX_SUBPARTITION is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_LDM_PARTITION is not set +# CONFIG_SGI_PARTITION is not set +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +# CONFIG_EFI_PARTITION is not set +# CONFIG_SMB_NLS is not set +CONFIG_NLS=y + +# +# Native Language Support +# +CONFIG_NLS_DEFAULT="iso8559-1" +# CONFIG_NLS_CODEPAGE_437 is not set +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +CONFIG_NLS_ISO8859_1=y +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +# CONFIG_NLS_UTF8 is not set + +# +# Console drivers +# +CONFIG_XEN_CONSOLE=y +CONFIG_VGA_CONSOLE=y +CONFIG_DUMMY_CONSOLE=y + +# +# Kernel hacking +# +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_HIGHMEM is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_IOVIRT is not set +# CONFIG_MAGIC_SYSRQ is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_BUGVERBOSE is not set +CONFIG_KALLSYMS=y +# CONFIG_FRAME_POINTER is not set +CONFIG_LOG_BUF_SHIFT=0 + +# +# Cryptographic options +# +# CONFIG_CRYPTO is not set + +# +# Library routines +# +# CONFIG_CRC32 is not set +CONFIG_ZLIB_INFLATE=y +# CONFIG_ZLIB_DEFLATE is not set diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/balloon/Makefile b/xenolinux-2.4.25-sparse/arch/xen/drivers/balloon/Makefile new file mode 100644 index 0000000000..9fb2227978 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/balloon/Makefile @@ -0,0 +1,3 @@ +O_TARGET := drv.o +obj-y := balloon.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/balloon/balloon.c b/xenolinux-2.4.25-sparse/arch/xen/drivers/balloon/balloon.c new file mode 100644 index 0000000000..aee0e4cf99 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/balloon/balloon.c @@ -0,0 +1,282 @@ +/****************************************************************************** + * balloon.c + * + * Xen balloon driver - enables returning/claiming memory to/from Xen. + * + * Copyright (c) 2003, B Dragovic + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +/* USER DEFINES -- THESE SHOULD BE COPIED TO USER-SPACE TOOLS */ +#define USER_INFLATE_BALLOON 1 /* return mem to hypervisor */ +#define USER_DEFLATE_BALLOON 2 /* claim mem from hypervisor */ +typedef struct user_balloon_op { + unsigned int op; + unsigned long size; +} user_balloon_op_t; +/* END OF USER DEFINE */ + +/* Dead entry written into ballon-owned entries in the PMT. */ +#define DEAD 0xdeadbeef + +static struct proc_dir_entry *balloon_pde; +unsigned long credit; + +static inline pte_t *get_ptep(unsigned long addr) +{ + pgd_t *pgd; pmd_t *pmd; pte_t *ptep; + pgd = pgd_offset_k(addr); + + if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG(); + + pmd = pmd_offset(pgd, addr); + if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG(); + + ptep = pte_offset(pmd, addr); + + return ptep; +} + +/* main function for relinquishing bit of memory */ +static unsigned long inflate_balloon(unsigned long num_pages) +{ + dom_mem_op_t dom_mem_op; + unsigned long *parray; + unsigned long *currp; + unsigned long curraddr; + unsigned long ret = 0; + unsigned long vaddr; + unsigned long i, j; + + parray = (unsigned long *)kmalloc(num_pages * + sizeof(unsigned long), GFP_KERNEL); + currp = parray; + + for ( i = 0; i < num_pages; i++ ) + { + /* try to obtain a free page, has to be done with GFP_ATOMIC + * as we do not want to sleep indefinately. + */ + vaddr = __get_free_page(GFP_ATOMIC); + + /* if allocation fails, free all reserved pages */ + if(!vaddr){ + printk("Unable to inflate balloon by %ld, only %ld pages free.", + num_pages, i); + currp = parray; + for(j = 0; j < i; j++){ + free_page(*currp++); + } + goto cleanup; + } + + *currp++ = vaddr; + } + + + currp = parray; + for ( i = 0; i < num_pages; i++ ) + { + curraddr = *currp; + *currp = virt_to_machine(*currp) >> PAGE_SHIFT; + queue_l1_entry_update(get_ptep(curraddr), 0); + phys_to_machine_mapping[__pa(curraddr) >> PAGE_SHIFT] = DEAD; + currp++; + } + + XEN_flush_page_update_queue(); + + dom_mem_op.op = MEMOP_RESERVATION_DECREASE; + dom_mem_op.u.decrease.size = num_pages; + dom_mem_op.u.decrease.pages = parray; + if ( (ret = HYPERVISOR_dom_mem_op(&dom_mem_op)) != num_pages ) + { + printk("Unable to inflate balloon, error %lx\n", ret); + goto cleanup; + } + + credit += num_pages; + ret = num_pages; + + cleanup: + kfree(parray); + + return ret; +} + +/* install new mem pages obtained by deflate_balloon. function walks + * phys->machine mapping table looking for DEAD entries and populates + * them. + */ +static unsigned long process_new_pages(unsigned long * parray, + unsigned long num) +{ + /* currently, this function is rather simplistic as + * it is assumed that domain reclaims only number of + * pages previously released. this is to change soon + * and the code to extend page tables etc. will be + * incorporated here. + */ + + unsigned long tot_pages = start_info.nr_pages; + unsigned long * curr = parray; + unsigned long num_installed; + unsigned long i; + + num_installed = 0; + for ( i = 0; (i < tot_pages) && (num_installed < num); i++ ) + { + if ( phys_to_machine_mapping[i] == DEAD ) + { + phys_to_machine_mapping[i] = *curr; + queue_l1_entry_update( + (pte_t *)((i << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE), i); + queue_l1_entry_update( + get_ptep((unsigned long)__va(i << PAGE_SHIFT)), + ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL)); + + *curr = (unsigned long)__va(i << PAGE_SHIFT); + curr++; + num_installed++; + } + } + + /* now, this is tricky (and will also change for machine addrs that + * are mapped to not previously released addresses). we free pages + * that were allocated by get_free_page (the mappings are different + * now, of course). + */ + curr = parray; + for ( i = 0; i < num_installed; i++ ) + { + free_page(*curr); + curr++; + } + + return num_installed; +} + +unsigned long deflate_balloon(unsigned long num_pages) +{ + dom_mem_op_t dom_mem_op; + unsigned long ret; + unsigned long * parray; + + printk(KERN_ALERT "bd240 debug: deflate balloon called for %lx pages\n", num_pages); + + if ( num_pages > credit ) + { + printk("Can not allocate more pages than previously released.\n"); + return -EAGAIN; + } + + parray = (unsigned long *)kmalloc(num_pages * sizeof(unsigned long), + GFP_KERNEL); + + dom_mem_op.op = MEMOP_RESERVATION_INCREASE; + dom_mem_op.u.increase.size = num_pages; + dom_mem_op.u.increase.pages = parray; + if((ret = HYPERVISOR_dom_mem_op(&dom_mem_op)) != num_pages){ + printk("Unable to deflate balloon, error %lx\n", ret); + goto cleanup; + } + + if((ret = process_new_pages(parray, num_pages)) < num_pages){ + printk("Unable to deflate balloon by specified %lx pages, only %lx.\n", + num_pages, ret); + goto cleanup; + } + + ret = num_pages; + credit -= num_pages; + + cleanup: + kfree(parray); + + return ret; +} + +static int balloon_write(struct file *file, const char *buffer, + u_long count, void *data) +{ + user_balloon_op_t bop; + + /* Only admin can play with the balloon :) */ + if ( !capable(CAP_SYS_ADMIN) ) + return -EPERM; + + if ( copy_from_user(&bop, buffer, sizeof(bop)) ) + return -EFAULT; + + switch ( bop.op ) + { + case USER_INFLATE_BALLOON: + if ( inflate_balloon(bop.size) < bop.size ) + return -EAGAIN; + break; + + case USER_DEFLATE_BALLOON: + deflate_balloon(bop.size); + break; + + default: + printk("Unknown command to balloon driver."); + return -EFAULT; + } + + return sizeof(bop); +} + +/* + * main balloon driver initialization function. + */ +static int __init init_module(void) +{ + printk(KERN_ALERT "Starting Xen Balloon driver\n"); + + credit = 0; + + balloon_pde = create_xen_proc_entry("balloon", 0600); + if ( balloon_pde == NULL ) + { + printk(KERN_ALERT "Unable to create balloon driver proc entry!"); + return -1; + } + + balloon_pde->write_proc = balloon_write; + + return 0; +} + +static void __exit cleanup_module(void) +{ + if ( balloon_pde != NULL ) + { + remove_xen_proc_entry("balloon"); + balloon_pde = NULL; + } +} + +module_init(init_module); +module_exit(cleanup_module); + + diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/block/Makefile b/xenolinux-2.4.25-sparse/arch/xen/drivers/block/Makefile new file mode 100644 index 0000000000..35986ca54a --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/block/Makefile @@ -0,0 +1,3 @@ +O_TARGET := drv.o +obj-y := block.o vbd.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/block/block.c b/xenolinux-2.4.25-sparse/arch/xen/drivers/block/block.c new file mode 100644 index 0000000000..1297fe1a03 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/block/block.c @@ -0,0 +1,621 @@ +/****************************************************************************** + * block.c + * + * Xenolinux virtual block-device driver. + * + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge + */ + +#include "block.h" +#include +#include +#include +#include +#include + +#include + +typedef unsigned char byte; /* from linux/ide.h */ + +#define XLBLK_RESPONSE_IRQ HYPEREVENT_IRQ(_EVENT_BLKDEV) +#define XLBLK_UPDATE_IRQ HYPEREVENT_IRQ(_EVENT_VBD_UPD) +#define DEBUG_IRQ HYPEREVENT_IRQ(_EVENT_DEBUG) + +#define STATE_ACTIVE 0 +#define STATE_SUSPENDED 1 +#define STATE_CLOSED 2 +static unsigned int state = STATE_SUSPENDED; + +static blk_ring_t *blk_ring; +static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */ +static BLK_RING_IDX req_prod; /* Private request producer. */ + +/* We plug the I/O ring if the driver is suspended or if the ring is full. */ +#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \ + (state != STATE_ACTIVE)) + + +/* + * Request queues with outstanding work, but ring is currently full. + * We need no special lock here, as we always access this with the + * io_request_lock held. We only need a small maximum list. + */ +#define MAX_PENDING 8 +static request_queue_t *pending_queues[MAX_PENDING]; +static int nr_pending; + +static kdev_t sg_dev; +static int sg_operation = -1; +static unsigned long sg_next_sect; +#define DISABLE_SCATTERGATHER() (sg_operation = -1) + +static inline void signal_requests_to_xen(void) +{ + block_io_op_t op; + + DISABLE_SCATTERGATHER(); + blk_ring->req_prod = req_prod; + + op.cmd = BLOCK_IO_OP_SIGNAL; + HYPERVISOR_block_io_op(&op); + return; +} + + +/* + * xlblk_update_int/update-vbds_task - handle VBD update events from Xen + * + * Schedule a task for keventd to run, which will update the VBDs and perform + * the corresponding updates to our view of VBD state, so the XenoLinux will + * respond to changes / additions / deletions to the set of VBDs automatically. + */ +static struct tq_struct update_tq; +static void update_vbds_task(void *unused) +{ + xlvbd_update_vbds(); +} +static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + update_tq.routine = update_vbds_task; + schedule_task(&update_tq); +} + + +int xen_block_open(struct inode *inode, struct file *filep) +{ + short xldev = inode->i_rdev; + struct gendisk *gd = get_gendisk(xldev); + xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); + short minor = MINOR(xldev); + + if ( gd->part[minor].nr_sects == 0 ) + { + /* + * Device either doesn't exist, or has zero capacity; we use a few + * cheesy heuristics to return the relevant error code + */ + if ( (gd->sizes[minor >> gd->minor_shift] != 0) || + ((minor & (gd->max_p - 1)) != 0) ) + { + /* + * We have a real device, but no such partition, or we just have a + * partition number so guess this is the problem. + */ + return -ENXIO; /* no such device or address */ + } + else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE ) + { + /* This is a removable device => assume that media is missing. */ + return -ENOMEDIUM; /* media not present (this is a guess) */ + } + else + { + /* Just go for the general 'no such device' error. */ + return -ENODEV; /* no such device */ + } + } + + /* Update of usage count is protected by per-device semaphore. */ + disk->usage++; + + return 0; +} + + +int xen_block_release(struct inode *inode, struct file *filep) +{ + xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); + + /* + * When usage drops to zero it may allow more VBD updates to occur. + * Update of usage count is protected by a per-device semaphore. + */ + if ( --disk->usage == 0 ) + { + update_tq.routine = update_vbds_task; + schedule_task(&update_tq); + } + + return 0; +} + + +int xen_block_ioctl(struct inode *inode, struct file *filep, + unsigned command, unsigned long argument) +{ + kdev_t dev = inode->i_rdev; + struct hd_geometry *geo = (struct hd_geometry *)argument; + struct gendisk *gd; + struct hd_struct *part; + int i; + + /* NB. No need to check permissions. That is done for us. */ + + DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", + command, (long) argument, dev); + + gd = get_gendisk(dev); + part = &gd->part[MINOR(dev)]; + + switch ( command ) + { + case BLKGETSIZE: + DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); + return put_user(part->nr_sects, (unsigned long *) argument); + + case BLKGETSIZE64: + DPRINTK_IOCTL(" BLKGETSIZE64: %x %llx\n", BLKGETSIZE64, + (u64)part->nr_sects * 512); + return put_user((u64)part->nr_sects * 512, (u64 *) argument); + + case BLKRRPART: /* re-read partition table */ + DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART); + return xen_block_revalidate(dev); + + case BLKSSZGET: + return hardsect_size[MAJOR(dev)][MINOR(dev)]; + + case BLKBSZGET: /* get block size */ + DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET); + break; + + case BLKBSZSET: /* set block size */ + DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET); + break; + + case BLKRASET: /* set read-ahead */ + DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET); + break; + + case BLKRAGET: /* get read-ahead */ + DPRINTK_IOCTL(" BLKRAFET: %x\n", BLKRAGET); + break; + + case HDIO_GETGEO: + /* note: these values are complete garbage */ + DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO); + if (!argument) return -EINVAL; + if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; + if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; + if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; + if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT; + return 0; + + case HDIO_GETGEO_BIG: + /* note: these values are complete garbage */ + DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); + if (!argument) return -EINVAL; + if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; + if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; + if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; + if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT; + return 0; + + case CDROMMULTISESSION: + DPRINTK("FIXME: support multisession CDs later\n"); + for ( i = 0; i < sizeof(struct cdrom_multisession); i++ ) + if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT; + return 0; + + case SCSI_IOCTL_GET_BUS_NUMBER: + DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in Xen blkdev"); + return -ENOSYS; + + default: + printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", command); + return -ENOSYS; + } + + return 0; +} + +/* check media change: should probably do something here in some cases :-) */ +int xen_block_check(kdev_t dev) +{ + DPRINTK("xen_block_check\n"); + return 0; +} + +int xen_block_revalidate(kdev_t dev) +{ + struct block_device *bd; + struct gendisk *gd; + xl_disk_t *disk; + unsigned long capacity; + int i, rc = 0; + + if ( (bd = bdget(dev)) == NULL ) + return -EINVAL; + + /* + * Update of partition info, and check of usage count, is protected + * by the per-block-device semaphore. + */ + down(&bd->bd_sem); + + if ( ((gd = get_gendisk(dev)) == NULL) || + ((disk = xldev_to_xldisk(dev)) == NULL) || + ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) ) + { + rc = -EINVAL; + goto out; + } + + if ( disk->usage > 1 ) + { + rc = -EBUSY; + goto out; + } + + /* Only reread partition table if VBDs aren't mapped to partitions. */ + if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) ) + { + for ( i = gd->max_p - 1; i >= 0; i-- ) + { + invalidate_device(dev+i, 1); + gd->part[MINOR(dev+i)].start_sect = 0; + gd->part[MINOR(dev+i)].nr_sects = 0; + gd->sizes[MINOR(dev+i)] = 0; + } + + grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity); + } + + out: + up(&bd->bd_sem); + bdput(bd); + return rc; +} + + +/* + * hypervisor_request + * + * request block io + * + * id: for guest use only. + * operation: XEN_BLOCK_{READ,WRITE,PROBE,VBD*} + * buffer: buffer to read/write into. this should be a + * virtual address in the guest os. + */ +static int hypervisor_request(unsigned long id, + int operation, + char * buffer, + unsigned long sector_number, + unsigned short nr_sectors, + kdev_t device) +{ + unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); + struct gendisk *gd; + blk_ring_req_entry_t *req; + struct buffer_head *bh; + + if ( unlikely(nr_sectors >= (1<<9)) ) + BUG(); + if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) ) + BUG(); + + if ( unlikely(state == STATE_CLOSED) ) + return 1; + + switch ( operation ) + { + + case XEN_BLOCK_READ: + case XEN_BLOCK_WRITE: + gd = get_gendisk(device); + + /* + * Update the sector_number we'll pass down as appropriate; note that + * we could sanity check that resulting sector will be in this + * partition, but this will happen in xen anyhow. + */ + sector_number += gd->part[MINOR(device)].start_sect; + + /* + * If this unit doesn't consist of virtual (i.e., Xen-specified) + * partitions then we clear the partn bits from the device number. + */ + if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & + GENHD_FL_VIRT_PARTNS) ) + device &= ~(gd->max_p - 1); + + if ( (sg_operation == operation) && + (sg_dev == device) && + (sg_next_sect == sector_number) ) + { + req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req; + bh = (struct buffer_head *)id; + bh->b_reqnext = (struct buffer_head *)req->id; + req->id = id; + req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors; + if ( ++req->nr_segments < MAX_BLK_SEGS ) + sg_next_sect += nr_sectors; + else + DISABLE_SCATTERGATHER(); + return 0; + } + else if ( RING_PLUGGED ) + { + return 1; + } + else + { + sg_operation = operation; + sg_dev = device; + sg_next_sect = sector_number + nr_sectors; + } + break; + + default: + panic("unknown op %d\n", operation); + } + + /* Fill out a communications ring structure. */ + req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req; + req->id = id; + req->operation = operation; + req->sector_number = (xen_sector_t)sector_number; + req->device = device; + req->nr_segments = 1; + req->buffer_and_sects[0] = buffer_ma | nr_sectors; + req_prod++; + + return 0; +} + + +/* + * do_xlblk_request + * read a block; request is in a request queue + */ +void do_xlblk_request(request_queue_t *rq) +{ + struct request *req; + struct buffer_head *bh, *next_bh; + int rw, nsect, full, queued = 0; + + DPRINTK("xlblk.c::do_xlblk_request\n"); + + while ( !rq->plugged && !list_empty(&rq->queue_head)) + { + if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) + goto out; + + DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n", + req, req->cmd, req->sector, + req->current_nr_sectors, req->nr_sectors, req->bh); + + rw = req->cmd; + if ( rw == READA ) + rw = READ; + if ( unlikely((rw != READ) && (rw != WRITE)) ) + panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw); + + req->errors = 0; + + bh = req->bh; + while ( bh != NULL ) + { + next_bh = bh->b_reqnext; + bh->b_reqnext = NULL; + + full = hypervisor_request( + (unsigned long)bh, + (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, + bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev); + + if ( full ) + { + bh->b_reqnext = next_bh; + pending_queues[nr_pending++] = rq; + if ( unlikely(nr_pending >= MAX_PENDING) ) + BUG(); + goto out; + } + + queued++; + + /* Dequeue the buffer head from the request. */ + nsect = bh->b_size >> 9; + bh = req->bh = next_bh; + + if ( bh != NULL ) + { + /* There's another buffer head to do. Update the request. */ + req->hard_sector += nsect; + req->hard_nr_sectors -= nsect; + req->sector = req->hard_sector; + req->nr_sectors = req->hard_nr_sectors; + req->current_nr_sectors = bh->b_size >> 9; + req->buffer = bh->b_data; + } + else + { + /* That was the last buffer head. Finalise the request. */ + if ( unlikely(end_that_request_first(req, 1, "XenBlk")) ) + BUG(); + blkdev_dequeue_request(req); + end_that_request_last(req); + } + } + } + + out: + if ( queued != 0 ) signal_requests_to_xen(); +} + + +static void kick_pending_request_queues(void) +{ + /* We kick pending request queues if the ring is reasonably empty. */ + if ( (nr_pending != 0) && + ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) ) + { + /* Attempt to drain the queue, but bail if the ring becomes full. */ + while ( (nr_pending != 0) && !RING_PLUGGED ) + do_xlblk_request(pending_queues[--nr_pending]); + } +} + + +static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + BLK_RING_IDX i; + unsigned long flags; + struct buffer_head *bh, *next_bh; + + if ( unlikely(state == STATE_CLOSED) ) + return; + + spin_lock_irqsave(&io_request_lock, flags); + + for ( i = resp_cons; i != blk_ring->resp_prod; i++ ) + { + blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp; + switch ( bret->operation ) + { + case XEN_BLOCK_READ: + case XEN_BLOCK_WRITE: + if ( unlikely(bret->status != 0) ) + DPRINTK("Bad return from blkdev data request: %lx\n", + bret->status); + for ( bh = (struct buffer_head *)bret->id; + bh != NULL; + bh = next_bh ) + { + next_bh = bh->b_reqnext; + bh->b_reqnext = NULL; + bh->b_end_io(bh, !bret->status); + } + break; + + default: + BUG(); + } + } + + resp_cons = i; + + kick_pending_request_queues(); + + spin_unlock_irqrestore(&io_request_lock, flags); +} + + +static void reset_xlblk_interface(void) +{ + block_io_op_t op; + + nr_pending = 0; + + op.cmd = BLOCK_IO_OP_RESET; + if ( HYPERVISOR_block_io_op(&op) != 0 ) + printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n"); + + op.cmd = BLOCK_IO_OP_RING_ADDRESS; + (void)HYPERVISOR_block_io_op(&op); + + set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT); + blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE); + blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0; + + wmb(); + state = STATE_ACTIVE; +} + + +int __init xlblk_init(void) +{ + int error; + + reset_xlblk_interface(); + + error = request_irq(XLBLK_RESPONSE_IRQ, xlblk_response_int, + SA_SAMPLE_RANDOM, "blkdev", NULL); + if ( error ) + { + printk(KERN_ALERT "Could not allocate receive interrupt\n"); + goto fail; + } + + error = request_irq(XLBLK_UPDATE_IRQ, xlblk_update_int, + SA_INTERRUPT, "blkdev", NULL); + + if ( error ) + { + printk(KERN_ALERT "Could not allocate block update interrupt\n"); + goto fail; + } + + (void)xlvbd_init(); + + return 0; + + fail: + return error; +} + + +static void __exit xlblk_cleanup(void) +{ + xlvbd_cleanup(); + free_irq(XLBLK_RESPONSE_IRQ, NULL); + free_irq(XLBLK_UPDATE_IRQ, NULL); +} + + +#ifdef MODULE +module_init(xlblk_init); +module_exit(xlblk_cleanup); +#endif + + +void blkdev_suspend(void) +{ + state = STATE_SUSPENDED; + wmb(); + + while ( resp_cons != blk_ring->req_prod ) + { + barrier(); + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(1); + } + + wmb(); + state = STATE_CLOSED; + wmb(); + + clear_fixmap(FIX_BLKRING_BASE); +} + + +void blkdev_resume(void) +{ + reset_xlblk_interface(); + spin_lock_irq(&io_request_lock); + kick_pending_request_queues(); + spin_unlock_irq(&io_request_lock); +} diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/block/block.h b/xenolinux-2.4.25-sparse/arch/xen/drivers/block/block.h new file mode 100644 index 0000000000..e41e03970e --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/block/block.h @@ -0,0 +1,82 @@ +/****************************************************************************** + * block.h + * + * Shared definitions between all levels of XenoLinux Virtual block devices. + */ + +#ifndef __XEN_DRIVERS_BLOCK_H__ +#define __XEN_DRIVERS_BLOCK_H__ + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#if 0 +#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a ) +#else +#define DPRINTK(_f, _a...) ((void)0) +#endif + +#if 0 +#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a ) +#else +#define DPRINTK_IOCTL(_f, _a...) ((void)0) +#endif + +/* Private gendisk->flags[] values. */ +#define GENHD_FL_XEN 2 /* Is unit a Xen block device? */ +#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */ + +/* + * We have one of these per vbd, whether ide, scsi or 'other'. + * They hang in an array off the gendisk structure. We may end up putting + * all kinds of interesting stuff here :-) + */ +typedef struct xl_disk { + int usage; +} xl_disk_t; + +extern int xen_control_msg(int operration, char *buffer, int size); +extern int xen_block_open(struct inode *inode, struct file *filep); +extern int xen_block_release(struct inode *inode, struct file *filep); +extern int xen_block_ioctl(struct inode *inode, struct file *filep, + unsigned command, unsigned long argument); +extern int xen_block_check(kdev_t dev); +extern int xen_block_revalidate(kdev_t dev); +extern void do_xlblk_request (request_queue_t *rq); + +extern void xlvbd_update_vbds(void); + +static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev) +{ + struct gendisk *gd = get_gendisk(xldev); + + if ( gd == NULL ) + return NULL; + + return (xl_disk_t *)gd->real_devices + + (MINOR(xldev) >> gd->minor_shift); +} + + +/* Virtual block-device subsystem. */ +extern int xlvbd_init(void); +extern void xlvbd_cleanup(void); + +#endif /* __XEN_DRIVERS_BLOCK_H__ */ diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/block/vbd.c b/xenolinux-2.4.25-sparse/arch/xen/drivers/block/vbd.c new file mode 100644 index 0000000000..e08b976c56 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/block/vbd.c @@ -0,0 +1,561 @@ +/****************************************************************************** + * vbd.c + * + * Xenolinux virtual block-device driver (xvd). + * + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge + */ + +#include "block.h" +#include + +/* + * For convenience we distinguish between ide, scsi and 'other' (i.e. + * potentially combinations of the two) in the naming scheme and in a few + * other places (like default readahead, etc). + */ +#define XLIDE_MAJOR_NAME "hd" +#define XLSCSI_MAJOR_NAME "sd" +#define XLVBD_MAJOR_NAME "xvd" + +#define XLIDE_DEVS_PER_MAJOR 2 +#define XLSCSI_DEVS_PER_MAJOR 16 +#define XLVBD_DEVS_PER_MAJOR 16 + +#define XLIDE_PARTN_SHIFT 6 /* amount to shift minor to get 'real' minor */ +#define XLIDE_MAX_PART (1 << XLIDE_PARTN_SHIFT) /* minors per ide vbd */ + +#define XLSCSI_PARTN_SHIFT 4 /* amount to shift minor to get 'real' minor */ +#define XLSCSI_MAX_PART (1 << XLSCSI_PARTN_SHIFT) /* minors per scsi vbd */ + +#define XLVBD_PARTN_SHIFT 4 /* amount to shift minor to get 'real' minor */ +#define XLVBD_MAX_PART (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */ + +/* The below are for the generic drivers/block/ll_rw_block.c code. */ +static int xlide_blksize_size[256]; +static int xlide_hardsect_size[256]; +static int xlide_max_sectors[256]; +static int xlscsi_blksize_size[256]; +static int xlscsi_hardsect_size[256]; +static int xlscsi_max_sectors[256]; +static int xlvbd_blksize_size[256]; +static int xlvbd_hardsect_size[256]; +static int xlvbd_max_sectors[256]; + +/* Information from Xen about our VBDs. */ +#define MAX_VBDS 64 +static int nr_vbds; +static xen_disk_t *vbd_info; + +static struct block_device_operations xlvbd_block_fops = +{ + open: xen_block_open, + release: xen_block_release, + ioctl: xen_block_ioctl, + check_media_change: xen_block_check, + revalidate: xen_block_revalidate, +}; + +static int xlvbd_get_vbd_info(xen_disk_t *disk_info) +{ + int error; + block_io_op_t op; + + /* Probe for disk information. */ + memset(&op, 0, sizeof(op)); + op.cmd = BLOCK_IO_OP_VBD_PROBE; + op.u.probe_params.domain = 0; + op.u.probe_params.xdi.max = MAX_VBDS; + op.u.probe_params.xdi.disks = disk_info; + op.u.probe_params.xdi.count = 0; + + if ( (error = HYPERVISOR_block_io_op(&op)) != 0 ) + { + printk(KERN_ALERT "Could not probe disks (%d)\n", error); + return -1; + } + + return op.u.probe_params.xdi.count; +} + +/* + * xlvbd_init_device - initialise a VBD device + * @disk: a xen_disk_t describing the VBD + * + * Takes a xen_disk_t * that describes a VBD the domain has access to. + * Performs appropriate initialisation and registration of the device. + * + * Care needs to be taken when making re-entrant calls to ensure that + * corruption does not occur. Also, devices that are in use should not have + * their details updated. This is the caller's responsibility. + */ +static int xlvbd_init_device(xen_disk_t *xd) +{ + int device = xd->device; + int major = MAJOR(device); + int minor = MINOR(device); + int is_ide = IDE_DISK_MAJOR(major); /* is this an ide device? */ + int is_scsi= SCSI_BLK_MAJOR(major); /* is this a scsi device? */ + char *major_name; + struct gendisk *gd; + struct block_device *bd; + xl_disk_t *disk; + int i, rc = 0, max_part, partno; + unsigned long capacity; + + unsigned char buf[64]; + + if ( (bd = bdget(device)) == NULL ) + return -1; + + /* + * Update of partition info, and check of usage count, is protected + * by the per-block-device semaphore. + */ + down(&bd->bd_sem); + + if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) ) + { + printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device); + rc = -1; + goto out; + } + + if ( is_ide ) { + + major_name = XLIDE_MAJOR_NAME; + max_part = XLIDE_MAX_PART; + + } else if ( is_scsi ) { + + major_name = XLSCSI_MAJOR_NAME; + max_part = XLSCSI_MAX_PART; + + } else if (XD_VIRTUAL(xd->info)) { + + major_name = XLVBD_MAJOR_NAME; + max_part = XLVBD_MAX_PART; + + } else { + + /* SMH: hmm - probably a CCISS driver or sim; assume CCISS for now */ + printk(KERN_ALERT "Assuming device %02x:%02x is CCISS/SCSI\n", + major, minor); + is_scsi = 1; + major_name = "cciss"; + max_part = XLSCSI_MAX_PART; + + } + + partno = minor & (max_part - 1); + + if ( (gd = get_gendisk(device)) == NULL ) + { + rc = register_blkdev(major, major_name, &xlvbd_block_fops); + if ( rc < 0 ) + { + printk(KERN_ALERT "XL VBD: can't get major %d\n", major); + goto out; + } + + if ( is_ide ) + { + blksize_size[major] = xlide_blksize_size; + hardsect_size[major] = xlide_hardsect_size; + max_sectors[major] = xlide_max_sectors; + read_ahead[major] = 8; /* from drivers/ide/ide-probe.c */ + } + else if ( is_scsi ) + { + blksize_size[major] = xlscsi_blksize_size; + hardsect_size[major] = xlscsi_hardsect_size; + max_sectors[major] = xlscsi_max_sectors; + read_ahead[major] = 0; /* XXX 8; -- guessing */ + } + else + { + blksize_size[major] = xlvbd_blksize_size; + hardsect_size[major] = xlvbd_hardsect_size; + max_sectors[major] = xlvbd_max_sectors; + read_ahead[major] = 8; + } + + blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request); + + /* + * Turn off barking 'headactive' mode. We dequeue buffer heads as + * soon as we pass them down to Xen. + */ + blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0); + + /* Construct an appropriate gendisk structure. */ + gd = kmalloc(sizeof(struct gendisk), GFP_KERNEL); + gd->major = major; + gd->major_name = major_name; + + gd->max_p = max_part; + if ( is_ide ) + { + gd->minor_shift = XLIDE_PARTN_SHIFT; + gd->nr_real = XLIDE_DEVS_PER_MAJOR; + } + else if ( is_scsi ) + { + gd->minor_shift = XLSCSI_PARTN_SHIFT; + gd->nr_real = XLSCSI_DEVS_PER_MAJOR; + } + else + { + gd->minor_shift = XLVBD_PARTN_SHIFT; + gd->nr_real = XLVBD_DEVS_PER_MAJOR; + } + + /* + ** The sizes[] and part[] arrays hold the sizes and other + ** information about every partition with this 'major' (i.e. + ** every disk sharing the 8 bit prefix * max partns per disk) + */ + gd->sizes = kmalloc(max_part*gd->nr_real*sizeof(int), GFP_KERNEL); + gd->part = kmalloc(max_part*gd->nr_real*sizeof(struct hd_struct), + GFP_KERNEL); + memset(gd->sizes, 0, max_part * gd->nr_real * sizeof(int)); + memset(gd->part, 0, max_part * gd->nr_real + * sizeof(struct hd_struct)); + + + gd->real_devices = kmalloc(gd->nr_real * sizeof(xl_disk_t), + GFP_KERNEL); + memset(gd->real_devices, 0, gd->nr_real * sizeof(xl_disk_t)); + + gd->next = NULL; + gd->fops = &xlvbd_block_fops; + + gd->de_arr = kmalloc(gd->nr_real * sizeof(*gd->de_arr), + GFP_KERNEL); + gd->flags = kmalloc(gd->nr_real * sizeof(*gd->flags), GFP_KERNEL); + + memset(gd->de_arr, 0, gd->nr_real * sizeof(*gd->de_arr)); + memset(gd->flags, 0, gd->nr_real * sizeof(*gd->flags)); + + add_gendisk(gd); + + blk_size[major] = gd->sizes; + } + + if ( XD_READONLY(xd->info) ) + set_device_ro(device, 1); + + gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XEN; + + /* NB. Linux 2.4 only handles 32-bit sector offsets and capacities. */ + capacity = (unsigned long)xd->capacity; + + if ( partno != 0 ) + { + /* + * If this was previously set up as a real disc we will have set + * up partition-table information. Virtual partitions override + * 'real' partitions, and the two cannot coexist on a device. + */ + if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) && + (gd->sizes[minor & ~(max_part-1)] != 0) ) + { + /* + * Any non-zero sub-partition entries must be cleaned out before + * installing 'virtual' partition entries. The two types cannot + * coexist, and virtual partitions are favoured. + */ + kdev_t dev = device & ~(max_part-1); + for ( i = max_part - 1; i > 0; i-- ) + { + invalidate_device(dev+i, 1); + gd->part[MINOR(dev+i)].start_sect = 0; + gd->part[MINOR(dev+i)].nr_sects = 0; + gd->sizes[MINOR(dev+i)] = 0; + } + printk(KERN_ALERT + "Virtual partitions found for /dev/%s - ignoring any " + "real partition information we may have found.\n", + disk_name(gd, MINOR(device), buf)); + } + + /* Need to skankily setup 'partition' information */ + gd->part[minor].start_sect = 0; + gd->part[minor].nr_sects = capacity; + gd->sizes[minor] = capacity; + + gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS; + } + else + { + gd->part[minor].nr_sects = capacity; + gd->sizes[minor] = capacity>>(BLOCK_SIZE_BITS-9); + + /* Some final fix-ups depending on the device type */ + switch ( XD_TYPE(xd->info) ) + { + case XD_TYPE_CDROM: + case XD_TYPE_FLOPPY: + case XD_TYPE_TAPE: + gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE; + printk(KERN_ALERT + "Skipping partition check on %s /dev/%s\n", + XD_TYPE(xd->info)==XD_TYPE_CDROM ? "cdrom" : + (XD_TYPE(xd->info)==XD_TYPE_TAPE ? "tape" : + "floppy"), disk_name(gd, MINOR(device), buf)); + break; + + case XD_TYPE_DISK: + /* Only check partitions on real discs (not virtual!). */ + if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS ) + { + printk(KERN_ALERT + "Skipping partition check on virtual /dev/%s\n", + disk_name(gd, MINOR(device), buf)); + break; + } + register_disk(gd, device, gd->max_p, &xlvbd_block_fops, capacity); + break; + + default: + printk(KERN_ALERT "XenoLinux: unknown device type %d\n", + XD_TYPE(xd->info)); + break; + } + } + + out: + up(&bd->bd_sem); + bdput(bd); + return rc; +} + + +/* + * xlvbd_remove_device - remove a device node if possible + * @device: numeric device ID + * + * Updates the gendisk structure and invalidates devices. + * + * This is OK for now but in future, should perhaps consider where this should + * deallocate gendisks / unregister devices. + */ +static int xlvbd_remove_device(int device) +{ + int i, rc = 0, minor = MINOR(device); + struct gendisk *gd; + struct block_device *bd; + xl_disk_t *disk = NULL; + + if ( (bd = bdget(device)) == NULL ) + return -1; + + /* + * Update of partition info, and check of usage count, is protected + * by the per-block-device semaphore. + */ + down(&bd->bd_sem); + + if ( ((gd = get_gendisk(device)) == NULL) || + ((disk = xldev_to_xldisk(device)) == NULL) ) + BUG(); + + if ( disk->usage != 0 ) + { + printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device); + rc = -1; + goto out; + } + + if ( (minor & (gd->max_p-1)) != 0 ) + { + /* 1: The VBD is mapped to a partition rather than a whole unit. */ + invalidate_device(device, 1); + gd->part[minor].start_sect = 0; + gd->part[minor].nr_sects = 0; + gd->sizes[minor] = 0; + + /* Clear the consists-of-virtual-partitions flag if possible. */ + gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS; + for ( i = 1; i < gd->max_p; i++ ) + if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 ) + gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS; + + /* + * If all virtual partitions are now gone, and a 'whole unit' VBD is + * present, then we can try to grok the unit's real partition table. + */ + if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) && + (gd->sizes[minor & ~(gd->max_p-1)] != 0) && + !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) ) + { + register_disk(gd, + device&~(gd->max_p-1), + gd->max_p, + &xlvbd_block_fops, + gd->part[minor&~(gd->max_p-1)].nr_sects); + } + } + else + { + /* + * 2: The VBD is mapped to an entire 'unit'. Clear all partitions. + * NB. The partition entries are only cleared if there are no VBDs + * mapped to individual partitions on this unit. + */ + i = gd->max_p - 1; /* Default: clear subpartitions as well. */ + if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS ) + i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */ + while ( i >= 0 ) + { + invalidate_device(device+i, 1); + gd->part[minor+i].start_sect = 0; + gd->part[minor+i].nr_sects = 0; + gd->sizes[minor+i] = 0; + i--; + } + } + + out: + up(&bd->bd_sem); + bdput(bd); + return rc; +} + +/* + * xlvbd_update_vbds - reprobes the VBD status and performs updates driver + * state. The VBDs need to be updated in this way when the domain is + * initialised and also each time we receive an XLBLK_UPDATE event. + */ +void xlvbd_update_vbds(void) +{ + int i, j, k, old_nr, new_nr; + xen_disk_t *old_info, *new_info, *merged_info; + + old_info = vbd_info; + old_nr = nr_vbds; + + new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL); + if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 ) + { + kfree(new_info); + return; + } + + /* + * Final list maximum size is old list + new list. This occurs only when + * old list and new list do not overlap at all, and we cannot yet destroy + * VBDs in the old list because the usage counts are busy. + */ + merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL); + + /* @i tracks old list; @j tracks new list; @k tracks merged list. */ + i = j = k = 0; + + while ( (i < old_nr) && (j < new_nr) ) + { + if ( old_info[i].device < new_info[j].device ) + { + if ( xlvbd_remove_device(old_info[i].device) != 0 ) + memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t)); + i++; + } + else if ( old_info[i].device > new_info[j].device ) + { + if ( xlvbd_init_device(&new_info[j]) == 0 ) + memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t)); + j++; + } + else + { + if ( ((old_info[i].capacity == new_info[j].capacity) && + (old_info[i].info == new_info[j].info)) || + (xlvbd_remove_device(old_info[i].device) != 0) ) + memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t)); + else if ( xlvbd_init_device(&new_info[j]) == 0 ) + memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t)); + i++; j++; + } + } + + for ( ; i < old_nr; i++ ) + { + if ( xlvbd_remove_device(old_info[i].device) != 0 ) + memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t)); + } + + for ( ; j < new_nr; j++ ) + { + if ( xlvbd_init_device(&new_info[j]) == 0 ) + memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t)); + } + + vbd_info = merged_info; + nr_vbds = k; + + kfree(old_info); + kfree(new_info); +} + + +/* + * Set up all the linux device goop for the virtual block devices (vbd's) that + * xen tells us about. Note that although from xen's pov VBDs are addressed + * simply an opaque 16-bit device number, the domain creation tools + * conventionally allocate these numbers to correspond to those used by 'real' + * linux -- this is just for convenience as it means e.g. that the same + * /etc/fstab can be used when booting with or without xen. + */ +int __init xlvbd_init(void) +{ + int i; + + /* + * If compiled as a module, we don't support unloading yet. We therefore + * permanently increment the reference count to disallow it. + */ + SET_MODULE_OWNER(&xlvbd_block_fops); + MOD_INC_USE_COUNT; + + /* Initialize the global arrays. */ + for ( i = 0; i < 256; i++ ) + { + /* from the generic ide code (drivers/ide/ide-probe.c, etc) */ + xlide_blksize_size[i] = 1024; + xlide_hardsect_size[i] = 512; + xlide_max_sectors[i] = 128; /* 'hwif->rqsize' if we knew it */ + + /* from the generic scsi disk code (drivers/scsi/sd.c) */ + xlscsi_blksize_size[i] = 1024; /* XXX 512; */ + xlscsi_hardsect_size[i] = 512; + xlscsi_max_sectors[i] = 128*8; /* XXX 128; */ + + /* we don't really know what to set these too since it depends */ + xlvbd_blksize_size[i] = 512; + xlvbd_hardsect_size[i] = 512; + xlvbd_max_sectors[i] = 128; + } + + vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL); + nr_vbds = xlvbd_get_vbd_info(vbd_info); + + if ( nr_vbds < 0 ) + { + kfree(vbd_info); + vbd_info = NULL; + nr_vbds = 0; + } + else + { + for ( i = 0; i < nr_vbds; i++ ) + xlvbd_init_device(&vbd_info[i]); + } + + return 0; +} + + +#ifdef MODULE +module_init(xlvbd_init); +#endif diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/console/Makefile b/xenolinux-2.4.25-sparse/arch/xen/drivers/console/Makefile new file mode 100644 index 0000000000..aaa546a8f3 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/console/Makefile @@ -0,0 +1,3 @@ +O_TARGET := drv.o +obj-$(CONFIG_XEN_CONSOLE) := console.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/console/console.c b/xenolinux-2.4.25-sparse/arch/xen/drivers/console/console.c new file mode 100644 index 0000000000..f4d97a65ba --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/console/console.c @@ -0,0 +1,508 @@ +/****************************************************************************** + * console.c + * + * Virtual console driver. + * + * Copyright (c) 2002-2004, K A Fraser. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static spinlock_t xen_console_lock = SPIN_LOCK_UNLOCKED; + +#define XEN_TTY_MINOR 123 + +/******************** Kernel console driver ********************************/ + +static void nonpriv_conwrite(const char *s, unsigned int count) +{ + control_if_t *ctrl_if; + evtchn_op_t evtchn_op; + int src, dst, p; + + ctrl_if = (control_if_t *)((char *)HYPERVISOR_shared_info + 2048); + + while ( count != 0 ) + { + /* Wait for the request ring to drain. */ + while ( ctrl_if->tx_resp_prod != ctrl_if->tx_req_prod ) + barrier(); + + p = MASK_CONTROL_IDX(ctrl_if->tx_req_prod); + + ctrl_if->tx_ring[p].type = CMSG_CONSOLE; + ctrl_if->tx_ring[p].subtype = CMSG_CONSOLE_DATA; + ctrl_if->tx_ring[p].id = 0xaa; + src = dst = 0; + while ( (src < count) && (dst < (sizeof(ctrl_if->tx_ring[p].msg)-1)) ) + { + if ( (ctrl_if->tx_ring[p].msg[dst++] = s[src++]) == '\n' ) + ctrl_if->tx_ring[p].msg[dst++] = '\r'; + } + ctrl_if->tx_ring[p].length = dst; + + ctrl_if->tx_req_prod++; + evtchn_op.cmd = EVTCHNOP_send; + evtchn_op.u.send.local_port = 0; + (void)HYPERVISOR_event_channel_op(&evtchn_op); + + s += src; + count -= src; + } +} + +static void priv_conwrite(const char *s, unsigned int count) +{ + int rc; + + while ( count > 0 ) + { + if ( (rc = HYPERVISOR_console_io(CONSOLEIO_write, count, s)) > 0 ) + { + count -= rc; + s += rc; + } + } +} + +static void xen_console_write(struct console *co, const char *s, + unsigned int count) +{ + unsigned long flags; + spin_lock_irqsave(&xen_console_lock, flags); + if ( !(start_info.flags & SIF_INITDOMAIN) ) + nonpriv_conwrite(s, count); + else + priv_conwrite(s, count); + spin_unlock_irqrestore(&xen_console_lock, flags); +} + +static kdev_t xen_console_device(struct console *c) +{ + /* + * This is the magic that binds our "struct console" to our + * "tty_struct", defined below. + */ + return MKDEV(TTY_MAJOR, XEN_TTY_MINOR); +} + +static struct console xen_console_info = { + name: "xencons", /* Used to be xen_console, but we're only + actually allowed 8 charcters including + the terminator... */ + write: xen_console_write, + device: xen_console_device, + flags: CON_PRINTBUFFER, + index: -1, +}; + +void xen_console_init(void) +{ + register_console(&xen_console_info); +} + + +/*** Useful function for console debugging -- goes straight to Xen ****/ +asmlinkage int xprintk(const char *fmt, ...) +{ + va_list args; + int printk_len; + static char printk_buf[1024]; + + /* Emit the output into the temporary buffer */ + va_start(args, fmt); + printk_len = vsnprintf(printk_buf, sizeof(printk_buf), fmt, args); + va_end(args); + + /* Send the processed output directly to Xen. */ + xen_console_write(NULL, printk_buf, printk_len); + + return 0; +} + + +/******************** User-space console driver (/dev/console) ************/ + +static struct tty_driver xen_console_driver; +static int xen_console_refcount; +static struct tty_struct *xen_console_table[1]; +static struct termios *xen_console_termios[1]; +static struct termios *xen_console_termios_locked[1]; +static struct tty_struct *xen_console_tty; + +#define WBUF_SIZE 1024 +#define WBUF_MASK(_i) ((_i)&(WBUF_SIZE-1)) +static char wbuf[WBUF_SIZE], x_char; +static unsigned int wc, wp; /* write_cons, write_prod */ + +static void __do_console_io(void) +{ + control_if_t *ctrl_if; + control_msg_t *msg; + evtchn_op_t evtchn_op; + CONTROL_RING_IDX c; + int i, l, work_done = 0; + static char rbuf[16]; + + if ( xen_console_tty == NULL ) + return; + + /* Special-case I/O handling for domain 0. */ + if ( start_info.flags & SIF_INITDOMAIN ) + { + /* Receive work. */ + while ( (l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0 ) + for ( i = 0; i < l; i++ ) + tty_insert_flip_char(xen_console_tty, rbuf[i], 0); + if ( xen_console_tty->flip.count != 0 ) + tty_flip_buffer_push(xen_console_tty); + + /* Transmit work. */ + while ( wc != wp ) + { + l = wp - wc; + if ( l > (WBUF_SIZE - WBUF_MASK(wc)) ) + l = WBUF_SIZE - WBUF_MASK(wc); + priv_conwrite(&wbuf[WBUF_MASK(wc)], l); + wc += l; + wake_up_interruptible(&xen_console_tty->write_wait); + if ( (xen_console_tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) && + (xen_console_tty->ldisc.write_wakeup != NULL) ) + (xen_console_tty->ldisc.write_wakeup)(xen_console_tty); + } + + return; + } + + /* Acknowledge the notification. */ + evtchn_clear_port(0); + + ctrl_if = (control_if_t *)((char *)HYPERVISOR_shared_info + 2048); + + /* Receive work. */ + for ( c = ctrl_if->rx_resp_prod; c != ctrl_if->rx_req_prod; c++ ) + { + msg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(c)]; + if ( (msg->type == CMSG_CONSOLE) && + (msg->subtype == CMSG_CONSOLE_DATA) ) + { + for ( i = 0; i < msg->length; i++ ) + tty_insert_flip_char(xen_console_tty, msg->msg[i], 0); + } + msg->length = 0; + } + if ( ctrl_if->rx_resp_prod != c ) + { + ctrl_if->rx_resp_prod = c; + work_done = 1; + tty_flip_buffer_push(xen_console_tty); + } + + /* Transmit work. */ + for ( c = ctrl_if->tx_req_prod; + (c - ctrl_if->tx_resp_prod) != CONTROL_RING_SIZE; + c++ ) + { + if ( (wc == wp) && (x_char == 0) ) + break; + msg = &ctrl_if->tx_ring[MASK_CONTROL_IDX(c)]; + msg->type = CMSG_CONSOLE; + msg->subtype = CMSG_CONSOLE_DATA; + msg->id = 0xaa; + l = 0; + if ( x_char != 0 ) /* Handle XON/XOFF urgently. */ + { + msg->msg[l++] = x_char; + x_char = 0; + } + while ( (l < sizeof(msg->msg)) && (wc != wp) ) + msg->msg[l++] = wbuf[WBUF_MASK(wc++)]; + msg->length = l; + } + if ( ctrl_if->tx_req_prod != c ) + { + ctrl_if->tx_req_prod = c; + work_done = 1; + /* There might be something for waiters to do. */ + wake_up_interruptible(&xen_console_tty->write_wait); + if ( (xen_console_tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) && + (xen_console_tty->ldisc.write_wakeup != NULL) ) + (xen_console_tty->ldisc.write_wakeup)(xen_console_tty); + } + + if ( work_done ) + { + /* Send a notification to the controller. */ + evtchn_op.cmd = EVTCHNOP_send; + evtchn_op.u.send.local_port = 0; + (void)HYPERVISOR_event_channel_op(&evtchn_op); + } +} + +/* This is the callback entry point for domains != 0. */ +static void control_event(unsigned int port) +{ + unsigned long flags; + spin_lock_irqsave(&xen_console_lock, flags); + __do_console_io(); + spin_unlock_irqrestore(&xen_console_lock, flags); +} + +/* This is the callback entry point for domain 0. */ +static void control_irq(int irq, void *dev_id, struct pt_regs *regs) +{ + unsigned long flags; + spin_lock_irqsave(&xen_console_lock, flags); + __do_console_io(); + spin_unlock_irqrestore(&xen_console_lock, flags); +} + +static int xen_console_write_room(struct tty_struct *tty) +{ + return WBUF_SIZE - (wp - wc); +} + +static int xen_console_chars_in_buffer(struct tty_struct *tty) +{ + return wp - wc; +} + +static void xen_console_send_xchar(struct tty_struct *tty, char ch) +{ + unsigned long flags; + spin_lock_irqsave(&xen_console_lock, flags); + x_char = ch; + __do_console_io(); + spin_unlock_irqrestore(&xen_console_lock, flags); +} + +static void xen_console_throttle(struct tty_struct *tty) +{ + if ( I_IXOFF(tty) ) + xen_console_send_xchar(tty, STOP_CHAR(tty)); +} + +static void xen_console_unthrottle(struct tty_struct *tty) +{ + if ( I_IXOFF(tty) ) + { + if ( x_char != 0 ) + x_char = 0; + else + xen_console_send_xchar(tty, START_CHAR(tty)); + } +} + +static void xen_console_flush_buffer(struct tty_struct *tty) +{ + unsigned long flags; + spin_lock_irqsave(&xen_console_lock, flags); + wc = wp = 0; + spin_unlock_irqrestore(&xen_console_lock, flags); +} + +static inline int __xen_console_put_char(int ch) +{ + char _ch = (char)ch; + if ( (wp - wc) == WBUF_SIZE ) + return 0; + wbuf[WBUF_MASK(wp++)] = _ch; + return 1; +} + +static int xen_console_write(struct tty_struct *tty, int from_user, + const u_char * buf, int count) +{ + int i; + unsigned long flags; + + if ( from_user && verify_area(VERIFY_READ, buf, count) ) + return -EINVAL; + + spin_lock_irqsave(&xen_console_lock, flags); + + for ( i = 0; i < count; i++ ) + { + char ch; + if ( from_user ) + __get_user(ch, buf + i); + else + ch = buf[i]; + if ( !__xen_console_put_char(ch) ) + break; + } + + if ( i != 0 ) + __do_console_io(); + + spin_unlock_irqrestore(&xen_console_lock, flags); + + return i; +} + +static void xen_console_put_char(struct tty_struct *tty, u_char ch) +{ + unsigned long flags; + spin_lock_irqsave(&xen_console_lock, flags); + (void)__xen_console_put_char(ch); + spin_unlock_irqrestore(&xen_console_lock, flags); +} + +static void xen_console_flush_chars(struct tty_struct *tty) +{ + unsigned long flags; + spin_lock_irqsave(&xen_console_lock, flags); + __do_console_io(); + spin_unlock_irqrestore(&xen_console_lock, flags); +} + +static void xen_console_wait_until_sent(struct tty_struct *tty, int timeout) +{ + unsigned long orig_jiffies = jiffies; + + while ( tty->driver.chars_in_buffer(tty) ) + { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(1); + if ( signal_pending(current) ) + break; + if ( (timeout != 0) && time_after(jiffies, orig_jiffies + timeout) ) + break; + } + + set_current_state(TASK_RUNNING); +} + +static int xen_console_open(struct tty_struct *tty, struct file *filp) +{ + int line; + unsigned long flags; + + MOD_INC_USE_COUNT; + line = MINOR(tty->device) - tty->driver.minor_start; + if ( line != 0 ) + { + MOD_DEC_USE_COUNT; + return -ENODEV; + } + + spin_lock_irqsave(&xen_console_lock, flags); + tty->driver_data = NULL; + if ( xen_console_tty == NULL ) + xen_console_tty = tty; + __do_console_io(); + spin_unlock_irqrestore(&xen_console_lock, flags); + + return 0; +} + +static void xen_console_close(struct tty_struct *tty, struct file *filp) +{ + unsigned long flags; + + if ( tty->count == 1 ) + { + tty->closing = 1; + tty_wait_until_sent(tty, 0); + if ( tty->driver.flush_buffer != NULL ) + tty->driver.flush_buffer(tty); + if ( tty->ldisc.flush_buffer != NULL ) + tty->ldisc.flush_buffer(tty); + tty->closing = 0; + spin_lock_irqsave(&xen_console_lock, flags); + xen_console_tty = NULL; + spin_unlock_irqrestore(&xen_console_lock, flags); + } + + MOD_DEC_USE_COUNT; +} + +int __init xen_con_init(void) +{ + memset(&xen_console_driver, 0, sizeof(struct tty_driver)); + xen_console_driver.magic = TTY_DRIVER_MAGIC; + xen_console_driver.name = "xencons"; + xen_console_driver.major = TTY_MAJOR; + xen_console_driver.minor_start = XEN_TTY_MINOR; + xen_console_driver.num = 1; + xen_console_driver.type = TTY_DRIVER_TYPE_SERIAL; + xen_console_driver.subtype = SERIAL_TYPE_NORMAL; + xen_console_driver.init_termios = tty_std_termios; + xen_console_driver.flags = + TTY_DRIVER_REAL_RAW | TTY_DRIVER_RESET_TERMIOS | TTY_DRIVER_NO_DEVFS; + xen_console_driver.refcount = &xen_console_refcount; + xen_console_driver.table = xen_console_table; + xen_console_driver.termios = xen_console_termios; + xen_console_driver.termios_locked = xen_console_termios_locked; + + xen_console_driver.open = xen_console_open; + xen_console_driver.close = xen_console_close; + xen_console_driver.write = xen_console_write; + xen_console_driver.write_room = xen_console_write_room; + xen_console_driver.put_char = xen_console_put_char; + xen_console_driver.flush_chars = xen_console_flush_chars; + xen_console_driver.chars_in_buffer = xen_console_chars_in_buffer; + xen_console_driver.send_xchar = xen_console_send_xchar; + xen_console_driver.flush_buffer = xen_console_flush_buffer; + xen_console_driver.throttle = xen_console_throttle; + xen_console_driver.unthrottle = xen_console_unthrottle; + xen_console_driver.wait_until_sent = xen_console_wait_until_sent; + + if ( tty_register_driver(&xen_console_driver) ) + panic("Couldn't register Xen virtual console driver\n"); + + if ( !(start_info.flags & SIF_INITDOMAIN) ) + { + if ( evtchn_request_port(0, control_event) != 0 ) + BUG(); + control_event(0); /* kickstart the console */ + } + else + { + request_irq(HYPEREVENT_IRQ(_EVENT_CONSOLE), + control_irq, 0, "console", NULL); + control_irq(0, NULL, NULL); /* kickstart the console */ + } + + printk("Xen virtual console successfully installed\n"); + + return 0; +} + +void __exit xen_con_fini(void) +{ + int ret; + + ret = tty_unregister_driver(&xen_console_driver); + if ( ret != 0 ) + printk(KERN_ERR "Unable to unregister Xen console driver: %d\n", ret); + + if ( !(start_info.flags & SIF_INITDOMAIN) ) + (void)evtchn_free_port(0); +} + +module_init(xen_con_init); +module_exit(xen_con_fini); + diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/dom0/Makefile b/xenolinux-2.4.25-sparse/arch/xen/drivers/dom0/Makefile new file mode 100644 index 0000000000..3e2e17bd23 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/dom0/Makefile @@ -0,0 +1,3 @@ +O_TARGET := drv.o +obj-y := core.o vfr.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/dom0/core.c b/xenolinux-2.4.25-sparse/arch/xen/drivers/dom0/core.c new file mode 100644 index 0000000000..b59f3e8a84 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/dom0/core.c @@ -0,0 +1,104 @@ +/****************************************************************************** + * core.c + * + * Interface to privileged domain-0 commands. + * + * Copyright (c) 2002-2004, K A Fraser, B Dragovic + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +static struct proc_dir_entry *privcmd_intf; + +static int privcmd_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long data) +{ + int ret = 0; + + switch ( cmd ) + { + case IOCTL_PRIVCMD_HYPERCALL: + { + privcmd_hypercall_t hypercall; + + if ( copy_from_user(&hypercall, (void *)data, sizeof(hypercall)) ) + return -EFAULT; + + __asm__ __volatile__ ( + "pushl %%ebx; pushl %%ecx; pushl %%edx; pushl %%esi; pushl %%edi; " + "movl 4(%%eax),%%ebx ;" + "movl 8(%%eax),%%ecx ;" + "movl 12(%%eax),%%edx ;" + "movl 16(%%eax),%%esi ;" + "movl 20(%%eax),%%edi ;" + "movl (%%eax),%%eax ;" + TRAP_INSTR "; " + "popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx" + : "=a" (ret) : "0" (&hypercall) : "memory" ); + + } + break; + + default: + ret = -EINVAL; + break; + } + return ret; +} + + +static struct file_operations privcmd_file_ops = { + ioctl : privcmd_ioctl +}; + + +static int __init init_module(void) +{ + if ( !(start_info.flags & SIF_PRIVILEGED) ) + return 0; + + privcmd_intf = create_xen_proc_entry("privcmd", 0400); + if ( privcmd_intf != NULL ) + { + privcmd_intf->owner = THIS_MODULE; + privcmd_intf->nlink = 1; + privcmd_intf->proc_fops = &privcmd_file_ops; + } + + return 0; +} + + +static void __exit cleanup_module(void) +{ + if ( privcmd_intf == NULL ) return; + remove_xen_proc_entry("privcmd"); + privcmd_intf = NULL; +} + + +module_init(init_module); +module_exit(cleanup_module); diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/dom0/vfr.c b/xenolinux-2.4.25-sparse/arch/xen/drivers/dom0/vfr.c new file mode 100644 index 0000000000..9d8ca0a32d --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/dom0/vfr.c @@ -0,0 +1,343 @@ +/****************************************************************************** + * vfr.c + * + * Interface to the virtual firewall/router. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct proc_dir_entry *proc_vfr; + +static unsigned char readbuf[1024]; + +/* Helpers, implemented at the bottom. */ +u32 getipaddr(const char *buff, unsigned int len); +u16 antous(const char *buff, int len); +u64 antoull(const char *buff, int len); +int anton(const char *buff, int len); + +static int vfr_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + strcpy(page, readbuf); + *readbuf = '\0'; + *eof = 1; + *start = page; + return strlen(page); +} + +/* The format for the vfr interface is as follows: + * + * COMMAND = [= [...]] + * + * where: + * + * COMMAND = { ACCEPT | COUNT } + * + * field=val pairs are as follows: + * + * field = { srcaddr | dstaddr } + * val is a dot seperated, numeric IP address. + * + * field = { srcport | dstport } + * val is a (16-bit) unsigned int + * + * field = { proto } + * val = { IP | TCP | UDP | ARP } + * + */ + +#define isspace(_x) ( ((_x)==' ') || ((_x)=='\t') || ((_x)=='\v') || \ + ((_x)=='\f') || ((_x)=='\r') || ((_x)=='\n') ) + +static int vfr_write_proc(struct file *file, const char *buffer, + u_long count, void *data) +{ + network_op_t op; + int ret, len; + int ts, te, tl; // token start, end, and length + int fs, fe, fl; // field. + + len = count; + ts = te = 0; + + memset(&op, 0, sizeof(network_op_t)); + + // get the command: + while ( count && isspace(buffer[ts]) ) { ts++; count--; } // skip spaces. + te = ts; + while ( count && !isspace(buffer[te]) ) { te++; count--; } // command end + if ( te <= ts ) goto bad; + tl = te - ts; + + if ( strncmp(&buffer[ts], "ADD", tl) == 0 ) + { + op.cmd = NETWORK_OP_ADDRULE; + } + else if ( strncmp(&buffer[ts], "DELETE", tl) == 0 ) + { + op.cmd = NETWORK_OP_DELETERULE; + } + else if ( strncmp(&buffer[ts], "PRINT", tl) == 0 ) + { + op.cmd = NETWORK_OP_GETRULELIST; + goto doneparsing; + } + + ts = te; + + // get the action + while ( count && (buffer[ts] == ' ') ) { ts++; count--; } // skip spaces. + te = ts; + while ( count && (buffer[te] != ' ') ) { te++; count--; } // command end + if ( te <= ts ) goto bad; + tl = te - ts; + + if ( strncmp(&buffer[ts], "ACCEPT", tl) == 0 ) + { + op.u.net_rule.action = NETWORK_ACTION_ACCEPT; + goto keyval; + } + if ( strncmp(&buffer[ts], "COUNT", tl) == 0 ) + { + op.u.net_rule.action = NETWORK_ACTION_COUNT; + goto keyval; + } + + // default case; + return (len); + + + // get the key=val pairs. + keyval: + while (count) + { + //get field + ts = te; while ( count && isspace(buffer[ts]) ) { ts++; count--; } + te = ts; + while ( count && !isspace(buffer[te]) && (buffer[te] != '=') ) + { te++; count--; } + if ( te <= ts ) + goto doneparsing; + tl = te - ts; + fs = ts; fe = te; fl = tl; // save the field markers. + // skip " = " (ignores extra equals.) + while ( count && (isspace(buffer[te]) || (buffer[te] == '=')) ) + { te++; count--; } + ts = te; + while ( count && !isspace(buffer[te]) ) { te++; count--; } + tl = te - ts; + + if ( (fl <= 0) || (tl <= 0) ) goto bad; + + /* NB. Prefix matches must go first! */ + if (strncmp(&buffer[fs], "src", fl) == 0) + { + op.u.net_rule.src_dom = VIF_SPECIAL; + op.u.net_rule.src_idx = VIF_ANY_INTERFACE; + } + else if (strncmp(&buffer[fs], "dst", fl) == 0) + { + op.u.net_rule.dst_dom = VIF_SPECIAL; + op.u.net_rule.dst_idx = VIF_PHYSICAL_INTERFACE; + } + else if (strncmp(&buffer[fs], "srcaddr", fl) == 0) + { + op.u.net_rule.src_addr = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstaddr", fl) == 0) + { + op.u.net_rule.dst_addr = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcaddrmask", fl) == 0) + { + op.u.net_rule.src_addr_mask = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstaddrmask", fl) == 0) + { + op.u.net_rule.dst_addr_mask = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcport", fl) == 0) + { + op.u.net_rule.src_port = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstport", fl) == 0) + { + op.u.net_rule.dst_port = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcportmask", fl) == 0) + { + op.u.net_rule.src_port_mask = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstportmask", fl) == 0) + { + op.u.net_rule.dst_port_mask = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcdom", fl) == 0) + { + op.u.net_rule.src_dom = antoull(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcidx", fl) == 0) + { + op.u.net_rule.src_idx = anton(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstdom", fl) == 0) + { + op.u.net_rule.dst_dom = antoull(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstidx", fl) == 0) + { + op.u.net_rule.dst_idx = anton(&buffer[ts], tl); + } + else if ( (strncmp(&buffer[fs], "proto", fl) == 0)) + { + if (strncmp(&buffer[ts], "any", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_ANY; + if (strncmp(&buffer[ts], "ip", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_IP; + if (strncmp(&buffer[ts], "tcp", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_TCP; + if (strncmp(&buffer[ts], "udp", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_UDP; + if (strncmp(&buffer[ts], "arp", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_ARP; + } + } + + doneparsing: + ret = HYPERVISOR_network_op(&op); + return(len); + + bad: + return(len); + + +} + +static int __init init_module(void) +{ + if ( !(start_info.flags & SIF_PRIVILEGED) ) + return 0; + + *readbuf = '\0'; + proc_vfr = create_xen_proc_entry("vfr", 0600); + if ( proc_vfr != NULL ) + { + proc_vfr->owner = THIS_MODULE; + proc_vfr->nlink = 1; + proc_vfr->read_proc = vfr_read_proc; + proc_vfr->write_proc = vfr_write_proc; + printk("Successfully installed virtual firewall/router interface\n"); + } + return 0; +} + +static void __exit cleanup_module(void) +{ + if ( proc_vfr == NULL ) return; + remove_xen_proc_entry("vfr"); + proc_vfr = NULL; +} + +module_init(init_module); +module_exit(cleanup_module); + +/* Helper functions start here: */ + +int anton(const char *buff, int len) +{ + int ret; + char c; + int sign = 1; + + ret = 0; + + if (len == 0) return 0; + if (*buff == '-') { sign = -1; buff++; len--; } + + while ( (len) && ((c = *buff) >= '0') && (c <= '9') ) + { + ret *= 10; + ret += c - '0'; + buff++; len--; + } + + ret *= sign; + return ret; +} + +u16 antous(const char *buff, int len) +{ + u16 ret; + char c; + + ret = 0; + + while ( (len) && ((c = *buff) >= '0') && (c <= '9') ) + { + ret *= 10; + ret += c - '0'; + buff++; len--; + } + + return ret; +} + +u64 antoull(const char *buff, int len) +{ + u64 ret; + char c; + + ret = 0; + + while ( (len) && ((c = *buff) >= '0') && (c <= '9') ) + { + ret *= 10; + ret += c - '0'; + buff++; len--; + } + + return ret; +} + +u32 getipaddr(const char *buff, unsigned int len) +{ + char c; + u32 ret, val; + + ret = 0; val = 0; + + while ( len ) + { + if (!((((c = *buff) >= '0') && ( c <= '9')) || ( c == '.' ) ) ) + { + return(0); // malformed. + } + + if ( c == '.' ) { + if (val > 255) return (0); //malformed. + ret = ret << 8; + ret += val; + val = 0; + len--; buff++; + continue; + } + val *= 10; + val += c - '0'; + buff++; len--; + } + ret = ret << 8; + ret += val; + + return (ret); +} + diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/evtchn/Makefile b/xenolinux-2.4.25-sparse/arch/xen/drivers/evtchn/Makefile new file mode 100644 index 0000000000..61c983f625 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/evtchn/Makefile @@ -0,0 +1,3 @@ +O_TARGET := drv.o +obj-y := evtchn.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/evtchn/evtchn.c b/xenolinux-2.4.25-sparse/arch/xen/drivers/evtchn/evtchn.c new file mode 100644 index 0000000000..a7978ee8d2 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/evtchn/evtchn.c @@ -0,0 +1,481 @@ +/****************************************************************************** + * evtchn.c + * + * Xenolinux driver for receiving and demuxing event-channel signals. + * + * Copyright (c) 2004, K A Fraser + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* NB. This must be shared amongst drivers if more things go in /dev/xen */ +static devfs_handle_t xen_dev_dir; + +/* Only one process may open /dev/xen/evtchn at any time. */ +static unsigned long evtchn_dev_inuse; + +/* Notification ring, accessed via /dev/xen/evtchn. */ +#define RING_SIZE 2048 /* 2048 16-bit entries */ +#define RING_MASK(_i) ((_i)&(RING_SIZE-1)) +static u16 *ring; +static unsigned int ring_cons, ring_prod, ring_overflow; + +/* Processes wait on this queue via /dev/xen/evtchn when ring is empty. */ +static DECLARE_WAIT_QUEUE_HEAD(evtchn_wait); +static struct fasync_struct *evtchn_async_queue; + +static evtchn_receiver_t rx_fns[1024]; + +static u32 pend_outstanding[32]; +static u32 disc_outstanding[32]; + +static spinlock_t lock; + +int evtchn_request_port(unsigned int port, evtchn_receiver_t rx_fn) +{ + unsigned long flags; + int rc; + + spin_lock_irqsave(&lock, flags); + + if ( rx_fns[port] != NULL ) + { + printk(KERN_ALERT "Event channel port %d already in use.\n", port); + rc = -EINVAL; + } + else + { + rx_fns[port] = rx_fn; + rc = 0; + } + + spin_unlock_irqrestore(&lock, flags); + + return rc; +} + +int evtchn_free_port(unsigned int port) +{ + unsigned long flags; + int rc; + + spin_lock_irqsave(&lock, flags); + + if ( rx_fns[port] == NULL ) + { + printk(KERN_ALERT "Event channel port %d not in use.\n", port); + rc = -EINVAL; + } + else + { + rx_fns[port] = NULL; + rc = 0; + } + + spin_unlock_irqrestore(&lock, flags); + + return rc; +} + +/* + * NB. Clearing port can race a notification from remote end. Caller must + * therefore recheck notification status on return to avoid missing events. + */ +void evtchn_clear_port(unsigned int port) +{ + unsigned int p = port & PORTIDX_MASK; + unsigned long flags; + + spin_lock_irqsave(&lock, flags); + + if ( unlikely(port & PORT_DISCONNECT) ) + { + clear_bit(p, &disc_outstanding[0]); + clear_bit(p, &HYPERVISOR_shared_info->event_channel_disc[0]); + } + else + { + clear_bit(p, &pend_outstanding[0]); + clear_bit(p, &HYPERVISOR_shared_info->event_channel_pend[0]); + } + + spin_unlock_irqrestore(&lock, flags); +} + +static inline void process_bitmask(u32 *sel, + u32 *mask, + u32 *outstanding, + unsigned int port_subtype) +{ + unsigned long l1, l2; + unsigned int l1_idx, l2_idx, port; + + l1 = xchg(sel, 0); + while ( (l1_idx = ffs(l1)) != 0 ) + { + l1_idx--; + l1 &= ~(1 << l1_idx); + + l2 = mask[l1_idx] & ~outstanding[l1_idx]; + outstanding[l1_idx] |= l2; + while ( (l2_idx = ffs(l2)) != 0 ) + { + l2_idx--; + l2 &= ~(1 << l2_idx); + + port = (l1_idx * 32) + l2_idx; + if ( rx_fns[port] != NULL ) + { + (*rx_fns[port])(port | port_subtype); + } + else if ( ring != NULL ) + { + if ( (ring_prod - ring_cons) < RING_SIZE ) + { + ring[RING_MASK(ring_prod)] = (u16)(port | port_subtype); + if ( ring_cons == ring_prod++ ) + { + wake_up_interruptible(&evtchn_wait); + kill_fasync(&evtchn_async_queue, SIGIO, POLL_IN); + } + } + else + { + ring_overflow = 1; + } + } + } + } +} + +static void evtchn_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + shared_info_t *si = HYPERVISOR_shared_info; + unsigned long flags; + + spin_lock_irqsave(&lock, flags); + + process_bitmask(&si->event_channel_pend_sel, + &si->event_channel_pend[0], + &pend_outstanding[0], + PORT_NORMAL); + + process_bitmask(&si->event_channel_disc_sel, + &si->event_channel_disc[0], + &disc_outstanding[0], + PORT_DISCONNECT); + + spin_unlock_irqrestore(&lock, flags); +} + +static void __evtchn_reset_buffer_ring(void) +{ + u32 m; + unsigned int i, j; + + /* Initialise the ring with currently outstanding notifications. */ + ring_cons = ring_prod = ring_overflow = 0; + + for ( i = 0; i < 32; i++ ) + { + m = pend_outstanding[i]; + while ( (j = ffs(m)) != 0 ) + { + m &= ~(1 << --j); + if ( rx_fns[(i * 32) + j] == NULL ) + ring[ring_prod++] = (u16)(((i * 32) + j) | PORT_NORMAL); + } + + m = disc_outstanding[i]; + while ( (j = ffs(m)) != 0 ) + { + m &= ~(1 << --j); + if ( rx_fns[(i * 32) + j] == NULL ) + ring[ring_prod++] = (u16)(((i * 32) + j) | PORT_DISCONNECT); + } + } +} + +static ssize_t evtchn_read(struct file *file, char *buf, + size_t count, loff_t *ppos) +{ + int rc; + unsigned int c, p, bytes1 = 0, bytes2 = 0; + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue(&evtchn_wait, &wait); + + count &= ~1; /* even number of bytes */ + + if ( count == 0 ) + { + rc = 0; + goto out; + } + + if ( count > PAGE_SIZE ) + count = PAGE_SIZE; + + for ( ; ; ) + { + set_current_state(TASK_INTERRUPTIBLE); + + if ( (c = ring_cons) != (p = ring_prod) ) + break; + + if ( ring_overflow ) + { + rc = -EFBIG; + goto out; + } + + if ( file->f_flags & O_NONBLOCK ) + { + rc = -EAGAIN; + goto out; + } + + if ( signal_pending(current) ) + { + rc = -ERESTARTSYS; + goto out; + } + + schedule(); + } + + /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */ + if ( ((c ^ p) & RING_SIZE) != 0 ) + { + bytes1 = (RING_SIZE - RING_MASK(c)) * sizeof(u16); + bytes2 = RING_MASK(p) * sizeof(u16); + } + else + { + bytes1 = (p - c) * sizeof(u16); + bytes2 = 0; + } + + /* Truncate chunks according to caller's maximum byte count. */ + if ( bytes1 > count ) + { + bytes1 = count; + bytes2 = 0; + } + else if ( (bytes1 + bytes2) > count ) + { + bytes2 = count - bytes1; + } + + if ( copy_to_user(buf, &ring[RING_MASK(c)], bytes1) || + ((bytes2 != 0) && copy_to_user(&buf[bytes1], &ring[0], bytes2)) ) + { + rc = -EFAULT; + goto out; + } + + ring_cons += (bytes1 + bytes2) / sizeof(u16); + + rc = bytes1 + bytes2; + + out: + __set_current_state(TASK_RUNNING); + remove_wait_queue(&evtchn_wait, &wait); + return rc; +} + +static ssize_t evtchn_write(struct file *file, const char *buf, + size_t count, loff_t *ppos) +{ + int rc, i; + u16 *kbuf = (u16 *)get_free_page(GFP_KERNEL); + + if ( kbuf == NULL ) + return -ENOMEM; + + count &= ~1; /* even number of bytes */ + + if ( count == 0 ) + { + rc = 0; + goto out; + } + + if ( count > PAGE_SIZE ) + count = PAGE_SIZE; + + if ( copy_from_user(kbuf, buf, count) != 0 ) + { + rc = -EFAULT; + goto out; + } + + for ( i = 0; i < (count/2); i++ ) + evtchn_clear_port(kbuf[i]); + + rc = count; + + out: + free_page((unsigned long)kbuf); + return rc; +} + +static int evtchn_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + if ( cmd != EVTCHN_RESET ) + return -EINVAL; + + spin_lock_irq(&lock); + __evtchn_reset_buffer_ring(); + spin_unlock_irq(&lock); + + return 0; +} + +static unsigned int evtchn_poll(struct file *file, poll_table *wait) +{ + unsigned int mask = POLLOUT | POLLWRNORM; + poll_wait(file, &evtchn_wait, wait); + if ( ring_cons != ring_prod ) + mask |= POLLIN | POLLRDNORM; + if ( ring_overflow ) + mask = POLLERR; + return mask; +} + +static int evtchn_fasync(int fd, struct file *filp, int on) +{ + return fasync_helper(fd, filp, on, &evtchn_async_queue); +} + +static int evtchn_open(struct inode *inode, struct file *filp) +{ + u16 *_ring; + + if ( test_and_set_bit(0, &evtchn_dev_inuse) ) + return -EBUSY; + + /* Allocate outside locked region so that we can use GFP_KERNEL. */ + if ( (_ring = (u16 *)get_free_page(GFP_KERNEL)) == NULL ) + return -ENOMEM; + + spin_lock_irq(&lock); + ring = _ring; + __evtchn_reset_buffer_ring(); + spin_unlock_irq(&lock); + + MOD_INC_USE_COUNT; + + return 0; +} + +static int evtchn_release(struct inode *inode, struct file *filp) +{ + spin_lock_irq(&lock); + if ( ring != NULL ) + { + free_page((unsigned long)ring); + ring = NULL; + } + spin_unlock_irq(&lock); + + evtchn_dev_inuse = 0; + + MOD_DEC_USE_COUNT; + + return 0; +} + +static struct file_operations evtchn_fops = { + owner: THIS_MODULE, + read: evtchn_read, + write: evtchn_write, + ioctl: evtchn_ioctl, + poll: evtchn_poll, + fasync: evtchn_fasync, + open: evtchn_open, + release: evtchn_release +}; + +static struct miscdevice evtchn_miscdev = { + minor: EVTCHN_MINOR, + name: "evtchn", + fops: &evtchn_fops +}; + +static int __init init_module(void) +{ + devfs_handle_t symlink_handle; + int err, pos; + char link_dest[64]; + + /* (DEVFS) create '/dev/misc/evtchn'. */ + err = misc_register(&evtchn_miscdev); + if ( err != 0 ) + { + printk(KERN_ALERT "Could not register /dev/misc/evtchn\n"); + return err; + } + + /* (DEVFS) create directory '/dev/xen'. */ + xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL); + + /* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */ + pos = devfs_generate_path(evtchn_miscdev.devfs_handle, + &link_dest[3], + sizeof(link_dest) - 3); + if ( pos >= 0 ) + strncpy(&link_dest[pos], "../", 3); + + /* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */ + (void)devfs_mk_symlink(xen_dev_dir, + "evtchn", + DEVFS_FL_DEFAULT, + &link_dest[pos], + &symlink_handle, + NULL); + + /* (DEVFS) automatically destroy the symlink with its destination. */ + devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle); + + err = request_irq(HYPEREVENT_IRQ(_EVENT_EVTCHN), + evtchn_interrupt, 0, "evtchn", NULL); + if ( err != 0 ) + { + printk(KERN_ALERT "Could not allocate evtchn receive interrupt\n"); + return err; + } + + /* Kickstart servicing of notifications. */ + evtchn_interrupt(0, NULL, NULL); + + printk("Event-channel driver installed.\n"); + + return 0; +} + +static void cleanup_module(void) +{ + free_irq(HYPEREVENT_IRQ(_EVENT_EVTCHN), NULL); + misc_deregister(&evtchn_miscdev); +} + +module_init(init_module); +module_exit(cleanup_module); diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/network/Makefile b/xenolinux-2.4.25-sparse/arch/xen/drivers/network/Makefile new file mode 100644 index 0000000000..2e4c1f4825 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/network/Makefile @@ -0,0 +1,3 @@ +O_TARGET := drv.o +obj-y := network.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/network/network.c b/xenolinux-2.4.25-sparse/arch/xen/drivers/network/network.c new file mode 100644 index 0000000000..c5d25442e2 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/network/network.c @@ -0,0 +1,631 @@ +/****************************************************************************** + * network.c + * + * Virtual network driver for XenoLinux. + * + * Copyright (c) 2002-2003, K A Fraser + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */ + +static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs); +static void network_tx_buf_gc(struct net_device *dev); +static void network_alloc_rx_buffers(struct net_device *dev); +static void cleanup_module(void); + +static struct list_head dev_list; + +struct net_private +{ + struct list_head list; + struct net_device *dev; + + struct net_device_stats stats; + NET_RING_IDX rx_resp_cons, tx_resp_cons; + unsigned int net_ring_fixmap_idx, tx_full; + net_ring_t *net_ring; + net_idx_t *net_idx; + spinlock_t tx_lock; + unsigned int idx; /* Domain-specific index of this VIF. */ + + unsigned int rx_bufs_to_notify; + +#define STATE_ACTIVE 0 +#define STATE_SUSPENDED 1 +#define STATE_CLOSED 2 + unsigned int state; + + /* + * {tx,rx}_skbs store outstanding skbuffs. The first entry in each + * array is an index into a chain of free entries. + */ + struct sk_buff *tx_skbs[XENNET_TX_RING_SIZE+1]; + struct sk_buff *rx_skbs[XENNET_RX_RING_SIZE+1]; +}; + +/* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */ +#define ADD_ID_TO_FREELIST(_list, _id) \ + (_list)[(_id)] = (_list)[0]; \ + (_list)[0] = (void *)(unsigned long)(_id); +#define GET_ID_FROM_FREELIST(_list) \ + ({ unsigned long _id = (unsigned long)(_list)[0]; \ + (_list)[0] = (_list)[_id]; \ + (unsigned short)_id; }) + + +static void _dbg_network_int(struct net_device *dev) +{ + struct net_private *np = dev->priv; + + if ( np->state == STATE_CLOSED ) + return; + + printk(KERN_ALERT "net: tx_full=%d, tx_resp_cons=0x%08x," + " tx_req_prod=0x%08x\nnet: tx_resp_prod=0x%08x," + " tx_event=0x%08x, state=%d\n", + np->tx_full, np->tx_resp_cons, + np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod, + np->net_idx->tx_event, + test_bit(__LINK_STATE_XOFF, &dev->state)); + printk(KERN_ALERT "net: rx_resp_cons=0x%08x," + " rx_req_prod=0x%08x\nnet: rx_resp_prod=0x%08x, rx_event=0x%08x\n", + np->rx_resp_cons, np->net_idx->rx_req_prod, + np->net_idx->rx_resp_prod, np->net_idx->rx_event); +} + + +static void dbg_network_int(int irq, void *unused, struct pt_regs *ptregs) +{ + struct list_head *ent; + struct net_private *np; + list_for_each ( ent, &dev_list ) + { + np = list_entry(ent, struct net_private, list); + _dbg_network_int(np->dev); + } +} + + +static int network_open(struct net_device *dev) +{ + struct net_private *np = dev->priv; + netop_t netop; + int i, ret; + + netop.cmd = NETOP_RESET_RINGS; + netop.vif = np->idx; + if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 ) + { + printk(KERN_ALERT "Possible net trouble: couldn't reset ring idxs\n"); + return ret; + } + + netop.cmd = NETOP_GET_VIF_INFO; + netop.vif = np->idx; + if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 ) + { + printk(KERN_ALERT "Couldn't get info for vif %d\n", np->idx); + return ret; + } + + memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN); + + set_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx, + netop.u.get_vif_info.ring_mfn << PAGE_SHIFT); + np->net_ring = (net_ring_t *)fix_to_virt( + FIX_NETRING0_BASE + np->net_ring_fixmap_idx); + np->net_idx = &HYPERVISOR_shared_info->net_idx[np->idx]; + + np->rx_bufs_to_notify = 0; + np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0; + memset(&np->stats, 0, sizeof(np->stats)); + spin_lock_init(&np->tx_lock); + memset(np->net_ring, 0, sizeof(*np->net_ring)); + memset(np->net_idx, 0, sizeof(*np->net_idx)); + + /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */ + for ( i = 0; i <= XENNET_TX_RING_SIZE; i++ ) + np->tx_skbs[i] = (void *)(i+1); + for ( i = 0; i <= XENNET_RX_RING_SIZE; i++ ) + np->rx_skbs[i] = (void *)(i+1); + + wmb(); + np->state = STATE_ACTIVE; + + network_alloc_rx_buffers(dev); + + netif_start_queue(dev); + + MOD_INC_USE_COUNT; + + return 0; +} + + +static void network_tx_buf_gc(struct net_device *dev) +{ + NET_RING_IDX i, prod; + unsigned short id; + struct net_private *np = dev->priv; + struct sk_buff *skb; + tx_entry_t *tx_ring = np->net_ring->tx_ring; + + do { + prod = np->net_idx->tx_resp_prod; + + for ( i = np->tx_resp_cons; i != prod; i++ ) + { + id = tx_ring[MASK_NET_TX_IDX(i)].resp.id; + skb = np->tx_skbs[id]; + ADD_ID_TO_FREELIST(np->tx_skbs, id); + dev_kfree_skb_any(skb); + } + + np->tx_resp_cons = prod; + + /* + * Set a new event, then check for race with update of tx_cons. Note + * that it is essential to schedule a callback, no matter how few + * buffers are pending. Even if there is space in the transmit ring, + * higher layers may be blocked because too much data is outstanding: + * in such cases notification from Xen is likely to be the only kick + * that we'll get. + */ + np->net_idx->tx_event = + prod + ((np->net_idx->tx_req_prod - prod) >> 1) + 1; + mb(); + } + while ( prod != np->net_idx->tx_resp_prod ); + + if ( np->tx_full && + ((np->net_idx->tx_req_prod - prod) < XENNET_TX_RING_SIZE) ) + { + np->tx_full = 0; + if ( np->state == STATE_ACTIVE ) + netif_wake_queue(dev); + } +} + + +static inline pte_t *get_ppte(void *addr) +{ + pgd_t *pgd; pmd_t *pmd; pte_t *pte; + pgd = pgd_offset_k( (unsigned long)addr); + pmd = pmd_offset(pgd, (unsigned long)addr); + pte = pte_offset(pmd, (unsigned long)addr); + return pte; +} + + +static void network_alloc_rx_buffers(struct net_device *dev) +{ + unsigned short id; + struct net_private *np = dev->priv; + struct sk_buff *skb; + netop_t netop; + NET_RING_IDX i = np->net_idx->rx_req_prod; + + if ( unlikely((i - np->rx_resp_cons) == XENNET_RX_RING_SIZE) || + unlikely(np->state != STATE_ACTIVE) ) + return; + + do { + skb = dev_alloc_skb(RX_BUF_SIZE); + if ( unlikely(skb == NULL) ) + break; + + skb->dev = dev; + + if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) ) + panic("alloc_skb needs to provide us page-aligned buffers."); + + id = GET_ID_FROM_FREELIST(np->rx_skbs); + np->rx_skbs[id] = skb; + + np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.id = id; + np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr = + virt_to_machine(get_ppte(skb->head)); + + np->rx_bufs_to_notify++; + } + while ( (++i - np->rx_resp_cons) != XENNET_RX_RING_SIZE ); + + /* + * We may have allocated buffers which have entries outstanding in the page + * update queue -- make sure we flush those first! + */ + flush_page_update_queue(); + + np->net_idx->rx_req_prod = i; + np->net_idx->rx_event = np->rx_resp_cons + 1; + + /* Batch Xen notifications. */ + if ( np->rx_bufs_to_notify > (XENNET_RX_RING_SIZE/4) ) + { + netop.cmd = NETOP_PUSH_BUFFERS; + netop.vif = np->idx; + (void)HYPERVISOR_net_io_op(&netop); + np->rx_bufs_to_notify = 0; + } +} + + +static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + unsigned short id; + struct net_private *np = (struct net_private *)dev->priv; + tx_req_entry_t *tx; + netop_t netop; + NET_RING_IDX i; + + if ( unlikely(np->tx_full) ) + { + printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name); + netif_stop_queue(dev); + return -ENOBUFS; + } + + if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= + PAGE_SIZE) ) + { + struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE); + if ( unlikely(new_skb == NULL) ) + return 1; + skb_put(new_skb, skb->len); + memcpy(new_skb->data, skb->data, skb->len); + dev_kfree_skb(skb); + skb = new_skb; + } + + spin_lock_irq(&np->tx_lock); + + i = np->net_idx->tx_req_prod; + + id = GET_ID_FROM_FREELIST(np->tx_skbs); + np->tx_skbs[id] = skb; + + tx = &np->net_ring->tx_ring[MASK_NET_TX_IDX(i)].req; + + tx->id = id; + tx->addr = phys_to_machine(virt_to_phys(skb->data)); + tx->size = skb->len; + + wmb(); + np->net_idx->tx_req_prod = i + 1; + + network_tx_buf_gc(dev); + + if ( (i - np->tx_resp_cons) == (XENNET_TX_RING_SIZE - 1) ) + { + np->tx_full = 1; + netif_stop_queue(dev); + } + + spin_unlock_irq(&np->tx_lock); + + np->stats.tx_bytes += skb->len; + np->stats.tx_packets++; + + /* Only notify Xen if there are no outstanding responses. */ + mb(); + if ( np->net_idx->tx_resp_prod == i ) + { + netop.cmd = NETOP_PUSH_BUFFERS; + netop.vif = np->idx; + (void)HYPERVISOR_net_io_op(&netop); + } + + return 0; +} + + +static inline void _network_interrupt(struct net_device *dev) +{ + struct net_private *np = dev->priv; + unsigned long flags; + struct sk_buff *skb; + rx_resp_entry_t *rx; + NET_RING_IDX i; + + if ( unlikely(np->state == STATE_CLOSED) ) + return; + + spin_lock_irqsave(&np->tx_lock, flags); + network_tx_buf_gc(dev); + spin_unlock_irqrestore(&np->tx_lock, flags); + + again: + for ( i = np->rx_resp_cons; i != np->net_idx->rx_resp_prod; i++ ) + { + rx = &np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].resp; + + skb = np->rx_skbs[rx->id]; + ADD_ID_TO_FREELIST(np->rx_skbs, rx->id); + + if ( unlikely(rx->status != RING_STATUS_OK) ) + { + /* Gate this error. We get a (valid) slew of them on suspend. */ + if ( np->state == STATE_ACTIVE ) + printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status); + dev_kfree_skb_any(skb); + continue; + } + + /* + * Set up shinfo -- from alloc_skb This was particularily nasty: the + * shared info is hidden at the back of the data area (presumably so it + * can be shared), but on page flip it gets very spunked. + */ + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; + skb_shinfo(skb)->frag_list = NULL; + + phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] = + (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT; + + skb->data = skb->tail = skb->head + rx->offset; + skb_put(skb, rx->size); + skb->protocol = eth_type_trans(skb, dev); + + np->stats.rx_packets++; + + np->stats.rx_bytes += rx->size; + netif_rx(skb); + dev->last_rx = jiffies; + } + + np->rx_resp_cons = i; + + network_alloc_rx_buffers(dev); + + /* Deal with hypervisor racing our resetting of rx_event. */ + mb(); + if ( np->net_idx->rx_resp_prod != i ) + goto again; +} + + +static void network_interrupt(int irq, void *unused, struct pt_regs *ptregs) +{ + struct list_head *ent; + struct net_private *np; + list_for_each ( ent, &dev_list ) + { + np = list_entry(ent, struct net_private, list); + _network_interrupt(np->dev); + } +} + + +static int network_close(struct net_device *dev) +{ + struct net_private *np = dev->priv; + netop_t netop; + + np->state = STATE_SUSPENDED; + wmb(); + + netif_stop_queue(np->dev); + + netop.cmd = NETOP_FLUSH_BUFFERS; + netop.vif = np->idx; + (void)HYPERVISOR_net_io_op(&netop); + + while ( (np->rx_resp_cons != np->net_idx->rx_req_prod) || + (np->tx_resp_cons != np->net_idx->tx_req_prod) ) + { + barrier(); + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(1); + } + + wmb(); + np->state = STATE_CLOSED; + wmb(); + + /* Now no longer safe to take interrupts for this device. */ + clear_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx); + + MOD_DEC_USE_COUNT; + + return 0; +} + + +static struct net_device_stats *network_get_stats(struct net_device *dev) +{ + struct net_private *np = (struct net_private *)dev->priv; + return &np->stats; +} + + +/* + * This notifier is installed for domain 0 only. + * All other domains have VFR rules installed on their behalf by domain 0 + * when they are created. For bootstrap, Xen creates wildcard rules for + * domain 0 -- this notifier is used to detect when we find our proper + * IP address, so we can poke down proper rules and remove the wildcards. + */ +static int inetdev_notify(struct notifier_block *this, + unsigned long event, + void *ptr) +{ + struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; + struct net_device *dev = ifa->ifa_dev->dev; + struct list_head *ent; + struct net_private *np; + int idx = -1; + network_op_t op; + + list_for_each ( ent, &dev_list ) + { + np = list_entry(dev_list.next, struct net_private, list); + if ( np->dev == dev ) + idx = np->idx; + } + + if ( idx == -1 ) + goto out; + + memset(&op, 0, sizeof(op)); + op.u.net_rule.proto = NETWORK_PROTO_ANY; + op.u.net_rule.action = NETWORK_ACTION_ACCEPT; + + if ( event == NETDEV_UP ) + op.cmd = NETWORK_OP_ADDRULE; + else if ( event == NETDEV_DOWN ) + op.cmd = NETWORK_OP_DELETERULE; + else + goto out; + + op.u.net_rule.src_dom = 0; + op.u.net_rule.src_idx = idx; + op.u.net_rule.dst_dom = VIF_SPECIAL; + op.u.net_rule.dst_idx = VIF_PHYSICAL_INTERFACE; + op.u.net_rule.src_addr = ntohl(ifa->ifa_address); + op.u.net_rule.src_addr_mask = ~0UL; + op.u.net_rule.dst_addr = 0; + op.u.net_rule.dst_addr_mask = 0; + (void)HYPERVISOR_network_op(&op); + + op.u.net_rule.src_dom = VIF_SPECIAL; + op.u.net_rule.src_idx = VIF_ANY_INTERFACE; + op.u.net_rule.dst_dom = 0; + op.u.net_rule.dst_idx = idx; + op.u.net_rule.src_addr = 0; + op.u.net_rule.src_addr_mask = 0; + op.u.net_rule.dst_addr = ntohl(ifa->ifa_address); + op.u.net_rule.dst_addr_mask = ~0UL; + (void)HYPERVISOR_network_op(&op); + + out: + return NOTIFY_DONE; +} + +static struct notifier_block notifier_inetdev = { + .notifier_call = inetdev_notify, + .next = NULL, + .priority = 0 +}; + + +static int __init init_module(void) +{ + int i, fixmap_idx=-1, err; + struct net_device *dev; + struct net_private *np; + netop_t netop; + + INIT_LIST_HEAD(&dev_list); + + /* + * Domain 0 must poke its own network rules as it discovers its IP + * addresses. All other domains have a privileged "parent" to do this for + * them at start of day. + */ + if ( start_info.flags & SIF_INITDOMAIN ) + (void)register_inetaddr_notifier(¬ifier_inetdev); + + err = request_irq(HYPEREVENT_IRQ(_EVENT_NET), network_interrupt, + SA_SAMPLE_RANDOM, "network", NULL); + if ( err ) + { + printk(KERN_WARNING "Could not allocate network interrupt\n"); + goto fail; + } + + err = request_irq(HYPEREVENT_IRQ(_EVENT_DEBUG), dbg_network_int, + SA_SHIRQ, "net_dbg", &dbg_network_int); + if ( err ) + printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n"); + + for ( i = 0; i < MAX_DOMAIN_VIFS; i++ ) + { + /* If the VIF is invalid then the query hypercall will fail. */ + netop.cmd = NETOP_GET_VIF_INFO; + netop.vif = i; + if ( HYPERVISOR_net_io_op(&netop) != 0 ) + continue; + + /* We actually only support up to 4 vifs right now. */ + if ( ++fixmap_idx == 4 ) + break; + + dev = alloc_etherdev(sizeof(struct net_private)); + if ( dev == NULL ) + { + err = -ENOMEM; + goto fail; + } + + np = dev->priv; + np->state = STATE_CLOSED; + np->net_ring_fixmap_idx = fixmap_idx; + np->idx = i; + + SET_MODULE_OWNER(dev); + dev->open = network_open; + dev->hard_start_xmit = network_start_xmit; + dev->stop = network_close; + dev->get_stats = network_get_stats; + + memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN); + + if ( (err = register_netdev(dev)) != 0 ) + { + kfree(dev); + goto fail; + } + + np->dev = dev; + list_add(&np->list, &dev_list); + } + + return 0; + + fail: + cleanup_module(); + return err; +} + + +static void cleanup_module(void) +{ + struct net_private *np; + struct net_device *dev; + + while ( !list_empty(&dev_list) ) + { + np = list_entry(dev_list.next, struct net_private, list); + list_del(&np->list); + dev = np->dev; + unregister_netdev(dev); + kfree(dev); + } + + if ( start_info.flags & SIF_INITDOMAIN ) + (void)unregister_inetaddr_notifier(¬ifier_inetdev); +} + + +module_init(init_module); +module_exit(cleanup_module); diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/vnetif/Makefile b/xenolinux-2.4.25-sparse/arch/xen/drivers/vnetif/Makefile new file mode 100644 index 0000000000..304c2e78ef --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/vnetif/Makefile @@ -0,0 +1,3 @@ +O_TARGET := drv.o +obj-y := vnetif.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xen/drivers/vnetif/vnetif.c b/xenolinux-2.4.25-sparse/arch/xen/drivers/vnetif/vnetif.c new file mode 100644 index 0000000000..91f3c5c17e --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/drivers/vnetif/vnetif.c @@ -0,0 +1,553 @@ +/****************************************************************************** + * vnetif.c + * + * Virtual network driver for XenoLinux. + * + * Copyright (c) 2002-2004, K A Fraser + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */ + +static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs); +static void network_tx_buf_gc(struct net_device *dev); +static void network_alloc_rx_buffers(struct net_device *dev); +static void cleanup_module(void); + +static struct list_head dev_list; + +struct net_private +{ + struct list_head list; + struct net_device *dev; + + struct net_device_stats stats; + NET_RING_IDX rx_resp_cons, tx_resp_cons; + unsigned int net_ring_fixmap_idx, tx_full; + net_ring_t *net_ring; + net_idx_t *net_idx; + spinlock_t tx_lock; + unsigned int idx; /* Domain-specific index of this VIF. */ + + unsigned int rx_bufs_to_notify; + +#define STATE_ACTIVE 0 +#define STATE_SUSPENDED 1 +#define STATE_CLOSED 2 + unsigned int state; + + /* + * {tx,rx}_skbs store outstanding skbuffs. The first entry in each + * array is an index into a chain of free entries. + */ + struct sk_buff *tx_skbs[XENNET_TX_RING_SIZE+1]; + struct sk_buff *rx_skbs[XENNET_RX_RING_SIZE+1]; +}; + +/* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */ +#define ADD_ID_TO_FREELIST(_list, _id) \ + (_list)[(_id)] = (_list)[0]; \ + (_list)[0] = (void *)(unsigned long)(_id); +#define GET_ID_FROM_FREELIST(_list) \ + ({ unsigned long _id = (unsigned long)(_list)[0]; \ + (_list)[0] = (_list)[_id]; \ + (unsigned short)_id; }) + + +static void _dbg_network_int(struct net_device *dev) +{ + struct net_private *np = dev->priv; + + if ( np->state == STATE_CLOSED ) + return; + + printk(KERN_ALERT "net: tx_full=%d, tx_resp_cons=0x%08x," + " tx_req_prod=0x%08x\nnet: tx_resp_prod=0x%08x," + " tx_event=0x%08x, state=%d\n", + np->tx_full, np->tx_resp_cons, + np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod, + np->net_idx->tx_event, + test_bit(__LINK_STATE_XOFF, &dev->state)); + printk(KERN_ALERT "net: rx_resp_cons=0x%08x," + " rx_req_prod=0x%08x\nnet: rx_resp_prod=0x%08x, rx_event=0x%08x\n", + np->rx_resp_cons, np->net_idx->rx_req_prod, + np->net_idx->rx_resp_prod, np->net_idx->rx_event); +} + + +static void dbg_network_int(int irq, void *unused, struct pt_regs *ptregs) +{ + struct list_head *ent; + struct net_private *np; + list_for_each ( ent, &dev_list ) + { + np = list_entry(ent, struct net_private, list); + _dbg_network_int(np->dev); + } +} + + +static int network_open(struct net_device *dev) +{ + struct net_private *np = dev->priv; + netop_t netop; + int i, ret; + + netop.cmd = NETOP_RESET_RINGS; + netop.vif = np->idx; + if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 ) + { + printk(KERN_ALERT "Possible net trouble: couldn't reset ring idxs\n"); + return ret; + } + + netop.cmd = NETOP_GET_VIF_INFO; + netop.vif = np->idx; + if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 ) + { + printk(KERN_ALERT "Couldn't get info for vif %d\n", np->idx); + return ret; + } + + memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN); + + set_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx, + netop.u.get_vif_info.ring_mfn << PAGE_SHIFT); + np->net_ring = (net_ring_t *)fix_to_virt( + FIX_NETRING0_BASE + np->net_ring_fixmap_idx); + np->net_idx = &HYPERVISOR_shared_info->net_idx[np->idx]; + + np->rx_bufs_to_notify = 0; + np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0; + memset(&np->stats, 0, sizeof(np->stats)); + spin_lock_init(&np->tx_lock); + memset(np->net_ring, 0, sizeof(*np->net_ring)); + memset(np->net_idx, 0, sizeof(*np->net_idx)); + + /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */ + for ( i = 0; i <= XENNET_TX_RING_SIZE; i++ ) + np->tx_skbs[i] = (void *)(i+1); + for ( i = 0; i <= XENNET_RX_RING_SIZE; i++ ) + np->rx_skbs[i] = (void *)(i+1); + + wmb(); + np->state = STATE_ACTIVE; + + network_alloc_rx_buffers(dev); + + netif_start_queue(dev); + + MOD_INC_USE_COUNT; + + return 0; +} + + +static void network_tx_buf_gc(struct net_device *dev) +{ + NET_RING_IDX i, prod; + unsigned short id; + struct net_private *np = dev->priv; + struct sk_buff *skb; + tx_entry_t *tx_ring = np->net_ring->tx_ring; + + do { + prod = np->net_idx->tx_resp_prod; + + for ( i = np->tx_resp_cons; i != prod; i++ ) + { + id = tx_ring[MASK_NET_TX_IDX(i)].resp.id; + skb = np->tx_skbs[id]; + ADD_ID_TO_FREELIST(np->tx_skbs, id); + dev_kfree_skb_any(skb); + } + + np->tx_resp_cons = prod; + + /* + * Set a new event, then check for race with update of tx_cons. Note + * that it is essential to schedule a callback, no matter how few + * buffers are pending. Even if there is space in the transmit ring, + * higher layers may be blocked because too much data is outstanding: + * in such cases notification from Xen is likely to be the only kick + * that we'll get. + */ + np->net_idx->tx_event = + prod + ((np->net_idx->tx_req_prod - prod) >> 1) + 1; + mb(); + } + while ( prod != np->net_idx->tx_resp_prod ); + + if ( np->tx_full && + ((np->net_idx->tx_req_prod - prod) < XENNET_TX_RING_SIZE) ) + { + np->tx_full = 0; + if ( np->state == STATE_ACTIVE ) + netif_wake_queue(dev); + } +} + + +static inline pte_t *get_ppte(void *addr) +{ + pgd_t *pgd; pmd_t *pmd; pte_t *pte; + pgd = pgd_offset_k( (unsigned long)addr); + pmd = pmd_offset(pgd, (unsigned long)addr); + pte = pte_offset(pmd, (unsigned long)addr); + return pte; +} + + +static void network_alloc_rx_buffers(struct net_device *dev) +{ + unsigned short id; + struct net_private *np = dev->priv; + struct sk_buff *skb; + netop_t netop; + NET_RING_IDX i = np->net_idx->rx_req_prod; + + if ( unlikely((i - np->rx_resp_cons) == XENNET_RX_RING_SIZE) || + unlikely(np->state != STATE_ACTIVE) ) + return; + + do { + skb = dev_alloc_skb(RX_BUF_SIZE); + if ( unlikely(skb == NULL) ) + break; + + skb->dev = dev; + + if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) ) + panic("alloc_skb needs to provide us page-aligned buffers."); + + id = GET_ID_FROM_FREELIST(np->rx_skbs); + np->rx_skbs[id] = skb; + + np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.id = id; + np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr = + virt_to_machine(get_ppte(skb->head)); + + np->rx_bufs_to_notify++; + } + while ( (++i - np->rx_resp_cons) != XENNET_RX_RING_SIZE ); + + /* + * We may have allocated buffers which have entries outstanding in the page + * update queue -- make sure we flush those first! + */ + flush_page_update_queue(); + + np->net_idx->rx_req_prod = i; + np->net_idx->rx_event = np->rx_resp_cons + 1; + + /* Batch Xen notifications. */ + if ( np->rx_bufs_to_notify > (XENNET_RX_RING_SIZE/4) ) + { + netop.cmd = NETOP_PUSH_BUFFERS; + netop.vif = np->idx; + (void)HYPERVISOR_net_io_op(&netop); + np->rx_bufs_to_notify = 0; + } +} + + +static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + unsigned short id; + struct net_private *np = (struct net_private *)dev->priv; + tx_req_entry_t *tx; + netop_t netop; + NET_RING_IDX i; + + if ( unlikely(np->tx_full) ) + { + printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name); + netif_stop_queue(dev); + return -ENOBUFS; + } + + if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= + PAGE_SIZE) ) + { + struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE); + if ( unlikely(new_skb == NULL) ) + return 1; + skb_put(new_skb, skb->len); + memcpy(new_skb->data, skb->data, skb->len); + dev_kfree_skb(skb); + skb = new_skb; + } + + spin_lock_irq(&np->tx_lock); + + i = np->net_idx->tx_req_prod; + + id = GET_ID_FROM_FREELIST(np->tx_skbs); + np->tx_skbs[id] = skb; + + tx = &np->net_ring->tx_ring[MASK_NET_TX_IDX(i)].req; + + tx->id = id; + tx->addr = phys_to_machine(virt_to_phys(skb->data)); + tx->size = skb->len; + + wmb(); + np->net_idx->tx_req_prod = i + 1; + + network_tx_buf_gc(dev); + + if ( (i - np->tx_resp_cons) == (XENNET_TX_RING_SIZE - 1) ) + { + np->tx_full = 1; + netif_stop_queue(dev); + } + + spin_unlock_irq(&np->tx_lock); + + np->stats.tx_bytes += skb->len; + np->stats.tx_packets++; + + /* Only notify Xen if there are no outstanding responses. */ + mb(); + if ( np->net_idx->tx_resp_prod == i ) + { + netop.cmd = NETOP_PUSH_BUFFERS; + netop.vif = np->idx; + (void)HYPERVISOR_net_io_op(&netop); + } + + return 0; +} + + +static inline void _network_interrupt(struct net_device *dev) +{ + struct net_private *np = dev->priv; + unsigned long flags; + struct sk_buff *skb; + rx_resp_entry_t *rx; + NET_RING_IDX i; + + if ( unlikely(np->state == STATE_CLOSED) ) + return; + + spin_lock_irqsave(&np->tx_lock, flags); + network_tx_buf_gc(dev); + spin_unlock_irqrestore(&np->tx_lock, flags); + + again: + for ( i = np->rx_resp_cons; i != np->net_idx->rx_resp_prod; i++ ) + { + rx = &np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].resp; + + skb = np->rx_skbs[rx->id]; + ADD_ID_TO_FREELIST(np->rx_skbs, rx->id); + + if ( unlikely(rx->status != RING_STATUS_OK) ) + { + /* Gate this error. We get a (valid) slew of them on suspend. */ + if ( np->state == STATE_ACTIVE ) + printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status); + dev_kfree_skb_any(skb); + continue; + } + + /* + * Set up shinfo -- from alloc_skb This was particularily nasty: the + * shared info is hidden at the back of the data area (presumably so it + * can be shared), but on page flip it gets very spunked. + */ + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; + skb_shinfo(skb)->frag_list = NULL; + + phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] = + (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT; + + skb->data = skb->tail = skb->head + rx->offset; + skb_put(skb, rx->size); + skb->protocol = eth_type_trans(skb, dev); + + np->stats.rx_packets++; + + np->stats.rx_bytes += rx->size; + netif_rx(skb); + dev->last_rx = jiffies; + } + + np->rx_resp_cons = i; + + network_alloc_rx_buffers(dev); + + /* Deal with hypervisor racing our resetting of rx_event. */ + mb(); + if ( np->net_idx->rx_resp_prod != i ) + goto again; +} + + +static void network_interrupt(int irq, void *unused, struct pt_regs *ptregs) +{ + struct list_head *ent; + struct net_private *np; + list_for_each ( ent, &dev_list ) + { + np = list_entry(ent, struct net_private, list); + _network_interrupt(np->dev); + } +} + + +static int network_close(struct net_device *dev) +{ + struct net_private *np = dev->priv; + netop_t netop; + + np->state = STATE_SUSPENDED; + wmb(); + + netif_stop_queue(np->dev); + + netop.cmd = NETOP_FLUSH_BUFFERS; + netop.vif = np->idx; + (void)HYPERVISOR_net_io_op(&netop); + + while ( (np->rx_resp_cons != np->net_idx->rx_req_prod) || + (np->tx_resp_cons != np->net_idx->tx_req_prod) ) + { + barrier(); + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(1); + } + + wmb(); + np->state = STATE_CLOSED; + wmb(); + + /* Now no longer safe to take interrupts for this device. */ + clear_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx); + + MOD_DEC_USE_COUNT; + + return 0; +} + + +static struct net_device_stats *network_get_stats(struct net_device *dev) +{ + struct net_private *np = (struct net_private *)dev->priv; + return &np->stats; +} + + +static int __init init_module(void) +{ +#if 0 + int i, fixmap_idx=-1, err; + struct net_device *dev; + struct net_private *np; + netop_t netop; + + INIT_LIST_HEAD(&dev_list); + + err = request_irq(HYPEREVENT_IRQ(_EVENT_NET), network_interrupt, + SA_SAMPLE_RANDOM, "network", NULL); + if ( err ) + { + printk(KERN_WARNING "Could not allocate network interrupt\n"); + goto fail; + } + + err = request_irq(HYPEREVENT_IRQ(_EVENT_DEBUG), dbg_network_int, + SA_SHIRQ, "net_dbg", &dbg_network_int); + if ( err ) + printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n"); + + for ( i = 0; i < MAX_DOMAIN_VIFS; i++ ) + { + /* If the VIF is invalid then the query hypercall will fail. */ + netop.cmd = NETOP_GET_VIF_INFO; + netop.vif = i; + if ( HYPERVISOR_net_io_op(&netop) != 0 ) + continue; + + /* We actually only support up to 4 vifs right now. */ + if ( ++fixmap_idx == 4 ) + break; + + dev = alloc_etherdev(sizeof(struct net_private)); + if ( dev == NULL ) + { + err = -ENOMEM; + goto fail; + } + + np = dev->priv; + np->state = STATE_CLOSED; + np->net_ring_fixmap_idx = fixmap_idx; + np->idx = i; + + SET_MODULE_OWNER(dev); + dev->open = network_open; + dev->hard_start_xmit = network_start_xmit; + dev->stop = network_close; + dev->get_stats = network_get_stats; + + memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN); + + if ( (err = register_netdev(dev)) != 0 ) + { + kfree(dev); + goto fail; + } + + np->dev = dev; + list_add(&np->list, &dev_list); + } + + return 0; + + fail: + cleanup_module(); + return err; +#endif + return 0; +} + + +static void cleanup_module(void) +{ + struct net_private *np; + struct net_device *dev; + + while ( !list_empty(&dev_list) ) + { + np = list_entry(dev_list.next, struct net_private, list); + list_del(&np->list); + dev = np->dev; + unregister_netdev(dev); + kfree(dev); + } +} + + +module_init(init_module); +module_exit(cleanup_module); diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/Makefile b/xenolinux-2.4.25-sparse/arch/xen/kernel/Makefile new file mode 100644 index 0000000000..10fc43e742 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/Makefile @@ -0,0 +1,19 @@ + +.S.o: + $(CC) $(AFLAGS) -traditional -c $< -o $*.o + +all: kernel.o head.o init_task.o + +O_TARGET := kernel.o + +export-objs := i386_ksyms.o + +obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ + ptrace.o ioport.o ldt.o setup.o time.o sys_i386.o \ + i386_ksyms.o i387.o hypervisor.o physirq.o pci-dma.o + +ifdef CONFIG_PCI +obj-y += pci-i386.o pci-pc.o pci-irq.o +endif + +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/entry.S b/xenolinux-2.4.25-sparse/arch/xen/kernel/entry.S new file mode 100644 index 0000000000..9b1a77d4c1 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/entry.S @@ -0,0 +1,781 @@ +/* + * linux/arch/i386/entry.S + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * entry.S contains the system-call and fault low-level handling routines. + * This also contains the timer-interrupt handler, as well as all interrupts + * and faults that can result in a task-switch. + * + * NOTE: This code handles signal-recognition, which happens every time + * after a timer-interrupt and after each system call. + * + * I changed all the .align's to 4 (16 byte alignment), as that's faster + * on a 486. + * + * Stack layout in 'ret_from_system_call': + * ptrace needs to have all regs on the stack. + * if the order here is changed, it needs to be + * updated in fork.c:copy_process, signal.c:do_signal, + * ptrace.c and ptrace.h + * + * 0(%esp) - %ebx + * 4(%esp) - %ecx + * 8(%esp) - %edx + * C(%esp) - %esi + * 10(%esp) - %edi + * 14(%esp) - %ebp + * 18(%esp) - %eax + * 1C(%esp) - %ds + * 20(%esp) - %es + * 24(%esp) - orig_eax + * 28(%esp) - %eip + * 2C(%esp) - %cs + * 30(%esp) - %eflags + * 34(%esp) - %oldesp + * 38(%esp) - %oldss + * + * "current" is in register %ebx during any slow entries. + */ + +#include +#include +#include +#include +#include + +EBX = 0x00 +ECX = 0x04 +EDX = 0x08 +ESI = 0x0C +EDI = 0x10 +EBP = 0x14 +EAX = 0x18 +DS = 0x1C +ES = 0x20 +ORIG_EAX = 0x24 +EIP = 0x28 +CS = 0x2C +EFLAGS = 0x30 +OLDESP = 0x34 +OLDSS = 0x38 + +CF_MASK = 0x00000001 +TF_MASK = 0x00000100 +IF_MASK = 0x00000200 +DF_MASK = 0x00000400 +NT_MASK = 0x00004000 + +/* + * these are offsets into the task-struct. + */ +state = 0 +flags = 4 +sigpending = 8 +addr_limit = 12 +exec_domain = 16 +need_resched = 20 +tsk_ptrace = 24 +processor = 52 + +ENOSYS = 38 + + +#define SAVE_ALL \ + cld; \ + pushl %es; \ + pushl %ds; \ + pushl %eax; \ + pushl %ebp; \ + pushl %edi; \ + pushl %esi; \ + pushl %edx; \ + pushl %ecx; \ + pushl %ebx; \ + movl $(__KERNEL_DS),%edx; \ + movl %edx,%ds; \ + movl %edx,%es; + +#define RESTORE_ALL \ + popl %ebx; \ + popl %ecx; \ + popl %edx; \ + popl %esi; \ + popl %edi; \ + popl %ebp; \ + popl %eax; \ +1: popl %ds; \ +2: popl %es; \ + addl $4,%esp; \ +3: iret; \ +.section .fixup,"ax"; \ +4: movl $0,(%esp); \ + jmp 1b; \ +5: movl $0,(%esp); \ + jmp 2b; \ +6: pushl %ss; \ + popl %ds; \ + pushl %ss; \ + popl %es; \ + pushl $11; \ + call do_exit; \ +.previous; \ +.section __ex_table,"a";\ + .align 4; \ + .long 1b,4b; \ + .long 2b,5b; \ + .long 3b,6b; \ +.previous + +#define GET_CURRENT(reg) \ + movl $-8192, reg; \ + andl %esp, reg + +ENTRY(lcall7) + pushfl # We get a different stack layout with call + pushl %eax # gates, which has to be cleaned up later.. + SAVE_ALL + movl EIP(%esp),%eax # due to call gates, this is eflags, not eip.. + movl CS(%esp),%edx # this is eip.. + movl EFLAGS(%esp),%ecx # and this is cs.. + movl %eax,EFLAGS(%esp) # + andl $~(NT_MASK|TF_MASK|DF_MASK), %eax + pushl %eax + popfl + movl %edx,EIP(%esp) # Now we move them to their "normal" places + movl %ecx,CS(%esp) # + movl %esp,%ebx + pushl %ebx + andl $-8192,%ebx # GET_CURRENT + movl exec_domain(%ebx),%edx # Get the execution domain + movl 4(%edx),%edx # Get the lcall7 handler for the domain + pushl $0x7 + call *%edx + addl $4, %esp + popl %eax + jmp ret_from_sys_call + +ENTRY(lcall27) + pushfl # We get a different stack layout with call + pushl %eax # gates, which has to be cleaned up later.. + SAVE_ALL + movl EIP(%esp),%eax # due to call gates, this is eflags, not eip.. + movl CS(%esp),%edx # this is eip.. + movl EFLAGS(%esp),%ecx # and this is cs.. + movl %eax,EFLAGS(%esp) # + andl $~(NT_MASK|TF_MASK|DF_MASK), %eax + pushl %eax + popfl + movl %edx,EIP(%esp) # Now we move them to their "normal" places + movl %ecx,CS(%esp) # + movl %esp,%ebx + pushl %ebx + andl $-8192,%ebx # GET_CURRENT + movl exec_domain(%ebx),%edx # Get the execution domain + movl 4(%edx),%edx # Get the lcall7 handler for the domain + pushl $0x27 + call *%edx + addl $4, %esp + popl %eax + jmp ret_from_sys_call + +ENTRY(ret_from_fork) + pushl %ebx + call SYMBOL_NAME(schedule_tail) + addl $4, %esp + GET_CURRENT(%ebx) + testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS + jne tracesys_exit + jmp ret_from_sys_call + +/* + * Return to user mode is not as complex as all this looks, + * but we want the default path for a system call return to + * go as quickly as possible which is why some of this is + * less clear than it otherwise should be. + */ +ENTRY(system_call) + pushl %eax # save orig_eax + SAVE_ALL + GET_CURRENT(%ebx) + testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS + jne tracesys + cmpl $(NR_syscalls),%eax + jae badsys + call *SYMBOL_NAME(sys_call_table)(,%eax,4) + movl %eax,EAX(%esp) # save the return value +ENTRY(ret_from_sys_call) + movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi + btrl $EVENTS_MASTER_ENABLE_BIT,4(%esi) # make tests atomic +ret_syscall_tests: + cmpl $0,need_resched(%ebx) + jne reschedule + cmpl $0,sigpending(%ebx) + je safesti # ensure need_resched updates are seen +signal_return: + btsl $EVENTS_MASTER_ENABLE_BIT,4(%esi) # reenable event callbacks + movl %esp,%eax + xorl %edx,%edx + call SYMBOL_NAME(do_signal) + jmp ret_from_sys_call + + ALIGN +restore_all: + RESTORE_ALL + + ALIGN +tracesys: + movl $-ENOSYS,EAX(%esp) + call SYMBOL_NAME(syscall_trace) + movl ORIG_EAX(%esp),%eax + cmpl $(NR_syscalls),%eax + jae tracesys_exit + call *SYMBOL_NAME(sys_call_table)(,%eax,4) + movl %eax,EAX(%esp) # save the return value +tracesys_exit: + call SYMBOL_NAME(syscall_trace) + jmp ret_from_sys_call +badsys: + movl $-ENOSYS,EAX(%esp) + jmp ret_from_sys_call + + ALIGN +ENTRY(ret_from_intr) + GET_CURRENT(%ebx) +ret_from_exception: + movb CS(%esp),%al + testl $2,%eax + jne ret_from_sys_call + jmp restore_all + + ALIGN +reschedule: + btsl $EVENTS_MASTER_ENABLE_BIT,4(%esi) # reenable event callbacks + call SYMBOL_NAME(schedule) # test + jmp ret_from_sys_call + +ENTRY(divide_error) + pushl $0 # no error code + pushl $ SYMBOL_NAME(do_divide_error) + ALIGN +error_code: + pushl %ds + pushl %eax + xorl %eax,%eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + decl %eax # eax = -1 + pushl %ecx + pushl %ebx + GET_CURRENT(%ebx) + cld + movl %es,%ecx + movl ORIG_EAX(%esp), %esi # get the error code + movl ES(%esp), %edi # get the function address + movl %eax, ORIG_EAX(%esp) + movl %ecx, ES(%esp) + movl %esp,%edx + pushl %esi # push the error code + pushl %edx # push the pt_regs pointer + movl $(__KERNEL_DS),%edx + movl %edx,%ds + movl %edx,%es + call *%edi + addl $8,%esp + jmp ret_from_exception + +# A note on the "critical region" in our callback handler. +# We want to avoid stacking callback handlers due to events occurring +# during handling of the last event. To do this, we keep events disabled +# until we've done all processing. HOWEVER, we must enable events before +# popping the stack frame (can't be done atomically) and so it would still +# be possible to get enough handler activations to overflow the stack. +# Although unlikely, bugs of that kind are hard to track down, so we'd +# like to avoid the possibility. +# So, on entry to the handler we detect whether we interrupted an +# existing activation in its critical region -- if so, we pop the current +# activation and restart the handler using the previous one. +ENTRY(hypervisor_callback) + pushl %eax + SAVE_ALL + GET_CURRENT(%ebx) + movl EIP(%esp),%eax + cmpl $scrit,%eax + jb 11f + cmpl $ecrit,%eax + jb critical_region_fixup +11: push %esp + call do_hypervisor_callback + add $4,%esp + movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi + movb CS(%esp),%cl + test $2,%cl # slow return to ring 2 or 3 + jne ret_syscall_tests +safesti:btsl $EVENTS_MASTER_ENABLE_BIT,4(%esi) # reenable event callbacks +scrit: /**** START OF CRITICAL REGION ****/ + cmpl $0,(%esi) + jne 14f # process more events if necessary... + RESTORE_ALL +14: btrl $EVENTS_MASTER_ENABLE_BIT,4(%esi) + jmp 11b +ecrit: /**** END OF CRITICAL REGION ****/ +# [How we do the fixup]. We want to merge the current stack frame with the +# just-interrupted frame. How we do this depends on where in the critical +# region the interrupted handler was executing, and so how many saved +# registers are in each frame. We do this quickly using the lookup table +# 'critical_fixup_table'. For each byte offset in the critical region, it +# provides the number of bytes which have already been popped from the +# interrupted stack frame. +critical_region_fixup: + addl $critical_fixup_table-scrit,%eax + movzbl (%eax),%eax # %eax contains num bytes popped + mov %esp,%esi + add %eax,%esi # %esi points at end of src region + mov %esp,%edi + add $0x34,%edi # %edi points at end of dst region + mov %eax,%ecx + shr $2,%ecx # convert words to bytes + je 16f # skip loop if nothing to copy +15: subl $4,%esi # pre-decrementing copy loop + subl $4,%edi + movl (%esi),%eax + movl %eax,(%edi) + loop 15b +16: movl %edi,%esp # final %edi is top of merged stack + jmp 11b + +critical_fixup_table: + .byte 0x00,0x00,0x00 # cmpl $0,(%esi) + .byte 0x00,0x00 # jne 14f + .byte 0x00 # pop %ebx + .byte 0x04 # pop %ecx + .byte 0x08 # pop %edx + .byte 0x0c # pop %esi + .byte 0x10 # pop %edi + .byte 0x14 # pop %ebp + .byte 0x18 # pop %eax + .byte 0x1c # pop %ds + .byte 0x20 # pop %es + .byte 0x24,0x24,0x24 # add $4,%esp + .byte 0x28 # iret + .byte 0x00,0x00,0x00,0x00,0x00 # btrl $31,4(%esi) + .byte 0x00,0x00 # jmp 11b + +# Hypervisor uses this for application faults while it executes. +ENTRY(failsafe_callback) + call SYMBOL_NAME(install_safe_pf_handler) +1: pop %ds +2: pop %es +3: pop %fs +4: pop %gs + call SYMBOL_NAME(install_normal_pf_handler) +5: iret +.section .fixup,"ax"; \ +6: movl $0,(%esp); \ + jmp 1b; \ +7: movl $0,(%esp); \ + jmp 2b; \ +8: movl $0,(%esp); \ + jmp 3b; \ +9: movl $0,(%esp); \ + jmp 4b; \ +10: pushl %ss; \ + popl %ds; \ + pushl %ss; \ + popl %es; \ + pushl $11; \ + call do_exit; \ +.previous; \ +.section __ex_table,"a";\ + .align 4; \ + .long 1b,6b; \ + .long 2b,7b; \ + .long 3b,8b; \ + .long 4b,9b; \ + .long 5b,10b; \ +.previous + +ENTRY(coprocessor_error) + pushl $0 + pushl $ SYMBOL_NAME(do_coprocessor_error) + jmp error_code + +ENTRY(simd_coprocessor_error) + pushl $0 + pushl $ SYMBOL_NAME(do_simd_coprocessor_error) + jmp error_code + +ENTRY(device_not_available) + pushl $-1 # mark this as an int + SAVE_ALL + GET_CURRENT(%ebx) + call SYMBOL_NAME(math_state_restore) + jmp ret_from_exception + +ENTRY(debug) + pushl $0 + pushl $ SYMBOL_NAME(do_debug) + jmp error_code + +ENTRY(int3) + pushl $0 + pushl $ SYMBOL_NAME(do_int3) + jmp error_code + +ENTRY(overflow) + pushl $0 + pushl $ SYMBOL_NAME(do_overflow) + jmp error_code + +ENTRY(bounds) + pushl $0 + pushl $ SYMBOL_NAME(do_bounds) + jmp error_code + +ENTRY(invalid_op) + pushl $0 + pushl $ SYMBOL_NAME(do_invalid_op) + jmp error_code + +ENTRY(coprocessor_segment_overrun) + pushl $0 + pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun) + jmp error_code + +ENTRY(double_fault) + pushl $ SYMBOL_NAME(do_double_fault) + jmp error_code + +ENTRY(invalid_TSS) + pushl $ SYMBOL_NAME(do_invalid_TSS) + jmp error_code + +ENTRY(segment_not_present) + pushl $ SYMBOL_NAME(do_segment_not_present) + jmp error_code + +ENTRY(stack_segment) + pushl $ SYMBOL_NAME(do_stack_segment) + jmp error_code + +ENTRY(general_protection) + pushl $ SYMBOL_NAME(do_general_protection) + jmp error_code + +ENTRY(alignment_check) + pushl $ SYMBOL_NAME(do_alignment_check) + jmp error_code + +# This handler is special, because it gets an extra value on its stack, +# which is the linear faulting address. +#define PAGE_FAULT_STUB(_name1, _name2) \ +ENTRY(_name1) \ + pushl %ds ; \ + pushl %eax ; \ + xorl %eax,%eax ; \ + pushl %ebp ; \ + pushl %edi ; \ + pushl %esi ; \ + pushl %edx ; \ + decl %eax /* eax = -1 */ ; \ + pushl %ecx ; \ + pushl %ebx ; \ + GET_CURRENT(%ebx) ; \ + cld ; \ + movl %es,%ecx ; \ + movl ORIG_EAX(%esp), %esi /* get the error code */ ; \ + movl ES(%esp), %edi /* get the faulting address */ ; \ + movl %eax, ORIG_EAX(%esp) ; \ + movl %ecx, ES(%esp) ; \ + movl %esp,%edx ; \ + pushl %edi /* push the faulting address */ ; \ + pushl %esi /* push the error code */ ; \ + pushl %edx /* push the pt_regs pointer */ ; \ + movl $(__KERNEL_DS),%edx ; \ + movl %edx,%ds ; \ + movl %edx,%es ; \ + call SYMBOL_NAME(_name2) ; \ + addl $12,%esp ; \ + jmp ret_from_exception ; +PAGE_FAULT_STUB(page_fault, do_page_fault) +PAGE_FAULT_STUB(safe_page_fault, do_safe_page_fault) + +ENTRY(machine_check) + pushl $0 + pushl $ SYMBOL_NAME(do_machine_check) + jmp error_code + +ENTRY(spurious_interrupt_bug) + pushl $0 + pushl $ SYMBOL_NAME(do_spurious_interrupt_bug) + jmp error_code + +.data +ENTRY(sys_call_table) + .long SYMBOL_NAME(sys_ni_syscall) /* 0 - old "setup()" system call*/ + .long SYMBOL_NAME(sys_exit) + .long SYMBOL_NAME(sys_fork) + .long SYMBOL_NAME(sys_read) + .long SYMBOL_NAME(sys_write) + .long SYMBOL_NAME(sys_open) /* 5 */ + .long SYMBOL_NAME(sys_close) + .long SYMBOL_NAME(sys_waitpid) + .long SYMBOL_NAME(sys_creat) + .long SYMBOL_NAME(sys_link) + .long SYMBOL_NAME(sys_unlink) /* 10 */ + .long SYMBOL_NAME(sys_execve) + .long SYMBOL_NAME(sys_chdir) + .long SYMBOL_NAME(sys_time) + .long SYMBOL_NAME(sys_mknod) + .long SYMBOL_NAME(sys_chmod) /* 15 */ + .long SYMBOL_NAME(sys_lchown16) + .long SYMBOL_NAME(sys_ni_syscall) /* old break syscall holder */ + .long SYMBOL_NAME(sys_stat) + .long SYMBOL_NAME(sys_lseek) + .long SYMBOL_NAME(sys_getpid) /* 20 */ + .long SYMBOL_NAME(sys_mount) + .long SYMBOL_NAME(sys_oldumount) + .long SYMBOL_NAME(sys_setuid16) + .long SYMBOL_NAME(sys_getuid16) + .long SYMBOL_NAME(sys_stime) /* 25 */ + .long SYMBOL_NAME(sys_ptrace) + .long SYMBOL_NAME(sys_alarm) + .long SYMBOL_NAME(sys_fstat) + .long SYMBOL_NAME(sys_pause) + .long SYMBOL_NAME(sys_utime) /* 30 */ + .long SYMBOL_NAME(sys_ni_syscall) /* old stty syscall holder */ + .long SYMBOL_NAME(sys_ni_syscall) /* old gtty syscall holder */ + .long SYMBOL_NAME(sys_access) + .long SYMBOL_NAME(sys_nice) + .long SYMBOL_NAME(sys_ni_syscall) /* 35 */ /* old ftime syscall holder */ + .long SYMBOL_NAME(sys_sync) + .long SYMBOL_NAME(sys_kill) + .long SYMBOL_NAME(sys_rename) + .long SYMBOL_NAME(sys_mkdir) + .long SYMBOL_NAME(sys_rmdir) /* 40 */ + .long SYMBOL_NAME(sys_dup) + .long SYMBOL_NAME(sys_pipe) + .long SYMBOL_NAME(sys_times) + .long SYMBOL_NAME(sys_ni_syscall) /* old prof syscall holder */ + .long SYMBOL_NAME(sys_brk) /* 45 */ + .long SYMBOL_NAME(sys_setgid16) + .long SYMBOL_NAME(sys_getgid16) + .long SYMBOL_NAME(sys_signal) + .long SYMBOL_NAME(sys_geteuid16) + .long SYMBOL_NAME(sys_getegid16) /* 50 */ + .long SYMBOL_NAME(sys_acct) + .long SYMBOL_NAME(sys_umount) /* recycled never used phys() */ + .long SYMBOL_NAME(sys_ni_syscall) /* old lock syscall holder */ + .long SYMBOL_NAME(sys_ioctl) + .long SYMBOL_NAME(sys_fcntl) /* 55 */ + .long SYMBOL_NAME(sys_ni_syscall) /* old mpx syscall holder */ + .long SYMBOL_NAME(sys_setpgid) + .long SYMBOL_NAME(sys_ni_syscall) /* old ulimit syscall holder */ + .long SYMBOL_NAME(sys_olduname) + .long SYMBOL_NAME(sys_umask) /* 60 */ + .long SYMBOL_NAME(sys_chroot) + .long SYMBOL_NAME(sys_ustat) + .long SYMBOL_NAME(sys_dup2) + .long SYMBOL_NAME(sys_getppid) + .long SYMBOL_NAME(sys_getpgrp) /* 65 */ + .long SYMBOL_NAME(sys_setsid) + .long SYMBOL_NAME(sys_sigaction) + .long SYMBOL_NAME(sys_sgetmask) + .long SYMBOL_NAME(sys_ssetmask) + .long SYMBOL_NAME(sys_setreuid16) /* 70 */ + .long SYMBOL_NAME(sys_setregid16) + .long SYMBOL_NAME(sys_sigsuspend) + .long SYMBOL_NAME(sys_sigpending) + .long SYMBOL_NAME(sys_sethostname) + .long SYMBOL_NAME(sys_setrlimit) /* 75 */ + .long SYMBOL_NAME(sys_old_getrlimit) + .long SYMBOL_NAME(sys_getrusage) + .long SYMBOL_NAME(sys_gettimeofday) + .long SYMBOL_NAME(sys_settimeofday) + .long SYMBOL_NAME(sys_getgroups16) /* 80 */ + .long SYMBOL_NAME(sys_setgroups16) + .long SYMBOL_NAME(old_select) + .long SYMBOL_NAME(sys_symlink) + .long SYMBOL_NAME(sys_lstat) + .long SYMBOL_NAME(sys_readlink) /* 85 */ + .long SYMBOL_NAME(sys_uselib) + .long SYMBOL_NAME(sys_swapon) + .long SYMBOL_NAME(sys_reboot) + .long SYMBOL_NAME(old_readdir) + .long SYMBOL_NAME(old_mmap) /* 90 */ + .long SYMBOL_NAME(sys_munmap) + .long SYMBOL_NAME(sys_truncate) + .long SYMBOL_NAME(sys_ftruncate) + .long SYMBOL_NAME(sys_fchmod) + .long SYMBOL_NAME(sys_fchown16) /* 95 */ + .long SYMBOL_NAME(sys_getpriority) + .long SYMBOL_NAME(sys_setpriority) + .long SYMBOL_NAME(sys_ni_syscall) /* old profil syscall holder */ + .long SYMBOL_NAME(sys_statfs) + .long SYMBOL_NAME(sys_fstatfs) /* 100 */ + .long SYMBOL_NAME(sys_ioperm) + .long SYMBOL_NAME(sys_socketcall) + .long SYMBOL_NAME(sys_syslog) + .long SYMBOL_NAME(sys_setitimer) + .long SYMBOL_NAME(sys_getitimer) /* 105 */ + .long SYMBOL_NAME(sys_newstat) + .long SYMBOL_NAME(sys_newlstat) + .long SYMBOL_NAME(sys_newfstat) + .long SYMBOL_NAME(sys_uname) + .long SYMBOL_NAME(sys_iopl) /* 110 */ + .long SYMBOL_NAME(sys_vhangup) + .long SYMBOL_NAME(sys_ni_syscall) /* old "idle" system call */ + .long SYMBOL_NAME(sys_ni_syscall) /* was VM86 */ + .long SYMBOL_NAME(sys_wait4) + .long SYMBOL_NAME(sys_swapoff) /* 115 */ + .long SYMBOL_NAME(sys_sysinfo) + .long SYMBOL_NAME(sys_ipc) + .long SYMBOL_NAME(sys_fsync) + .long SYMBOL_NAME(sys_sigreturn) + .long SYMBOL_NAME(sys_clone) /* 120 */ + .long SYMBOL_NAME(sys_setdomainname) + .long SYMBOL_NAME(sys_newuname) + .long SYMBOL_NAME(sys_modify_ldt) + .long SYMBOL_NAME(sys_adjtimex) + .long SYMBOL_NAME(sys_mprotect) /* 125 */ + .long SYMBOL_NAME(sys_sigprocmask) + .long SYMBOL_NAME(sys_create_module) + .long SYMBOL_NAME(sys_init_module) + .long SYMBOL_NAME(sys_delete_module) + .long SYMBOL_NAME(sys_get_kernel_syms) /* 130 */ + .long SYMBOL_NAME(sys_quotactl) + .long SYMBOL_NAME(sys_getpgid) + .long SYMBOL_NAME(sys_fchdir) + .long SYMBOL_NAME(sys_bdflush) + .long SYMBOL_NAME(sys_sysfs) /* 135 */ + .long SYMBOL_NAME(sys_personality) + .long SYMBOL_NAME(sys_ni_syscall) /* for afs_syscall */ + .long SYMBOL_NAME(sys_setfsuid16) + .long SYMBOL_NAME(sys_setfsgid16) + .long SYMBOL_NAME(sys_llseek) /* 140 */ + .long SYMBOL_NAME(sys_getdents) + .long SYMBOL_NAME(sys_select) + .long SYMBOL_NAME(sys_flock) + .long SYMBOL_NAME(sys_msync) + .long SYMBOL_NAME(sys_readv) /* 145 */ + .long SYMBOL_NAME(sys_writev) + .long SYMBOL_NAME(sys_getsid) + .long SYMBOL_NAME(sys_fdatasync) + .long SYMBOL_NAME(sys_sysctl) + .long SYMBOL_NAME(sys_mlock) /* 150 */ + .long SYMBOL_NAME(sys_munlock) + .long SYMBOL_NAME(sys_mlockall) + .long SYMBOL_NAME(sys_munlockall) + .long SYMBOL_NAME(sys_sched_setparam) + .long SYMBOL_NAME(sys_sched_getparam) /* 155 */ + .long SYMBOL_NAME(sys_sched_setscheduler) + .long SYMBOL_NAME(sys_sched_getscheduler) + .long SYMBOL_NAME(sys_sched_yield) + .long SYMBOL_NAME(sys_sched_get_priority_max) + .long SYMBOL_NAME(sys_sched_get_priority_min) /* 160 */ + .long SYMBOL_NAME(sys_sched_rr_get_interval) + .long SYMBOL_NAME(sys_nanosleep) + .long SYMBOL_NAME(sys_mremap) + .long SYMBOL_NAME(sys_setresuid16) + .long SYMBOL_NAME(sys_getresuid16) /* 165 */ + .long SYMBOL_NAME(sys_ni_syscall) /* was VM86 */ + .long SYMBOL_NAME(sys_query_module) + .long SYMBOL_NAME(sys_poll) + .long SYMBOL_NAME(sys_nfsservctl) + .long SYMBOL_NAME(sys_setresgid16) /* 170 */ + .long SYMBOL_NAME(sys_getresgid16) + .long SYMBOL_NAME(sys_prctl) + .long SYMBOL_NAME(sys_rt_sigreturn) + .long SYMBOL_NAME(sys_rt_sigaction) + .long SYMBOL_NAME(sys_rt_sigprocmask) /* 175 */ + .long SYMBOL_NAME(sys_rt_sigpending) + .long SYMBOL_NAME(sys_rt_sigtimedwait) + .long SYMBOL_NAME(sys_rt_sigqueueinfo) + .long SYMBOL_NAME(sys_rt_sigsuspend) + .long SYMBOL_NAME(sys_pread) /* 180 */ + .long SYMBOL_NAME(sys_pwrite) + .long SYMBOL_NAME(sys_chown16) + .long SYMBOL_NAME(sys_getcwd) + .long SYMBOL_NAME(sys_capget) + .long SYMBOL_NAME(sys_capset) /* 185 */ + .long SYMBOL_NAME(sys_sigaltstack) + .long SYMBOL_NAME(sys_sendfile) + .long SYMBOL_NAME(sys_ni_syscall) /* streams1 */ + .long SYMBOL_NAME(sys_ni_syscall) /* streams2 */ + .long SYMBOL_NAME(sys_vfork) /* 190 */ + .long SYMBOL_NAME(sys_getrlimit) + .long SYMBOL_NAME(sys_mmap2) + .long SYMBOL_NAME(sys_truncate64) + .long SYMBOL_NAME(sys_ftruncate64) + .long SYMBOL_NAME(sys_stat64) /* 195 */ + .long SYMBOL_NAME(sys_lstat64) + .long SYMBOL_NAME(sys_fstat64) + .long SYMBOL_NAME(sys_lchown) + .long SYMBOL_NAME(sys_getuid) + .long SYMBOL_NAME(sys_getgid) /* 200 */ + .long SYMBOL_NAME(sys_geteuid) + .long SYMBOL_NAME(sys_getegid) + .long SYMBOL_NAME(sys_setreuid) + .long SYMBOL_NAME(sys_setregid) + .long SYMBOL_NAME(sys_getgroups) /* 205 */ + .long SYMBOL_NAME(sys_setgroups) + .long SYMBOL_NAME(sys_fchown) + .long SYMBOL_NAME(sys_setresuid) + .long SYMBOL_NAME(sys_getresuid) + .long SYMBOL_NAME(sys_setresgid) /* 210 */ + .long SYMBOL_NAME(sys_getresgid) + .long SYMBOL_NAME(sys_chown) + .long SYMBOL_NAME(sys_setuid) + .long SYMBOL_NAME(sys_setgid) + .long SYMBOL_NAME(sys_setfsuid) /* 215 */ + .long SYMBOL_NAME(sys_setfsgid) + .long SYMBOL_NAME(sys_pivot_root) + .long SYMBOL_NAME(sys_mincore) + .long SYMBOL_NAME(sys_madvise) + .long SYMBOL_NAME(sys_getdents64) /* 220 */ + .long SYMBOL_NAME(sys_fcntl64) + .long SYMBOL_NAME(sys_ni_syscall) /* reserved for TUX */ + .long SYMBOL_NAME(sys_ni_syscall) /* Reserved for Security */ + .long SYMBOL_NAME(sys_gettid) + .long SYMBOL_NAME(sys_readahead) /* 225 */ + .long SYMBOL_NAME(sys_setxattr) + .long SYMBOL_NAME(sys_lsetxattr) + .long SYMBOL_NAME(sys_fsetxattr) + .long SYMBOL_NAME(sys_getxattr) + .long SYMBOL_NAME(sys_lgetxattr) /* 230 */ + .long SYMBOL_NAME(sys_fgetxattr) + .long SYMBOL_NAME(sys_listxattr) + .long SYMBOL_NAME(sys_llistxattr) + .long SYMBOL_NAME(sys_flistxattr) + .long SYMBOL_NAME(sys_removexattr) /* 235 */ + .long SYMBOL_NAME(sys_lremovexattr) + .long SYMBOL_NAME(sys_fremovexattr) + .long SYMBOL_NAME(sys_tkill) + .long SYMBOL_NAME(sys_sendfile64) + .long SYMBOL_NAME(sys_ni_syscall) /* 240 reserved for futex */ + .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sched_setaffinity */ + .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sched_getaffinity */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_set_thread_area */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_get_thread_area */ + .long SYMBOL_NAME(sys_ni_syscall) /* 245 sys_io_setup */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_destroy */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_getevents */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_submit */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_cancel */ + .long SYMBOL_NAME(sys_ni_syscall) /* 250 sys_alloc_hugepages */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_free_hugepages */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_exit_group */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_lookup_dcookie */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_create */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_ctl 255 */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_wait */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_remap_file_pages */ + .long SYMBOL_NAME(sys_ni_syscall) /* sys_set_tid_address */ + + .rept NR_syscalls-(.-sys_call_table)/4 + .long SYMBOL_NAME(sys_ni_syscall) + .endr diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/head.S b/xenolinux-2.4.25-sparse/arch/xen/kernel/head.S new file mode 100644 index 0000000000..361815a58b --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/head.S @@ -0,0 +1,66 @@ + +.text +#include +#include +#include +#include +#include +#include +#include + +/* Offsets in start_info structure */ +#define MOD_START 16 +#define MOD_LEN 20 + +startup_32: + cld + + lss stack_start,%esp + + /* Copy initrd somewhere safe before it's clobbered by BSS. */ + mov MOD_LEN(%esi),%ecx + shr $2,%ecx + jz 2f /* bail from copy loop if no initrd */ + mov $SYMBOL_NAME(_end),%edi + add MOD_LEN(%esi),%edi + mov MOD_START(%esi),%eax + add MOD_LEN(%esi),%eax +1: sub $4,%eax + sub $4,%edi + mov (%eax),%ebx + mov %ebx,(%edi) + loop 1b + mov %edi,MOD_START(%esi) + + /* Clear BSS first so that there are no surprises... */ +2: xorl %eax,%eax + movl $SYMBOL_NAME(__bss_start),%edi + movl $SYMBOL_NAME(_end),%ecx + subl %edi,%ecx + rep stosb + + /* Copy the necessary stuff from start_info structure. */ + mov $SYMBOL_NAME(start_info_union),%edi + mov $128,%ecx + rep movsl + + jmp SYMBOL_NAME(start_kernel) + +ENTRY(stack_start) + .long SYMBOL_NAME(init_task_union)+8192, __KERNEL_DS + +.org 0x1000 +ENTRY(empty_zero_page) + +.org 0x2000 +ENTRY(default_ldt) + +.org 0x3000 +ENTRY(cpu0_pte_quicklist) + +.org 0x3400 +ENTRY(cpu0_pgd_quicklist) + +.org 0x3800 +ENTRY(stext) +ENTRY(_stext) diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/hypervisor.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/hypervisor.c new file mode 100644 index 0000000000..7c6aca05c5 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/hypervisor.c @@ -0,0 +1,170 @@ +/****************************************************************************** + * hypervisor.c + * + * Communication to/from hypervisor. + * + * Copyright (c) 2002, K A Fraser + */ + +#include +#include +#include +#include +#include +#include +#include + +multicall_entry_t multicall_list[8]; +int nr_multicall_ents = 0; + +static unsigned long event_mask = 0; + +asmlinkage unsigned int do_physirq(int irq, struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + unsigned long irqs; + shared_info_t *shared = HYPERVISOR_shared_info; + + /* do this manually */ + kstat.irqs[cpu][irq]++; + ack_hypervisor_event(irq); + + barrier(); + irqs = xchg(&shared->physirq_pend, 0); + + __asm__ __volatile__ ( + " push %1 ;" + " sub $4,%%esp ;" + " jmp 3f ;" + "1: btrl %%eax,%0 ;" /* clear bit */ + " mov %%eax,(%%esp) ;" + " call do_IRQ ;" /* do_IRQ(event) */ + "3: bsfl %0,%%eax ;" /* %eax == bit # */ + " jnz 1b ;" + " add $8,%%esp ;" + /* we use %ebx because it is callee-saved */ + : : "b" (irqs), "r" (regs) + /* clobbered by callback function calls */ + : "eax", "ecx", "edx", "memory" ); + + /* do this manually */ + end_hypervisor_event(irq); + + return 0; +} + +void do_hypervisor_callback(struct pt_regs *regs) +{ + unsigned long events, flags; + shared_info_t *shared = HYPERVISOR_shared_info; + + do { + /* Specialised local_irq_save(). */ + flags = test_and_clear_bit(EVENTS_MASTER_ENABLE_BIT, + &shared->events_mask); + barrier(); + + events = xchg(&shared->events, 0); + events &= event_mask; + + if ( (events & EVENT_PHYSIRQ) != 0 ) + { + do_physirq(_EVENT_PHYSIRQ, regs); + events &= ~EVENT_PHYSIRQ; + } + + __asm__ __volatile__ ( + " push %1 ;" + " sub $4,%%esp ;" + " jmp 2f ;" + "1: btrl %%eax,%0 ;" /* clear bit */ + " add %2,%%eax ;" + " mov %%eax,(%%esp) ;" + " call do_IRQ ;" /* do_IRQ(event) */ + "2: bsfl %0,%%eax ;" /* %eax == bit # */ + " jnz 1b ;" + " add $8,%%esp ;" + /* we use %ebx because it is callee-saved */ + : : "b" (events), "r" (regs), "i" (HYPEREVENT_IRQ_BASE) + /* clobbered by callback function calls */ + : "eax", "ecx", "edx", "memory" ); + + /* Specialised local_irq_restore(). */ + if ( flags ) set_bit(EVENTS_MASTER_ENABLE_BIT, &shared->events_mask); + barrier(); + } + while ( shared->events ); +} + +/* + * Define interface to generic handling in irq.c + */ + +static void shutdown_hypervisor_event(unsigned int irq) +{ + clear_bit(HYPEREVENT_FROM_IRQ(irq), &event_mask); + clear_bit(HYPEREVENT_FROM_IRQ(irq), &HYPERVISOR_shared_info->events_mask); +} + +static void enable_hypervisor_event(unsigned int irq) +{ + set_bit(HYPEREVENT_FROM_IRQ(irq), &event_mask); + set_bit(HYPEREVENT_FROM_IRQ(irq), &HYPERVISOR_shared_info->events_mask); + if ( test_bit(EVENTS_MASTER_ENABLE_BIT, + &HYPERVISOR_shared_info->events_mask) ) + do_hypervisor_callback(NULL); +} + +static void disable_hypervisor_event(unsigned int irq) +{ + clear_bit(HYPEREVENT_FROM_IRQ(irq), &event_mask); + clear_bit(HYPEREVENT_FROM_IRQ(irq), &HYPERVISOR_shared_info->events_mask); +} + +static void ack_hypervisor_event(unsigned int irq) +{ + int ev = HYPEREVENT_FROM_IRQ(irq); + if ( !(event_mask & (1<events_mask); +} + +static unsigned int startup_hypervisor_event(unsigned int irq) +{ + enable_hypervisor_event(irq); + return 0; +} + +static void end_hypervisor_event(unsigned int irq) +{ +} + +static struct hw_interrupt_type hypervisor_irq_type = { + "Hypervisor-event", + startup_hypervisor_event, + shutdown_hypervisor_event, + enable_hypervisor_event, + disable_hypervisor_event, + ack_hypervisor_event, + end_hypervisor_event, + NULL +}; + +void __init init_IRQ(void) +{ + int i; + + for ( i = 0; i < NR_HYPEREVENT_IRQS; i++ ) + { + irq_desc[i + HYPEREVENT_IRQ_BASE].status = IRQ_DISABLED; + irq_desc[i + HYPEREVENT_IRQ_BASE].action = 0; + irq_desc[i + HYPEREVENT_IRQ_BASE].depth = 1; + irq_desc[i + HYPEREVENT_IRQ_BASE].handler = &hypervisor_irq_type; + } + + /* Also initialise the physical IRQ handlers. */ + physirq_init(); +} diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/i386_ksyms.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/i386_ksyms.c new file mode 100644 index 0000000000..267516500f --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/i386_ksyms.c @@ -0,0 +1,175 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern void dump_thread(struct pt_regs *, struct user *); +extern spinlock_t rtc_lock; + +#if defined(CONFIG_APMXXX) || defined(CONFIG_APM_MODULEXXX) +extern void machine_real_restart(unsigned char *, int); +EXPORT_SYMBOL(machine_real_restart); +extern void default_idle(void); +EXPORT_SYMBOL(default_idle); +#endif + +#ifdef CONFIG_SMP +extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); +extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); +#endif + +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) +extern struct drive_info_struct drive_info; +EXPORT_SYMBOL(drive_info); +#endif + +// XXX extern unsigned long get_cmos_time(void); + +/* platform dependent support */ +EXPORT_SYMBOL(boot_cpu_data); +EXPORT_SYMBOL(dump_thread); +EXPORT_SYMBOL(dump_fpu); +EXPORT_SYMBOL(dump_extended_fpu); +EXPORT_SYMBOL(__ioremap); +EXPORT_SYMBOL(iounmap); +EXPORT_SYMBOL(enable_irq); +EXPORT_SYMBOL(disable_irq); +EXPORT_SYMBOL(disable_irq_nosync); +EXPORT_SYMBOL(probe_irq_mask); +EXPORT_SYMBOL(kernel_thread); +EXPORT_SYMBOL(pm_idle); +EXPORT_SYMBOL(pm_power_off); +EXPORT_SYMBOL(apm_info); +//EXPORT_SYMBOL(gdt); +EXPORT_SYMBOL(empty_zero_page); +EXPORT_SYMBOL(phys_to_machine_mapping); + + +#ifdef CONFIG_DEBUG_IOVIRT +EXPORT_SYMBOL(__io_virt_debug); +#endif + +EXPORT_SYMBOL_NOVERS(__down_failed); +EXPORT_SYMBOL_NOVERS(__down_failed_interruptible); +EXPORT_SYMBOL_NOVERS(__down_failed_trylock); +EXPORT_SYMBOL_NOVERS(__up_wakeup); +/* Networking helper routines. */ +EXPORT_SYMBOL(csum_partial_copy_generic); +/* Delay loops */ +EXPORT_SYMBOL(__ndelay); +EXPORT_SYMBOL(__udelay); +EXPORT_SYMBOL(__delay); +EXPORT_SYMBOL(__const_udelay); + +EXPORT_SYMBOL_NOVERS(__get_user_1); +EXPORT_SYMBOL_NOVERS(__get_user_2); +EXPORT_SYMBOL_NOVERS(__get_user_4); + +EXPORT_SYMBOL(strtok); +EXPORT_SYMBOL(strpbrk); +EXPORT_SYMBOL(strstr); + +EXPORT_SYMBOL(strncpy_from_user); +EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(clear_user); +EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(__generic_copy_from_user); +EXPORT_SYMBOL(__generic_copy_to_user); +EXPORT_SYMBOL(strnlen_user); + + +EXPORT_SYMBOL(pci_alloc_consistent); +EXPORT_SYMBOL(pci_free_consistent); + +#ifdef CONFIG_PCI +EXPORT_SYMBOL(pcibios_penalize_isa_irq); +EXPORT_SYMBOL(pci_mem_start); +#endif + + +#ifdef CONFIG_X86_USE_3DNOW +EXPORT_SYMBOL(_mmx_memcpy); +EXPORT_SYMBOL(mmx_clear_page); +EXPORT_SYMBOL(mmx_copy_page); +#endif + +#ifdef CONFIG_SMP +EXPORT_SYMBOL(cpu_data); +EXPORT_SYMBOL(kernel_flag_cacheline); +EXPORT_SYMBOL(smp_num_cpus); +EXPORT_SYMBOL(cpu_online_map); +EXPORT_SYMBOL_NOVERS(__write_lock_failed); +EXPORT_SYMBOL_NOVERS(__read_lock_failed); + +/* Global SMP irq stuff */ +EXPORT_SYMBOL(synchronize_irq); +EXPORT_SYMBOL(global_irq_holder); +EXPORT_SYMBOL(__global_cli); +EXPORT_SYMBOL(__global_sti); +EXPORT_SYMBOL(__global_save_flags); +EXPORT_SYMBOL(__global_restore_flags); +EXPORT_SYMBOL(smp_call_function); + +/* TLB flushing */ +EXPORT_SYMBOL(flush_tlb_page); +#endif + +#ifdef CONFIG_X86_IO_APIC +EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); +#endif + +#ifdef CONFIG_VT +EXPORT_SYMBOL(screen_info); +#endif + +EXPORT_SYMBOL(get_wchan); + +EXPORT_SYMBOL(rtc_lock); + +#undef memcpy +#undef memset +extern void * memset(void *,int,__kernel_size_t); +extern void * memcpy(void *,const void *,__kernel_size_t); +EXPORT_SYMBOL_NOVERS(memcpy); +EXPORT_SYMBOL_NOVERS(memset); + +#ifdef CONFIG_HAVE_DEC_LOCK +EXPORT_SYMBOL(atomic_dec_and_lock); +#endif + +#ifdef CONFIG_MULTIQUAD +EXPORT_SYMBOL(xquad_portio); +#endif + +#include +EXPORT_SYMBOL(create_xen_proc_entry); +EXPORT_SYMBOL(remove_xen_proc_entry); + +EXPORT_SYMBOL(do_hypervisor_callback); +EXPORT_SYMBOL(HYPERVISOR_shared_info); diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/ioport.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/ioport.c new file mode 100644 index 0000000000..fc0164045f --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/ioport.c @@ -0,0 +1,48 @@ +#include +#include +#include +#include +#include +#include + + +asmlinkage int sys_iopl(unsigned int new_io_pl) +{ + unsigned int old_io_pl = current->thread.io_pl; + dom0_op_t op; + + if ( !(start_info.flags & SIF_PRIVILEGED) ) + return -EPERM; + + if ( new_io_pl > 3 ) + return -EINVAL; + + /* Need "raw I/O" privileges for direct port access. */ + if ( (new_io_pl > old_io_pl) && !capable(CAP_SYS_RAWIO) ) + return -EPERM; + + /* Maintain OS privileges even if user attempts to relinquish them. */ + if ( (new_io_pl == 0) && (start_info.flags & SIF_PRIVILEGED) ) + new_io_pl = 1; + + /* Change our version of the privilege levels. */ + current->thread.io_pl = new_io_pl; + + /* Force the change at ring 0. */ + op.cmd = DOM0_IOPL; + op.u.iopl.domain = DOMID_SELF; + op.u.iopl.iopl = new_io_pl; + HYPERVISOR_dom0_op(&op); + + return 0; +} + + +asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on) +{ + printk(KERN_INFO "ioperm not fully supported - %s\n", + turn_on ? "set iopl to 3" : "ignore resource release"); + return turn_on ? sys_iopl(3) : 0; +} + + diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/irq.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/irq.c new file mode 100644 index 0000000000..c88e976125 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/irq.c @@ -0,0 +1,1137 @@ +/* + * linux/arch/i386/kernel/irq.c + * + * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. + */ + +/* + * (mostly architecture independent, will move to kernel/irq.c in 2.5.) + * + * IRQs are in fact implemented a bit like signal handlers for the kernel. + * Naturally it's not a 1:1 relation, but there are similarities. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +/* + * Linux has a controller-independent x86 interrupt architecture. + * every controller has a 'controller-template', that is used + * by the main code to do the right thing. Each driver-visible + * interrupt source is transparently wired to the apropriate + * controller. Thus drivers need not be aware of the + * interrupt-controller. + * + * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC, + * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC. + * (IO-APICs assumed to be messaging to Pentium local-APICs) + * + * the code is designed to be easily extended with new/different + * interrupt controllers, without having to do assembly magic. + */ + +/* + * Controller mappings for all interrupt sources: + */ +irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = + { [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}}; + +static void register_irq_proc (unsigned int irq); + +/* + * Special irq handlers. + */ + +void no_action(int cpl, void *dev_id, struct pt_regs *regs) { } + +/* + * Generic no controller code + */ + +static void enable_none(unsigned int irq) { } +static unsigned int startup_none(unsigned int irq) { return 0; } +static void disable_none(unsigned int irq) { } +static void ack_none(unsigned int irq) +{ + printk("unexpected IRQ trap at vector %02x\n", irq); +} + +/* startup is the same as "enable", shutdown is same as "disable" */ +#define shutdown_none disable_none +#define end_none enable_none + +struct hw_interrupt_type no_irq_type = { + "none", + startup_none, + shutdown_none, + enable_none, + disable_none, + ack_none, + end_none +}; + +atomic_t irq_err_count; +#ifdef CONFIG_X86_IO_APIC +#ifdef APIC_MISMATCH_DEBUG +atomic_t irq_mis_count; +#endif +#endif + +/* + * Generic, controller-independent functions: + */ + +int show_interrupts(struct seq_file *p, void *v) +{ + int i, j; + struct irqaction * action; + + seq_printf(p, " "); + for (j=0; jtypename); + seq_printf(p, " %s", action->name); + + for (action=action->next; action; action = action->next) + seq_printf(p, ", %s", action->name); + seq_putc(p,'\n'); + } + seq_printf(p, "NMI: "); + for (j = 0; j < smp_num_cpus; j++) + seq_printf(p, "%10u ", + nmi_count(cpu_logical_map(j))); + seq_printf(p, "\n"); +#if CONFIG_X86_LOCAL_APIC + seq_printf(p, "LOC: "); + for (j = 0; j < smp_num_cpus; j++) + seq_printf(p, "%10u ", + apic_timer_irqs[cpu_logical_map(j)]); + seq_printf(p, "\n"); +#endif + seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); +#ifdef CONFIG_X86_IO_APIC +#ifdef APIC_MISMATCH_DEBUG + seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); +#endif +#endif + + return 0; +} + + +/* + * Global interrupt locks for SMP. Allow interrupts to come in on any + * CPU, yet make cli/sti act globally to protect critical regions.. + */ + +#ifdef CONFIG_SMP +unsigned char global_irq_holder = NO_PROC_ID; +unsigned volatile long global_irq_lock; /* pendantic: long for set_bit --RR */ + +extern void show_stack(unsigned long* esp); + +static void show(char * str) +{ + int i; + int cpu = smp_processor_id(); + + printk("\n%s, CPU %d:\n", str, cpu); + printk("irq: %d [",irqs_running()); + for(i=0;i < smp_num_cpus;i++) + printk(" %d",local_irq_count(i)); + printk(" ]\nbh: %d [",spin_is_locked(&global_bh_lock) ? 1 : 0); + for(i=0;i < smp_num_cpus;i++) + printk(" %d",local_bh_count(i)); + + printk(" ]\nStack dumps:"); + for(i = 0; i < smp_num_cpus; i++) { + unsigned long esp; + if (i == cpu) + continue; + printk("\nCPU %d:",i); + esp = init_tss[i].esp0; + if (!esp) { + /* tss->esp0 is set to NULL in cpu_init(), + * it's initialized when the cpu returns to user + * space. -- manfreds + */ + printk(" "); + continue; + } + esp &= ~(THREAD_SIZE-1); + esp += sizeof(struct task_struct); + show_stack((void*)esp); + } + printk("\nCPU %d:",cpu); + show_stack(NULL); + printk("\n"); +} + +#define MAXCOUNT 100000000 + +/* + * I had a lockup scenario where a tight loop doing + * spin_unlock()/spin_lock() on CPU#1 was racing with + * spin_lock() on CPU#0. CPU#0 should have noticed spin_unlock(), but + * apparently the spin_unlock() information did not make it + * through to CPU#0 ... nasty, is this by design, do we have to limit + * 'memory update oscillation frequency' artificially like here? + * + * Such 'high frequency update' races can be avoided by careful design, but + * some of our major constructs like spinlocks use similar techniques, + * it would be nice to clarify this issue. Set this define to 0 if you + * want to check whether your system freezes. I suspect the delay done + * by SYNC_OTHER_CORES() is in correlation with 'snooping latency', but + * i thought that such things are guaranteed by design, since we use + * the 'LOCK' prefix. + */ +#define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 0 + +#if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND +# define SYNC_OTHER_CORES(x) udelay(x+1) +#else +/* + * We have to allow irqs to arrive between __sti and __cli + */ +# define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop") +#endif + +static inline void wait_on_irq(int cpu) +{ + int count = MAXCOUNT; + + for (;;) { + + /* + * Wait until all interrupts are gone. Wait + * for bottom half handlers unless we're + * already executing in one.. + */ + if (!irqs_running()) + if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock)) + break; + + /* Duh, we have to loop. Release the lock to avoid deadlocks */ + clear_bit(0,&global_irq_lock); + + for (;;) { + if (!--count) { + show("wait_on_irq"); + count = ~0; + } + __sti(); + SYNC_OTHER_CORES(cpu); + __cli(); + if (irqs_running()) + continue; + if (global_irq_lock) + continue; + if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock)) + continue; + if (!test_and_set_bit(0,&global_irq_lock)) + break; + } + } +} + +/* + * This is called when we want to synchronize with + * interrupts. We may for example tell a device to + * stop sending interrupts: but to make sure there + * are no interrupts that are executing on another + * CPU we need to call this function. + */ +void synchronize_irq(void) +{ + if (irqs_running()) { + /* Stupid approach */ + cli(); + sti(); + } +} + +static inline void get_irqlock(int cpu) +{ + if (test_and_set_bit(0,&global_irq_lock)) { + /* do we already hold the lock? */ + if ((unsigned char) cpu == global_irq_holder) + return; + /* Uhhuh.. Somebody else got it. Wait.. */ + do { + do { + rep_nop(); + } while (test_bit(0,&global_irq_lock)); + } while (test_and_set_bit(0,&global_irq_lock)); + } + /* + * We also to make sure that nobody else is running + * in an interrupt context. + */ + wait_on_irq(cpu); + + /* + * Ok, finally.. + */ + global_irq_holder = cpu; +} + +void __global_cli(void) +{ + panic("__global_cli"); +} + +void __global_sti(void) +{ + panic("__global_sti"); +} + +/* + * SMP flags value to restore to: + * 0 - global cli + * 1 - global sti + * 2 - local cli + * 3 - local sti + */ +unsigned long __global_save_flags(void) +{ + panic("__global_save_flags"); +} + +void __global_restore_flags(unsigned long flags) +{ + panic("__global_restore_flags"); +} + +#endif + +/* + * This should really return information about whether + * we should do bottom half handling etc. Right now we + * end up _always_ checking the bottom half, which is a + * waste of time and is not what some drivers would + * prefer. + */ +int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action) +{ + int status; + int cpu = smp_processor_id(); + + irq_enter(cpu, irq); + + status = 1; /* Force the "do bottom halves" bit */ + + if (!(action->flags & SA_INTERRUPT)) + __sti(); + + do { + status |= action->flags; + action->handler(irq, action->dev_id, regs); + action = action->next; + } while (action); + if (status & SA_SAMPLE_RANDOM) + add_interrupt_randomness(irq); + __cli(); + + irq_exit(cpu, irq); + + return status; +} + +/* + * Generic enable/disable code: this just calls + * down into the PIC-specific version for the actual + * hardware disable after having gotten the irq + * controller lock. + */ + +/** + * disable_irq_nosync - disable an irq without waiting + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Disables and Enables are + * nested. + * Unlike disable_irq(), this function does not ensure existing + * instances of the IRQ handler have completed before returning. + * + * This function may be called from IRQ context. + */ + +inline void disable_irq_nosync(unsigned int irq) +{ + irq_desc_t *desc = irq_desc + irq; + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + if (!desc->depth++) { + desc->status |= IRQ_DISABLED; + desc->handler->disable(irq); + } + spin_unlock_irqrestore(&desc->lock, flags); +} + +/** + * disable_irq - disable an irq and wait for completion + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Enables and Disables are + * nested. + * This function waits for any pending IRQ handlers for this interrupt + * to complete before returning. If you use this function while + * holding a resource the IRQ handler may need you will deadlock. + * + * This function may be called - with care - from IRQ context. + */ + +void disable_irq(unsigned int irq) +{ + disable_irq_nosync(irq); + + if (!local_irq_count(smp_processor_id())) { + do { + barrier(); + cpu_relax(); + } while (irq_desc[irq].status & IRQ_INPROGRESS); + } +} + +/** + * enable_irq - enable handling of an irq + * @irq: Interrupt to enable + * + * Undoes the effect of one call to disable_irq(). If this + * matches the last disable, processing of interrupts on this + * IRQ line is re-enabled. + * + * This function may be called from IRQ context. + */ + +void enable_irq(unsigned int irq) +{ + irq_desc_t *desc = irq_desc + irq; + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + switch (desc->depth) { + case 1: { + unsigned int status = desc->status & ~IRQ_DISABLED; + desc->status = status; + if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { + desc->status = status | IRQ_REPLAY; + hw_resend_irq(desc->handler,irq); + } + desc->handler->enable(irq); + /* fall-through */ + } + default: + desc->depth--; + break; + case 0: + printk("enable_irq(%u) unbalanced from %p\n", irq, + __builtin_return_address(0)); + } + spin_unlock_irqrestore(&desc->lock, flags); +} + +/* + * do_IRQ handles all normal device IRQ's (the special + * SMP cross-CPU interrupts have their own specific + * handlers). + */ +asmlinkage unsigned int do_IRQ(int irq, struct pt_regs *regs) +{ + /* + * We ack quickly, we don't want the irq controller + * thinking we're snobs just because some other CPU has + * disabled global interrupts (we have already done the + * INT_ACK cycles, it's too late to try to pretend to the + * controller that we aren't taking the interrupt). + * + * 0 return value means that this irq is already being + * handled by some other CPU. (or is disabled) + */ + int cpu = smp_processor_id(); + irq_desc_t *desc = irq_desc + irq; + struct irqaction * action; + unsigned int status; +#ifdef CONFIG_DEBUG_STACKOVERFLOW + long esp; + + /* Debugging check for stack overflow: is there less than 1KB free? */ + __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191)); + if (unlikely(esp < (sizeof(struct task_struct) + 1024))) { + extern void show_stack(unsigned long *); + + printk("do_IRQ: stack overflow: %ld\n", + esp - sizeof(struct task_struct)); + __asm__ __volatile__("movl %%esp,%0" : "=r" (esp)); + show_stack((void *)esp); + } +#endif + + kstat.irqs[cpu][irq]++; + spin_lock(&desc->lock); + desc->handler->ack(irq); + /* + REPLAY is when Linux resends an IRQ that was dropped earlier + WAITING is used by probe to mark irqs that are being tested + */ + status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); + status |= IRQ_PENDING; /* we _want_ to handle it */ + + /* + * If the IRQ is disabled for whatever reason, we cannot + * use the action we have. + */ + action = NULL; + if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) { + action = desc->action; + status &= ~IRQ_PENDING; /* we commit to handling */ + status |= IRQ_INPROGRESS; /* we are handling it */ + } + desc->status = status; + + /* + * If there is no IRQ handler or it was disabled, exit early. + Since we set PENDING, if another processor is handling + a different instance of this same irq, the other processor + will take care of it. + */ + if (!action) + goto out; + + /* + * Edge triggered interrupts need to remember + * pending events. + * This applies to any hw interrupts that allow a second + * instance of the same irq to arrive while we are in do_IRQ + * or in the handler. But the code here only handles the _second_ + * instance of the irq, not the third or fourth. So it is mostly + * useful for irq hardware that does not mask cleanly in an + * SMP environment. + */ + for (;;) { + spin_unlock(&desc->lock); + handle_IRQ_event(irq, regs, action); + spin_lock(&desc->lock); + + if (!(desc->status & IRQ_PENDING)) + break; + desc->status &= ~IRQ_PENDING; + } + desc->status &= ~IRQ_INPROGRESS; +out: + /* + * The ->end() handler has to deal with interrupts which got + * disabled while the handler was running. + */ + desc->handler->end(irq); + spin_unlock(&desc->lock); + + if (softirq_pending(cpu)) + do_softirq(); + return 1; +} + +/** + * request_irq - allocate an interrupt line + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs + * @irqflags: Interrupt type flags + * @devname: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the + * interrupt line and IRQ handling. From the point this + * call is made your handler function may be invoked. Since + * your handler function must clear any interrupt the board + * raises, you must take care both to initialise your hardware + * and to set up the interrupt handler in the right order. + * + * Dev_id must be globally unique. Normally the address of the + * device data structure is used as the cookie. Since the handler + * receives this value it makes sense to use it. + * + * If your interrupt is shared you must pass a non NULL dev_id + * as this is required when freeing the interrupt. + * + * Flags: + * + * SA_SHIRQ Interrupt is shared + * + * SA_INTERRUPT Disable local interrupts while processing + * + * SA_SAMPLE_RANDOM The interrupt can be used for entropy + * + */ + +int request_irq(unsigned int irq, + void (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char * devname, + void *dev_id) +{ + int retval; + struct irqaction * action; + +#if 1 + /* + * Sanity-check: shared interrupts should REALLY pass in + * a real dev-ID, otherwise we'll have trouble later trying + * to figure out which interrupt is which (messes up the + * interrupt freeing logic etc). + */ + if (irqflags & SA_SHIRQ) { + if (!dev_id) + printk("Bad boy: %s (at 0x%x) called us without a dev_id!\n", devname, (&irq)[-1]); + } +#endif + + if (irq >= NR_IRQS) + return -EINVAL; + if (!handler) + return -EINVAL; + + action = (struct irqaction *) + kmalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return -ENOMEM; + + action->handler = handler; + action->flags = irqflags; + action->mask = 0; + action->name = devname; + action->next = NULL; + action->dev_id = dev_id; + + retval = setup_irq(irq, action); + if (retval) + kfree(action); + return retval; +} + +/** + * free_irq - free an interrupt + * @irq: Interrupt line to free + * @dev_id: Device identity to free + * + * Remove an interrupt handler. The handler is removed and if the + * interrupt line is no longer in use by any driver it is disabled. + * On a shared IRQ the caller must ensure the interrupt is disabled + * on the card it drives before calling this function. The function + * does not return until any executing interrupts for this IRQ + * have completed. + * + * This function may be called from interrupt context. + * + * Bugs: Attempting to free an irq in a handler for the same irq hangs + * the machine. + */ + +void free_irq(unsigned int irq, void *dev_id) +{ + irq_desc_t *desc; + struct irqaction **p; + unsigned long flags; + + if (irq >= NR_IRQS) + return; + + desc = irq_desc + irq; + spin_lock_irqsave(&desc->lock,flags); + p = &desc->action; + for (;;) { + struct irqaction * action = *p; + if (action) { + struct irqaction **pp = p; + p = &action->next; + if (action->dev_id != dev_id) + continue; + + /* Found it - now remove it from the list of entries */ + *pp = action->next; + if (!desc->action) { + desc->status |= IRQ_DISABLED; + desc->handler->shutdown(irq); + } + spin_unlock_irqrestore(&desc->lock,flags); + +#ifdef CONFIG_SMP + /* Wait to make sure it's not being used on another CPU */ + while (desc->status & IRQ_INPROGRESS) { + barrier(); + cpu_relax(); + } +#endif + kfree(action); + return; + } + printk("Trying to free free IRQ%d\n",irq); + spin_unlock_irqrestore(&desc->lock,flags); + return; + } +} + +/* + * IRQ autodetection code.. + * + * This depends on the fact that any interrupt that + * comes in on to an unassigned handler will get stuck + * with "IRQ_WAITING" cleared and the interrupt + * disabled. + */ + +static DECLARE_MUTEX(probe_sem); + +/** + * probe_irq_on - begin an interrupt autodetect + * + * Commence probing for an interrupt. The interrupts are scanned + * and a mask of potential interrupt lines is returned. + * + */ + +unsigned long probe_irq_on(void) +{ + unsigned int i; + irq_desc_t *desc; + unsigned long val; + unsigned long delay; + + down(&probe_sem); + /* + * something may have generated an irq long ago and we want to + * flush such a longstanding irq before considering it as spurious. + */ + for (i = NR_IRQS-1; i > 0; i--) { + desc = irq_desc + i; + + spin_lock_irq(&desc->lock); + if (!irq_desc[i].action) + irq_desc[i].handler->startup(i); + spin_unlock_irq(&desc->lock); + } + + /* Wait for longstanding interrupts to trigger. */ + for (delay = jiffies + HZ/50; time_after(delay, jiffies); ) + /* about 20ms delay */ synchronize_irq(); + + /* + * enable any unassigned irqs + * (we must startup again here because if a longstanding irq + * happened in the previous stage, it may have masked itself) + */ + for (i = NR_IRQS-1; i > 0; i--) { + desc = irq_desc + i; + + spin_lock_irq(&desc->lock); + if (!desc->action) { + desc->status |= IRQ_AUTODETECT | IRQ_WAITING; + if (desc->handler->startup(i)) + desc->status |= IRQ_PENDING; + } + spin_unlock_irq(&desc->lock); + } + + /* + * Wait for spurious interrupts to trigger + */ + for (delay = jiffies + HZ/10; time_after(delay, jiffies); ) + /* about 100ms delay */ synchronize_irq(); + + /* + * Now filter out any obviously spurious interrupts + */ + val = 0; + for (i = 0; i < NR_IRQS; i++) { + irq_desc_t *desc = irq_desc + i; + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + /* It triggered already - consider it spurious. */ + if (!(status & IRQ_WAITING)) { + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } else + if (i < 32) + val |= 1 << i; + } + spin_unlock_irq(&desc->lock); + } + + return val; +} + +/* + * Return a mask of triggered interrupts (this + * can handle only legacy ISA interrupts). + */ + +/** + * probe_irq_mask - scan a bitmap of interrupt lines + * @val: mask of interrupts to consider + * + * Scan the ISA bus interrupt lines and return a bitmap of + * active interrupts. The interrupt probe logic state is then + * returned to its previous value. + * + * Note: we need to scan all the irq's even though we will + * only return ISA irq numbers - just so that we reset them + * all to a known state. + */ +unsigned int probe_irq_mask(unsigned long val) +{ + int i; + unsigned int mask; + + mask = 0; + for (i = 0; i < NR_IRQS; i++) { + irq_desc_t *desc = irq_desc + i; + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + if (i < 16 && !(status & IRQ_WAITING)) + mask |= 1 << i; + + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } + spin_unlock_irq(&desc->lock); + } + up(&probe_sem); + + return mask & val; +} + +/* + * Return the one interrupt that triggered (this can + * handle any interrupt source). + */ + +/** + * probe_irq_off - end an interrupt autodetect + * @val: mask of potential interrupts (unused) + * + * Scans the unused interrupt lines and returns the line which + * appears to have triggered the interrupt. If no interrupt was + * found then zero is returned. If more than one interrupt is + * found then minus the first candidate is returned to indicate + * their is doubt. + * + * The interrupt probe logic state is returned to its previous + * value. + * + * BUGS: When used in a module (which arguably shouldnt happen) + * nothing prevents two IRQ probe callers from overlapping. The + * results of this are non-optimal. + */ + +int probe_irq_off(unsigned long val) +{ + int i, irq_found, nr_irqs; + + nr_irqs = 0; + irq_found = 0; + for (i = 0; i < NR_IRQS; i++) { + irq_desc_t *desc = irq_desc + i; + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + if (!(status & IRQ_WAITING)) { + if (!nr_irqs) + irq_found = i; + nr_irqs++; + } + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } + spin_unlock_irq(&desc->lock); + } + up(&probe_sem); + + if (nr_irqs > 1) + irq_found = -irq_found; + return irq_found; +} + +/* this was setup_x86_irq but it seems pretty generic */ +int setup_irq(unsigned int irq, struct irqaction * new) +{ + int shared = 0; + unsigned long flags; + struct irqaction *old, **p; + irq_desc_t *desc = irq_desc + irq; + + /* + * Some drivers like serial.c use request_irq() heavily, + * so we have to be careful not to interfere with a + * running system. + */ + if (new->flags & SA_SAMPLE_RANDOM) { + /* + * This function might sleep, we want to call it first, + * outside of the atomic block. + * Yes, this might clear the entropy pool if the wrong + * driver is attempted to be loaded, without actually + * installing a new handler, but is this really a problem, + * only the sysadmin is able to do this. + */ + rand_initialize_irq(irq); + } + + /* + * The following block of code has to be executed atomically + */ + spin_lock_irqsave(&desc->lock,flags); + p = &desc->action; + if ((old = *p) != NULL) { + /* Can't share interrupts unless both agree to */ + if (!(old->flags & new->flags & SA_SHIRQ)) { + spin_unlock_irqrestore(&desc->lock,flags); + return -EBUSY; + } + + /* add new interrupt at end of irq queue */ + do { + p = &old->next; + old = *p; + } while (old); + shared = 1; + } + + *p = new; + + if (!shared) { + desc->depth = 0; + desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS); + desc->handler->startup(irq); + } + spin_unlock_irqrestore(&desc->lock,flags); + + register_irq_proc(irq); + return 0; +} + +static struct proc_dir_entry * root_irq_dir; +static struct proc_dir_entry * irq_dir [NR_IRQS]; + +#define HEX_DIGITS 8 + +static unsigned int parse_hex_value (const char *buffer, + unsigned long count, unsigned long *ret) +{ + unsigned char hexnum [HEX_DIGITS]; + unsigned long value; + int i; + + if (!count) + return -EINVAL; + if (count > HEX_DIGITS) + count = HEX_DIGITS; + if (copy_from_user(hexnum, buffer, count)) + return -EFAULT; + + /* + * Parse the first 8 characters as a hex string, any non-hex char + * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same. + */ + value = 0; + + for (i = 0; i < count; i++) { + unsigned int c = hexnum[i]; + + switch (c) { + case '0' ... '9': c -= '0'; break; + case 'a' ... 'f': c -= 'a'-10; break; + case 'A' ... 'F': c -= 'A'-10; break; + default: + goto out; + } + value = (value << 4) | c; + } +out: + *ret = value; + return 0; +} + +#if CONFIG_SMP + +static struct proc_dir_entry * smp_affinity_entry [NR_IRQS]; + +static unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL }; +static int irq_affinity_read_proc (char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + if (count < HEX_DIGITS+1) + return -EINVAL; + return sprintf (page, "%08lx\n", irq_affinity[(long)data]); +} + +static int irq_affinity_write_proc (struct file *file, const char *buffer, + unsigned long count, void *data) +{ + int irq = (long) data, full_count = count, err; + unsigned long new_value; + + if (!irq_desc[irq].handler->set_affinity) + return -EIO; + + err = parse_hex_value(buffer, count, &new_value); + + /* + * Do not allow disabling IRQs completely - it's a too easy + * way to make the system unusable accidentally :-) At least + * one online CPU still has to be targeted. + */ + if (!(new_value & cpu_online_map)) + return -EINVAL; + + irq_affinity[irq] = new_value; + irq_desc[irq].handler->set_affinity(irq, new_value); + + return full_count; +} + +#endif + +static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + unsigned long *mask = (unsigned long *) data; + if (count < HEX_DIGITS+1) + return -EINVAL; + return sprintf (page, "%08lx\n", *mask); +} + +static int prof_cpu_mask_write_proc (struct file *file, const char *buffer, + unsigned long count, void *data) +{ + unsigned long *mask = (unsigned long *) data, full_count = count, err; + unsigned long new_value; + + err = parse_hex_value(buffer, count, &new_value); + if (err) + return err; + + *mask = new_value; + return full_count; +} + +#define MAX_NAMELEN 10 + +static void register_irq_proc (unsigned int irq) +{ + char name [MAX_NAMELEN]; + + if (!root_irq_dir || (irq_desc[irq].handler == &no_irq_type) || + irq_dir[irq]) + return; + + memset(name, 0, MAX_NAMELEN); + sprintf(name, "%d", irq); + + /* create /proc/irq/1234 */ + irq_dir[irq] = proc_mkdir(name, root_irq_dir); + +#if CONFIG_SMP + { + struct proc_dir_entry *entry; + + /* create /proc/irq/1234/smp_affinity */ + entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]); + + if (entry) { + entry->nlink = 1; + entry->data = (void *)(long)irq; + entry->read_proc = irq_affinity_read_proc; + entry->write_proc = irq_affinity_write_proc; + } + + smp_affinity_entry[irq] = entry; + } +#endif +} + +unsigned long prof_cpu_mask = -1; + +void init_irq_proc (void) +{ + struct proc_dir_entry *entry; + int i; + + /* create /proc/irq */ + root_irq_dir = proc_mkdir("irq", 0); + + /* create /proc/irq/prof_cpu_mask */ + entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir); + + if (!entry) + return; + + entry->nlink = 1; + entry->data = (void *)&prof_cpu_mask; + entry->read_proc = prof_cpu_mask_read_proc; + entry->write_proc = prof_cpu_mask_write_proc; + + /* + * Create entries for all existing IRQs. + */ + for (i = 0; i < NR_IRQS; i++) + register_irq_proc(i); +} + diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/ldt.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/ldt.c new file mode 100644 index 0000000000..6a2bd7a0d9 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/ldt.c @@ -0,0 +1,287 @@ +/* + * linux/kernel/ldt.c + * + * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds + * Copyright (C) 1999 Ingo Molnar + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ +static void flush_ldt(void *mm) +{ + if (current->active_mm) + load_LDT(¤t->active_mm->context); +} +#endif + +static int alloc_ldt(mm_context_t *pc, int mincount, int reload) +{ + void *oldldt; + void *newldt; + int oldsize; + + if (mincount <= pc->size) + return 0; + oldsize = pc->size; + mincount = (mincount+511)&(~511); + if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) + newldt = vmalloc(mincount*LDT_ENTRY_SIZE); + else + newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); + + if (!newldt) + return -ENOMEM; + + if (oldsize) + memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); + + oldldt = pc->ldt; + memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); + wmb(); + pc->ldt = newldt; + pc->size = mincount; + if (reload) { + make_pages_readonly( + pc->ldt, + (pc->size*LDT_ENTRY_SIZE)/PAGE_SIZE); + load_LDT(pc); + flush_page_update_queue(); +#ifdef CONFIG_SMP + if (current->mm->cpu_vm_mask != (1< PAGE_SIZE) + vfree(oldldt); + else + kfree(oldldt); + } + return 0; +} + +static inline int copy_ldt(mm_context_t *new, mm_context_t *old) +{ + int err = alloc_ldt(new, old->size, 0); + if (err < 0) { + printk(KERN_WARNING "ldt allocation failed\n"); + new->size = 0; + return err; + } + memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); + make_pages_readonly(new->ldt, (new->size*LDT_ENTRY_SIZE)/PAGE_SIZE); + return 0; +} + +/* + * we do not have to muck with descriptors here, that is + * done in switch_mm() as needed. + */ +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + struct mm_struct * old_mm; + int retval = 0; + + init_MUTEX(&mm->context.sem); + mm->context.size = 0; + old_mm = current->mm; + if (old_mm && old_mm->context.size > 0) { + down(&old_mm->context.sem); + retval = copy_ldt(&mm->context, &old_mm->context); + up(&old_mm->context.sem); + } + return retval; +} + +/* + * No need to lock the MM as we are the last user + * Do not touch the ldt register, we are already + * in the next thread. + */ +void destroy_context(struct mm_struct *mm) +{ + if (mm->context.size) { + make_pages_writeable( + mm->context.ldt, + (mm->context.size*LDT_ENTRY_SIZE)/PAGE_SIZE); + flush_page_update_queue(); + if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) + vfree(mm->context.ldt); + else + kfree(mm->context.ldt); + mm->context.size = 0; + } +} + +static int read_ldt(void * ptr, unsigned long bytecount) +{ + int err; + unsigned long size; + struct mm_struct * mm = current->mm; + + if (!mm->context.size) + return 0; + if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) + bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; + + down(&mm->context.sem); + size = mm->context.size*LDT_ENTRY_SIZE; + if (size > bytecount) + size = bytecount; + + err = 0; + if (copy_to_user(ptr, mm->context.ldt, size)) + err = -EFAULT; + up(&mm->context.sem); + if (err < 0) + return err; + if (size != bytecount) { + /* zero-fill the rest */ + clear_user(ptr+size, bytecount-size); + } + return bytecount; +} + + +static int read_default_ldt(void * ptr, unsigned long bytecount) +{ + int err; + unsigned long size; + void *address; + + err = 0; + address = &default_ldt[0]; + size = 5*sizeof(struct desc_struct); + if (size > bytecount) + size = bytecount; + + err = size; + if (copy_to_user(ptr, address, size)) + err = -EFAULT; + + return err; +} + +static int write_ldt(void * ptr, unsigned long bytecount, int oldmode) +{ + struct mm_struct * mm = current->mm; + __u32 entry_1, entry_2, *lp; + unsigned long phys_lp, max_limit; + int error; + struct modify_ldt_ldt_s ldt_info; + + error = -EINVAL; + if (bytecount != sizeof(ldt_info)) + goto out; + error = -EFAULT; + if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) + goto out; + + error = -EINVAL; + if (ldt_info.entry_number >= LDT_ENTRIES) + goto out; + if (ldt_info.contents == 3) { + if (oldmode) + goto out; + if (ldt_info.seg_not_present == 0) + goto out; + } + + /* + * This makes our tests for overlap with Xen space easier. There's no good + * reason to have a user segment starting this high anyway. + */ + if (ldt_info.base_addr >= PAGE_OFFSET) + goto out; + + down(&mm->context.sem); + if (ldt_info.entry_number >= mm->context.size) { + error = alloc_ldt(¤t->mm->context, ldt_info.entry_number+1, 1); + if (error < 0) + goto out_unlock; + } + + + lp = (__u32 *)((ldt_info.entry_number<<3) + (char *)mm->context.ldt); + phys_lp = arbitrary_virt_to_phys(lp); + + /* Allow LDTs to be cleared by the user. */ + if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { + if (oldmode || + (ldt_info.contents == 0 && + ldt_info.read_exec_only == 1 && + ldt_info.seg_32bit == 0 && + ldt_info.limit_in_pages == 0 && + ldt_info.seg_not_present == 1 && + ldt_info.useable == 0 )) { + entry_1 = 0; + entry_2 = 0; + goto install; + } + } + + max_limit = HYPERVISOR_VIRT_START - ldt_info.base_addr; + if ( ldt_info.limit_in_pages ) + max_limit >>= PAGE_SHIFT; + max_limit--; + if ( (ldt_info.limit & 0xfffff) > (max_limit & 0xfffff) ) + ldt_info.limit = max_limit; + + entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) | + (ldt_info.limit & 0x0ffff); + entry_2 = (ldt_info.base_addr & 0xff000000) | + ((ldt_info.base_addr & 0x00ff0000) >> 16) | + (ldt_info.limit & 0xf0000) | + ((ldt_info.read_exec_only ^ 1) << 9) | + (ldt_info.contents << 10) | + ((ldt_info.seg_not_present ^ 1) << 15) | + (ldt_info.seg_32bit << 22) | + (ldt_info.limit_in_pages << 23) | + 0x7000; + if (!oldmode) + entry_2 |= (ldt_info.useable << 20); + + /* Install the new entry ... */ + install: + error = HYPERVISOR_update_descriptor(phys_lp, entry_1, entry_2); + + out_unlock: + up(&mm->context.sem); + out: + return error; +} + +asmlinkage int sys_modify_ldt(int func, void *ptr, unsigned long bytecount) +{ + int ret = -ENOSYS; + + switch (func) { + case 0: + ret = read_ldt(ptr, bytecount); + break; + case 1: + ret = write_ldt(ptr, bytecount, 1); + break; + case 2: + ret = read_default_ldt(ptr, bytecount); + break; + case 0x11: + ret = write_ldt(ptr, bytecount, 0); + break; + } + return ret; +} diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/pci-dma.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/pci-dma.c new file mode 100644 index 0000000000..dd8842719e --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/pci-dma.c @@ -0,0 +1,37 @@ +/* + * Dynamic DMA mapping support. + * + * On i386 there is no hardware dynamic DMA address translation, + * so consistent alloc/free are merely page allocation/freeing. + * The rest of the dynamic DMA mapping interface is implemented + * in asm/pci.h. + */ + +#include +#include +#include +#include +#include + +void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, + dma_addr_t *dma_handle) +{ + void *ret; + int gfp = GFP_ATOMIC; + + if (hwdev == NULL || ((u32)hwdev->dma_mask < 0xffffffff)) + gfp |= GFP_DMA; + ret = (void *)__get_free_pages(gfp, get_order(size)); + + if (ret != NULL) { + memset(ret, 0, size); + *dma_handle = virt_to_bus(ret); + } + return ret; +} + +void pci_free_consistent(struct pci_dev *hwdev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + free_pages((unsigned long)vaddr, get_order(size)); +} diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/pci-i386.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/pci-i386.c new file mode 100644 index 0000000000..96dcdde6b3 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/pci-i386.c @@ -0,0 +1,410 @@ +/* + * Low-Level PCI Access for i386 machines + * + * Copyright 1993, 1994 Drew Eckhardt + * Visionary Computing + * (Unix and Linux consulting and custom programming) + * Drew@Colorado.EDU + * +1 (303) 786-7975 + * + * Drew's work was sponsored by: + * iX Multiuser Multitasking Magazine + * Hannover, Germany + * hm@ix.de + * + * Copyright 1997--2000 Martin Mares + * + * For more information, please consult the following manuals (look at + * http://www.pcisig.com/ for how to get them): + * + * PCI BIOS Specification + * PCI Local Bus Specification + * PCI to PCI Bridge Specification + * PCI System Design Guide + * + * + * CHANGELOG : + * Jun 17, 1994 : Modified to accommodate the broken pre-PCI BIOS SPECIFICATION + * Revision 2.0 present on 's ASUS mainboard. + * + * Jan 5, 1995 : Modified to probe PCI hardware at boot time by Frederic + * Potter, potter@cao-vlsi.ibp.fr + * + * Jan 10, 1995 : Modified to store the information about configured pci + * devices into a list, which can be accessed via /proc/pci by + * Curtis Varner, cvarner@cs.ucr.edu + * + * Jan 12, 1995 : CPU-PCI bridge optimization support by Frederic Potter. + * Alpha version. Intel & UMC chipset support only. + * + * Apr 16, 1995 : Source merge with the DEC Alpha PCI support. Most of the code + * moved to drivers/pci/pci.c. + * + * Dec 7, 1996 : Added support for direct configuration access of boards + * with Intel compatible access schemes (tsbogend@alpha.franken.de) + * + * Feb 3, 1997 : Set internal functions to static, save/restore flags + * avoid dead locks reading broken PCI BIOS, werner@suse.de + * + * Apr 26, 1997 : Fixed case when there is BIOS32, but not PCI BIOS + * (mj@atrey.karlin.mff.cuni.cz) + * + * May 7, 1997 : Added some missing cli()'s. [mj] + * + * Jun 20, 1997 : Corrected problems in "conf1" type accesses. + * (paubert@iram.es) + * + * Aug 2, 1997 : Split to PCI BIOS handling and direct PCI access parts + * and cleaned it up... Martin Mares + * + * Feb 6, 1998 : No longer using BIOS to find devices and device classes. [mj] + * + * May 1, 1998 : Support for peer host bridges. [mj] + * + * Jun 19, 1998 : Changed to use spinlocks, so that PCI configuration space + * can be accessed from interrupts even on SMP systems. [mj] + * + * August 1998 : Better support for peer host bridges and more paranoid + * checks for direct hardware access. Ugh, this file starts to look as + * a large gallery of common hardware bug workarounds (watch the comments) + * -- the PCI specs themselves are sane, but most implementors should be + * hit hard with \hammer scaled \magstep5. [mj] + * + * Jan 23, 1999 : More improvements to peer host bridge logic. i450NX fixup. [mj] + * + * Feb 8, 1999 : Added UM8886BF I/O address fixup. [mj] + * + * August 1999 : New resource management and configuration access stuff. [mj] + * + * Sep 19, 1999 : Use PCI IRQ routing tables for detection of peer host bridges. + * Based on ideas by Chris Frantz and David Hinds. [mj] + * + * Sep 28, 1999 : Handle unreported/unassigned IRQs. Thanks to Shuu Yamaguchi + * for a lot of patience during testing. [mj] + * + * Oct 8, 1999 : Split to pci-i386.c, pci-pc.c and pci-visws.c. [mj] + */ + +#include +#include +#include +#include +#include +#include + +#include "pci-i386.h" + +void +pcibios_update_resource(struct pci_dev *dev, struct resource *root, + struct resource *res, int resource) +{ + u32 new, check; + int reg; + + new = res->start | (res->flags & PCI_REGION_FLAG_MASK); + if (resource < 6) { + reg = PCI_BASE_ADDRESS_0 + 4*resource; + } else if (resource == PCI_ROM_RESOURCE) { + res->flags |= PCI_ROM_ADDRESS_ENABLE; + new |= PCI_ROM_ADDRESS_ENABLE; + reg = dev->rom_base_reg; + } else { + /* Somebody might have asked allocation of a non-standard resource */ + return; + } + + pci_write_config_dword(dev, reg, new); + pci_read_config_dword(dev, reg, &check); + if ((new ^ check) & ((new & PCI_BASE_ADDRESS_SPACE_IO) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK)) { + printk(KERN_ERR "PCI: Error while updating region " + "%s/%d (%08x != %08x)\n", dev->slot_name, resource, + new, check); + } +} + +/* + * We need to avoid collisions with `mirrored' VGA ports + * and other strange ISA hardware, so we always want the + * addresses to be allocated in the 0x000-0x0ff region + * modulo 0x400. + * + * Why? Because some silly external IO cards only decode + * the low 10 bits of the IO address. The 0x00-0xff region + * is reserved for motherboard devices that decode all 16 + * bits, so it's ok to allocate at, say, 0x2800-0x28ff, + * but we want to try to avoid allocating at 0x2900-0x2bff + * which might have be mirrored at 0x0100-0x03ff.. + */ +void +pcibios_align_resource(void *data, struct resource *res, + unsigned long size, unsigned long align) +{ + if (res->flags & IORESOURCE_IO) { + unsigned long start = res->start; + + if (start & 0x300) { + start = (start + 0x3ff) & ~0x3ff; + res->start = start; + } + } +} + + +/* + * Handle resources of PCI devices. If the world were perfect, we could + * just allocate all the resource regions and do nothing more. It isn't. + * On the other hand, we cannot just re-allocate all devices, as it would + * require us to know lots of host bridge internals. So we attempt to + * keep as much of the original configuration as possible, but tweak it + * when it's found to be wrong. + * + * Known BIOS problems we have to work around: + * - I/O or memory regions not configured + * - regions configured, but not enabled in the command register + * - bogus I/O addresses above 64K used + * - expansion ROMs left enabled (this may sound harmless, but given + * the fact the PCI specs explicitly allow address decoders to be + * shared between expansion ROMs and other resource regions, it's + * at least dangerous) + * + * Our solution: + * (1) Allocate resources for all buses behind PCI-to-PCI bridges. + * This gives us fixed barriers on where we can allocate. + * (2) Allocate resources for all enabled devices. If there is + * a collision, just mark the resource as unallocated. Also + * disable expansion ROMs during this step. + * (3) Try to allocate resources for disabled devices. If the + * resources were assigned correctly, everything goes well, + * if they weren't, they won't disturb allocation of other + * resources. + * (4) Assign new addresses to resources which were either + * not configured at all or misconfigured. If explicitly + * requested by the user, configure expansion ROM address + * as well. + */ + +static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) +{ + struct list_head *ln; + struct pci_bus *bus; + struct pci_dev *dev; + int idx; + struct resource *r, *pr; + + /* Depth-First Search on bus tree */ + for (ln=bus_list->next; ln != bus_list; ln=ln->next) { + bus = pci_bus_b(ln); + if ((dev = bus->self)) { + printk("alloc bus res: %s\n", dev->slot_name); + for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) { + r = &dev->resource[idx]; + if (!r->start) + { + printk(" res1: 0x%08lx-0x%08lx f=%lx\n", + r->start, r->end, r->flags); + + continue; + } + pr = pci_find_parent_resource(dev, r); + if (!pr || request_resource(pr, r) < 0) + printk(KERN_ERR "PCI: Cannot allocate resource region %d " + "of bridge %s (%p)\n", idx, dev->slot_name, pr); + printk(" res2: %08lx-%08lx f=%lx\n", + r->start, r->end, r->flags); + } + } + pcibios_allocate_bus_resources(&bus->children); + } +} + +static void __init pcibios_allocate_resources(int pass) +{ + struct pci_dev *dev; + int idx, disabled; + u16 command; + struct resource *r, *pr; + + pci_for_each_dev(dev) { + pci_read_config_word(dev, PCI_COMMAND, &command); + for(idx = 0; idx < 6; idx++) { + r = &dev->resource[idx]; + if (r->parent) /* Already allocated */ + continue; + if (!r->start) /* Address not assigned at all */ + continue; + if (r->flags & IORESOURCE_IO) + disabled = !(command & PCI_COMMAND_IO); + else + disabled = !(command & PCI_COMMAND_MEMORY); + if (pass == disabled) { + printk("PCI: Resource %08lx-%08lx (f=%lx, d=%d, p=%d) (%s)\n", + r->start, r->end, r->flags, disabled, pass, dev->slot_name); + pr = pci_find_parent_resource(dev, r); + if (!pr || request_resource(pr, r) < 0) { + printk(KERN_ERR "PCI: Cannot allocate resource region %d" + " of device %s (%p)\n", idx, dev->slot_name, pr); + /* We'll assign a new address later */ + r->end -= r->start; + r->start = 0; + } + } + } + if (!pass) { + r = &dev->resource[PCI_ROM_RESOURCE]; + if (r->flags & PCI_ROM_ADDRESS_ENABLE) { + /* Turn the ROM off, leave the resource region, but keep it unregistered. */ + u32 reg; + printk("PCI: Switching off ROM of %s\n", dev->slot_name); + r->flags &= ~PCI_ROM_ADDRESS_ENABLE; + pci_read_config_dword(dev, dev->rom_base_reg, ®); + pci_write_config_dword(dev, dev->rom_base_reg, reg & ~PCI_ROM_ADDRESS_ENABLE); + } + } + } +} + +static void __init pcibios_assign_resources(void) +{ + struct pci_dev *dev; + int idx; + struct resource *r; + + pci_for_each_dev(dev) { + int class = dev->class >> 8; + + /* Don't touch classless devices and host bridges */ + if (!class || class == PCI_CLASS_BRIDGE_HOST) + continue; + + for(idx=0; idx<6; idx++) { + r = &dev->resource[idx]; + + /* + * Don't touch IDE controllers and I/O ports of video cards! + */ + if ((class == PCI_CLASS_STORAGE_IDE && idx < 4) || + (class == PCI_CLASS_DISPLAY_VGA && (r->flags & IORESOURCE_IO))) + continue; + + /* + * We shall assign a new address to this resource, either because + * the BIOS forgot to do so or because we have decided the old + * address was unusable for some reason. + */ + if (!r->start && r->end) + pci_assign_resource(dev, idx); + } + + if (pci_probe & PCI_ASSIGN_ROMS) { + r = &dev->resource[PCI_ROM_RESOURCE]; + r->end -= r->start; + r->start = 0; + if (r->end) + pci_assign_resource(dev, PCI_ROM_RESOURCE); + } + } +} + +void __init pcibios_set_cacheline_size(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + pci_cache_line_size = 32 >> 2; + if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD) + pci_cache_line_size = 64 >> 2; /* K7 & K8 */ + else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL) + pci_cache_line_size = 128 >> 2; /* P4 */ +} + +void __init pcibios_resource_survey(void) +{ + DBG("PCI: Allocating resources\n"); + pcibios_allocate_bus_resources(&pci_root_buses); + pcibios_allocate_resources(0); + pcibios_allocate_resources(1); + pcibios_assign_resources(); +} + +int pcibios_enable_resources(struct pci_dev *dev, int mask) +{ + u16 cmd, old_cmd; + int idx; + struct resource *r; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + old_cmd = cmd; + for(idx=0; idx<6; idx++) { + /* Only set up the requested stuff */ + if (!(mask & (1<resource[idx]; + if (!r->start && r->end) { + printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", dev->slot_name); + return -EINVAL; + } + if (r->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (r->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + if (dev->resource[PCI_ROM_RESOURCE].start) + cmd |= PCI_COMMAND_MEMORY; + if (cmd != old_cmd) { + printk("PCI: Enabling device %s (%04x -> %04x)\n", dev->slot_name, old_cmd, cmd); + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + return 0; +} + +/* + * If we set up a device for bus mastering, we need to check the latency + * timer as certain crappy BIOSes forget to set it properly. + */ +unsigned int pcibios_max_latency = 255; + +void pcibios_set_master(struct pci_dev *dev) +{ + u8 lat; + pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); + if (lat < 16) + lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency; + else if (lat > pcibios_max_latency) + lat = pcibios_max_latency; + else + return; + printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n", dev->slot_name, lat); + pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); +} + +int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine) +{ + unsigned long prot; + + /* I/O space cannot be accessed via normal processor loads and + * stores on this platform. + */ + if (mmap_state == pci_mmap_io) + return -EINVAL; + + /* Leave vm_pgoff as-is, the PCI space address is the physical + * address on this platform. + */ + vma->vm_flags |= (VM_SHM | VM_LOCKED | VM_IO); + + prot = pgprot_val(vma->vm_page_prot); + if (boot_cpu_data.x86 > 3) + prot |= _PAGE_PCD | _PAGE_PWT; + vma->vm_page_prot = __pgprot(prot); + + /* Write-combine setting is ignored, it is changed via the mtrr + * interfaces on this platform. + */ + if (remap_page_range(vma->vm_start, vma->vm_pgoff << PAGE_SHIFT, + vma->vm_end - vma->vm_start, + vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/pci-i386.h b/xenolinux-2.4.25-sparse/arch/xen/kernel/pci-i386.h new file mode 100644 index 0000000000..fe70b10166 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/pci-i386.h @@ -0,0 +1,71 @@ +/* + * Low-Level PCI Access for i386 machines. + * + * (c) 1999 Martin Mares + */ + +#undef DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +#define PCI_PROBE_BIOS 0x0001 +#define PCI_PROBE_CONF1 0x0002 +#define PCI_PROBE_CONF2 0x0004 +#define PCI_NO_SORT 0x0100 +#define PCI_BIOS_SORT 0x0200 +#define PCI_NO_CHECKS 0x0400 +#define PCI_ASSIGN_ROMS 0x1000 +#define PCI_BIOS_IRQ_SCAN 0x2000 +#define PCI_ASSIGN_ALL_BUSSES 0x4000 + +extern unsigned int pci_probe; + +/* pci-i386.c */ + +extern unsigned int pcibios_max_latency; +extern u8 pci_cache_line_size; + +void pcibios_resource_survey(void); +void pcibios_set_cacheline_size(void); +int pcibios_enable_resources(struct pci_dev *, int); + +/* pci-pc.c */ + +extern int pcibios_last_bus; +extern struct pci_bus *pci_root_bus; +extern struct pci_ops *pci_root_ops; + +/* pci-irq.c */ + +struct irq_info { + u8 bus, devfn; /* Bus, device and function */ + struct { + u8 link; /* IRQ line ID, chipset dependent, 0=not routed */ + u16 bitmap; /* Available IRQs */ + } __attribute__((packed)) irq[4]; + u8 slot; /* Slot number, 0=onboard */ + u8 rfu; +} __attribute__((packed)); + +struct irq_routing_table { + u32 signature; /* PIRQ_SIGNATURE should be here */ + u16 version; /* PIRQ_VERSION */ + u16 size; /* Table size in bytes */ + u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ + u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */ + u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */ + u32 miniport_data; /* Crap */ + u8 rfu[11]; + u8 checksum; /* Modulo 256 checksum must give zero */ + struct irq_info slots[0]; +} __attribute__((packed)); + +extern unsigned int pcibios_irq_mask; + +void pcibios_irq_init(void); +void pcibios_fixup_irqs(void); +void pcibios_enable_irq(struct pci_dev *dev); diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/pci-irq.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/pci-irq.c new file mode 100644 index 0000000000..f530244f6a --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/pci-irq.c @@ -0,0 +1,90 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2004 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: phys_dev.c + * Author: Rolf Neugebauer (rolf.neugebauer@intel.com) + * Date: Mar 2004 + * + * Description: XenoLinux wrappers for PCI interrupt handling. + * very simple since someone else is doing all the hard bits + */ + + +/* + * Low-Level PCI Support for PC -- Routing of Interrupts + * + * (c) 1999--2000 Martin Mares + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "pci-i386.h" + +#include + +unsigned int pcibios_irq_mask = 0xfff8; + +void eisa_set_level_irq(unsigned int irq) +{ + /* dummy */ +} + +void __init pcibios_irq_init(void) +{ + printk("PCI: IRQ init\n"); +} + +void __init pcibios_fixup_irqs(void) +{ + struct pci_dev *dev; + physdev_op_t op; + int ret; + + + printk("PCI: IRQ fixup\n"); + pci_for_each_dev(dev) { + + op.cmd = PHYSDEVOP_FIND_IRQ; + op.u.find_irq.seg = 0; + op.u.find_irq.bus = dev->bus->number; + op.u.find_irq.dev = PCI_SLOT(dev->devfn); + op.u.find_irq.func = PCI_FUNC(dev->devfn); + + if ( (ret = HYPERVISOR_physdev_op(&op)) != 0 ) + { + printk(KERN_ALERT "pci find irq error\n"); + return; + } + + dev->irq = op.u.find_irq.irq; + printk(KERN_INFO "PCI IRQ: [%02x:%02x:%02x] -> %d\n", + dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), dev->irq); + } + return; +} + +void pcibios_penalize_isa_irq(int irq) +{ + /* dummy */ +} + +void pcibios_enable_irq(struct pci_dev *dev) +{ + u8 pin; + + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); + + if (pin && !dev->irq) { + printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of " + "device %s.\n", 'A' + pin - 1, dev->slot_name); + } +} diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/pci-pc.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/pci-pc.c new file mode 100644 index 0000000000..afe6e4d494 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/pci-pc.c @@ -0,0 +1,364 @@ +/* + * Low-Level PCI Support for PC + * + * (c) 1999--2000 Martin Mares + * + * adjusted to use Xen's interface by Rolf Neugebauer + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include "pci-i386.h" + +int pcibios_last_bus = -1; +struct pci_bus *pci_root_bus = NULL; +struct pci_ops *pci_root_ops = NULL; + +int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value) = NULL; +int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value) = NULL; + +static int pci_using_acpi_prt = 0; + +/* + * This interrupt-safe spinlock protects all accesses to PCI + * configuration space. + */ +static spinlock_t pci_config_lock = SPIN_LOCK_UNLOCKED; + +unsigned int pci_probe = PCI_PROBE_BIOS; + +/* + * Functions for accessing PCI configuration space with type 1 accesses + */ + +static int pci_confx_read (int seg, int bus, int dev, int fn, int reg, + int len, u32 *value) +{ + int ret; + physdev_op_t op; + + if (bus > 255 || dev > 31 || fn > 7 || reg > 255) + return -EINVAL; + + op.cmd = PHYSDEVOP_CFGREG_READ; + op.u.cfg_read.seg = seg; + op.u.cfg_read.bus = bus; + op.u.cfg_read.dev = dev; + op.u.cfg_read.func = fn; + op.u.cfg_read.reg = reg; + op.u.cfg_read.len = len; + + if ( (ret = HYPERVISOR_physdev_op(&op)) != 0 ) + { + //printk(KERN_ALERT "pci config read error\n"); + return ret; + } + + *value = op.u.cfg_read.value; + + return 0; +} + +static int pci_confx_write (int seg, int bus, int dev, int fn, int reg, + int len, u32 value) +{ + int ret; + physdev_op_t op; + + if ((bus > 255 || dev > 31 || fn > 7 || reg > 255)) + return -EINVAL; + + op.cmd = PHYSDEVOP_CFGREG_WRITE; + op.u.cfg_write.seg = seg; + op.u.cfg_write.bus = bus; + op.u.cfg_write.dev = dev; + op.u.cfg_write.func = fn; + op.u.cfg_write.reg = reg; + op.u.cfg_write.len = len; + op.u.cfg_write.value = value; + + if ( (ret = HYPERVISOR_physdev_op(&op)) != 0 ) + { + //printk(KERN_ALERT "pci config write error\n"); + return ret; + } + return 0; +} + + +static int pci_confx_read_config_byte(struct pci_dev *dev, int where, u8 *value) +{ + int result; + u32 data; + + result = pci_confx_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 1, &data); + + *value = (u8)data; + + return result; +} + +static int pci_confx_read_config_word(struct pci_dev *dev, int where, u16 *value) +{ + int result; + u32 data; + + result = pci_confx_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 2, &data); + + *value = (u16)data; + + return result; +} + +static int pci_confx_read_config_dword(struct pci_dev *dev, int where, u32 *value) +{ + return pci_confx_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 4, value); +} + +static int pci_confx_write_config_byte(struct pci_dev *dev, int where, u8 value) +{ + return pci_confx_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 1, value); +} + +static int pci_confx_write_config_word(struct pci_dev *dev, int where, u16 value) +{ + return pci_confx_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 2, value); +} + +static int pci_confx_write_config_dword(struct pci_dev *dev, int where, u32 value) +{ + return pci_confx_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 4, value); +} + +static struct pci_ops pci_direct_confx = { + pci_confx_read_config_byte, + pci_confx_read_config_word, + pci_confx_read_config_dword, + pci_confx_write_config_byte, + pci_confx_write_config_word, + pci_confx_write_config_dword +}; + + + +static struct pci_ops * __devinit pci_check_xen(void) +{ + unsigned long flags; + + __save_flags(flags); __cli(); + + printk(KERN_INFO "PCI: Using Xen interface\n"); + + __restore_flags(flags); + + return &pci_direct_confx; +} + +struct pci_fixup pcibios_fixups[] = { {0}}; + + +struct irq_routing_table * __devinit pcibios_get_irq_routing_table(void) +{ + return NULL; +} + +int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) +{ + return 0; +} + +/* + * Several buggy motherboards address only 16 devices and mirror + * them to next 16 IDs. We try to detect this `feature' on all + * primary buses (those containing host bridges as they are + * expected to be unique) and remove the ghost devices. + */ + +static void __devinit pcibios_fixup_ghosts(struct pci_bus *b) +{ + struct list_head *ln, *mn; + struct pci_dev *d, *e; + int mirror = PCI_DEVFN(16,0); + int seen_host_bridge = 0; + int i; + + DBG("PCI: Scanning for ghost devices on bus %d\n", b->number); + for (ln=b->devices.next; ln != &b->devices; ln=ln->next) { + d = pci_dev_b(ln); + if ((d->class >> 8) == PCI_CLASS_BRIDGE_HOST) + seen_host_bridge++; + for (mn=ln->next; mn != &b->devices; mn=mn->next) { + e = pci_dev_b(mn); + if (e->devfn != d->devfn + mirror || + e->vendor != d->vendor || + e->device != d->device || + e->class != d->class) + continue; + for(i=0; iresource[i].start != d->resource[i].start || + e->resource[i].end != d->resource[i].end || + e->resource[i].flags != d->resource[i].flags) + continue; + break; + } + if (mn == &b->devices) + return; + } + if (!seen_host_bridge) + return; + printk(KERN_WARNING "PCI: Ignoring ghost devices on bus %02x\n", b->number); + + ln = &b->devices; + while (ln->next != &b->devices) { + d = pci_dev_b(ln->next); + if (d->devfn >= mirror) { + list_del(&d->global_list); + list_del(&d->bus_list); + kfree(d); + } else + ln = ln->next; + } +} + +/* + * Discover remaining PCI buses in case there are peer host bridges. + * We use the number of last PCI bus provided by the PCI BIOS. + */ +static void __devinit pcibios_fixup_peer_bridges(void) +{ + int n; + struct pci_bus bus; + struct pci_dev dev; + u16 l; + + if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff) + return; + DBG("PCI: Peer bridge fixup\n"); + for (n=0; n <= pcibios_last_bus; n++) { + if (pci_bus_exists(&pci_root_buses, n)) + continue; + bus.number = n; + bus.ops = pci_root_ops; + dev.bus = &bus; + for(dev.devfn=0; dev.devfn<256; dev.devfn += 8) + if (!pci_read_config_word(&dev, PCI_VENDOR_ID, &l) && + l != 0x0000 && l != 0xffff) { + DBG("Found device at %02x:%02x [%04x]\n", n, dev.devfn, l); + printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n); + pci_scan_bus(n, pci_root_ops, NULL); + break; + } + } +} + + +/* + * Called after each bus is probed, but before its children + * are examined. + */ + +void __devinit pcibios_fixup_bus(struct pci_bus *b) +{ + pcibios_fixup_ghosts(b); + pci_read_bridge_bases(b); + return; +} + +struct pci_bus * __devinit pcibios_scan_root(int busnum) +{ + struct list_head *list; + struct pci_bus *bus; + + list_for_each(list, &pci_root_buses) { + bus = pci_bus_b(list); + if (bus->number == busnum) { + /* Already scanned */ + return bus; + } + } + + printk("PCI: Probing PCI hardware (bus %02x)\n", busnum); + + return pci_scan_bus(busnum, pci_root_ops, NULL); +} + +void __devinit pcibios_config_init(void) +{ + /* + * Try all known PCI access methods. Note that we support using + * both PCI BIOS and direct access, with a preference for direct. + */ + + pci_root_ops = pci_check_xen(); + pci_config_read = pci_confx_read; + pci_config_write = pci_confx_write; + + return; +} + +void __init pcibios_init(void) +{ + if (!pci_root_ops) + pcibios_config_init(); + if (!pci_root_ops) { + printk(KERN_WARNING "PCI: System does not support PCI\n"); + return; + } + + pcibios_set_cacheline_size(); + + printk(KERN_INFO "PCI: Probing PCI hardware\n"); + + if (!pci_using_acpi_prt) { + pci_root_bus = pcibios_scan_root(0); + pcibios_irq_init(); + pcibios_fixup_peer_bridges(); + pcibios_fixup_irqs(); + } + + pcibios_resource_survey(); +} + +char * __devinit pcibios_setup(char *str) +{ + if (!strcmp(str, "off")) { + pci_probe = 0; + return NULL; + } + return NULL; +} + +unsigned int pcibios_assign_all_busses(void) +{ + return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0; +} + +int pcibios_enable_device(struct pci_dev *dev, int mask) +{ + int err; + + if ((err = pcibios_enable_resources(dev, mask)) < 0) + return err; + + pcibios_enable_irq(dev); + + return 0; +} diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/physirq.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/physirq.c new file mode 100644 index 0000000000..1f7a8e4fee --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/physirq.c @@ -0,0 +1,172 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2004 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: physirq.c + * Author: Rolf Neugebauer (rolf.neugebauer@intel.com) + * Date: Mar 2004 + * + * Description: guests may receive virtual interrupts directly + * corresponding to physical interrupts. these virtual + * interrupts require special handling provided + * by the virq irq type. + */ + + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +static void physirq_interrupt(int irq, void *unused, struct pt_regs *ptregs); + +static int setup_event_handler = 0; + +static unsigned int startup_physirq_event(unsigned int irq) +{ + physdev_op_t op; + int err; + + printk("startup_physirq_event %d\n", irq); + + /* + * install a interrupt handler for physirq event when called first time + * we actually are never executing the handler as _EVENT_PHYSIRQ is + * handled specially in hypervisor.c But we need to enable the event etc. + */ + if ( !setup_event_handler ) + { + printk("startup_physirq_event %d: setup event handler\n", irq); + /* set up a event handler to demux virtualised physical interrupts */ + err = request_irq(HYPEREVENT_IRQ(_EVENT_PHYSIRQ), physirq_interrupt, + SA_SAMPLE_RANDOM, "physirq", NULL); + if ( err ) + { + printk(KERN_WARNING "Could not allocate physirq interrupt\n"); + return err; + } + setup_event_handler = 1; + } + + /* + * request the irq from hypervisor + */ + op.cmd = PHYSDEVOP_REQUEST_IRQ; + op.u.request_irq.irq = irq; + if ( (err = HYPERVISOR_physdev_op(&op)) != 0 ) + { + printk(KERN_ALERT "could not get IRQ %d from Xen\n", irq); + return err; + } + return 0; +} +/* + * This is a dummy interrupt handler. + * It should never be called. events for physical interrupts are handled + * differently in hypervisor.c + */ +static void physirq_interrupt(int irq, void *unused, struct pt_regs *ptregs) +{ + printk("XXX This should never be called!"); +} + + +/* + * IRQ is not needed anymore. + */ +static void shutdown_physirq_event(unsigned int irq) +{ + physdev_op_t op; + int err; + + printk("shutdown_phys_irq called."); + + /* + * tell hypervisor + */ + op.cmd = PHYSDEVOP_FREE_IRQ; + op.u.free_irq.irq = irq; + if ( (err = HYPERVISOR_physdev_op(&op)) != 0 ) + { + printk(KERN_ALERT "could not free IRQ %d\n", irq); + return; + } + return; +} + + +static void enable_physirq_event(unsigned int irq) +{ + /* XXX just enable all phys interrupts for now */ + enable_irq(HYPEREVENT_IRQ(_EVENT_PHYSIRQ)); +} + +static void disable_physirq_event(unsigned int irq) +{ + /* XXX just disable all phys interrupts for now */ + disable_irq(HYPEREVENT_IRQ(_EVENT_PHYSIRQ)); +} + +static void ack_physirq_event(unsigned int irq) +{ + /* clear bit */ + if ( irq <= 0 || irq >= 32 ) + { + printk("wrong irq %d\n", irq); + } + + clear_bit(irq, &HYPERVISOR_shared_info->physirq_pend); +} + +static void end_physirq_event(unsigned int irq) +{ + int err; + physdev_op_t op; + + /* call hypervisor */ + op.cmd = PHYSDEVOP_FINISHED_IRQ; + op.u.finished_irq.irq = irq; + if ( (err = HYPERVISOR_physdev_op(&op)) != 0 ) + { + printk(KERN_ALERT "could not finish IRQ %d\n", irq); + return; + } + return; +} + +static struct hw_interrupt_type physirq_irq_type = { + "physical-irq", + startup_physirq_event, + shutdown_physirq_event, + enable_physirq_event, + disable_physirq_event, + ack_physirq_event, + end_physirq_event, + NULL +}; + + + +void __init physirq_init(void) +{ + int i; + + printk("Initialise irq handlers [%d-%d] for physical interrupts.\n", + PHYS_IRQ_BASE, PHYS_IRQ_BASE+NR_PHYS_IRQS-1); + + for ( i = 0; i < NR_PHYS_IRQS; i++ ) + { + irq_desc[i + PHYS_IRQ_BASE].status = IRQ_DISABLED; + irq_desc[i + PHYS_IRQ_BASE].action = 0; + irq_desc[i + PHYS_IRQ_BASE].depth = 1; + irq_desc[i + PHYS_IRQ_BASE].handler = &physirq_irq_type; + } +} diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/process.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/process.c new file mode 100644 index 0000000000..640179661b --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/process.c @@ -0,0 +1,474 @@ +/* + * linux/arch/i386/kernel/process.c + * + * Copyright (C) 1995 Linus Torvalds + * + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + */ + +/* + * This file handles the architecture-dependent parts of process handling.. + */ + +#define __KERNEL_SYSCALLS__ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); + +int hlt_counter; + +/* + * Powermanagement idle function, if any.. + */ +void (*pm_idle)(void); + +/* + * Power off function, if any + */ +void (*pm_power_off)(void); + +void disable_hlt(void) +{ + hlt_counter++; +} + +void enable_hlt(void) +{ + hlt_counter--; +} + +/* + * The idle thread. There's no useful work to be + * done, so just try to conserve power and have a + * low exit latency (ie sit in a loop waiting for + * somebody to say that they'd like to reschedule) + */ +void cpu_idle (void) +{ + extern int set_timeout_timer(void); + + /* Endless idle loop with no priority at all. */ + init_idle(); + current->nice = 20; + current->counter = -100; + + for ( ; ; ) + { + while ( !current->need_resched ) + { + __cli(); + if ( current->need_resched ) + { + /* The race-free check for events failed. */ + __sti(); + break; + } + else if ( set_timeout_timer() == 0 ) + { + /* NB. Blocking reenable events in a race-free manner. */ + HYPERVISOR_block(); + } + else + { + /* No race here: yielding will get us the CPU again anyway. */ + __sti(); + HYPERVISOR_yield(); + } + } + schedule(); + check_pgt_cache(); + } +} + +void machine_restart(char * __unused) +{ + HYPERVISOR_exit(); +} + +void machine_halt(void) +{ + HYPERVISOR_exit(); +} + +void machine_power_off(void) +{ + HYPERVISOR_exit(); +} + +extern void show_trace(unsigned long* esp); + +void show_regs(struct pt_regs * regs) +{ + printk("\n"); + printk("Pid: %d, comm: %20s\n", current->pid, current->comm); + printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs,regs->eip, smp_processor_id()); + if (regs->xcs & 2) + printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); + printk(" EFLAGS: %08lx %s\n",regs->eflags, print_tainted()); + printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", + regs->eax,regs->ebx,regs->ecx,regs->edx); + printk("ESI: %08lx EDI: %08lx EBP: %08lx", + regs->esi, regs->edi, regs->ebp); + printk(" DS: %04x ES: %04x\n", + 0xffff & regs->xds,0xffff & regs->xes); + + show_trace(®s->esp); +} + + +/* + * Create a kernel thread + */ +int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) +{ + long retval, d0; + + __asm__ __volatile__( + "movl %%esp,%%esi\n\t" + "int $0x80\n\t" /* Linux/i386 system call */ + "cmpl %%esp,%%esi\n\t" /* child or parent? */ + "je 1f\n\t" /* parent - jump */ + /* Load the argument into eax, and push it. That way, it does + * not matter whether the called function is compiled with + * -mregparm or not. */ + "movl %4,%%eax\n\t" + "pushl %%eax\n\t" + "call *%5\n\t" /* call fn */ + "movl %3,%0\n\t" /* exit */ + "int $0x80\n" + "1:\t" + :"=&a" (retval), "=&S" (d0) + :"0" (__NR_clone), "i" (__NR_exit), + "r" (arg), "r" (fn), + "b" (flags | CLONE_VM) + : "memory"); + + return retval; +} + +/* + * Free current thread data structures etc.. + */ +void exit_thread(void) +{ + /* nothing to do ... */ +} + +void flush_thread(void) +{ + struct task_struct *tsk = current; + + memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); + + /* + * Forget coprocessor state.. + */ + clear_fpu(tsk); + tsk->used_math = 0; +} + +void release_thread(struct task_struct *dead_task) +{ + if (dead_task->mm) { + // temporary debugging check + if (dead_task->mm->context.size) { + printk("WARNING: dead process %8s still has LDT? <%p/%p>\n", + dead_task->comm, + dead_task->mm->context.ldt, + dead_task->mm->context.size); + BUG(); + } + } + //release_x86_irqs(dead_task); +} + + +/* + * Save a segment. + */ +#define savesegment(seg,value) \ + asm volatile("movl %%" #seg ",%0":"=m" (*(int *)&(value))) + +int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, + unsigned long unused, + struct task_struct * p, struct pt_regs * regs) +{ + struct pt_regs * childregs; + unsigned long eflags; + + childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1; + struct_cpy(childregs, regs); + childregs->eax = 0; + childregs->esp = esp; + + p->thread.esp = (unsigned long) childregs; + p->thread.esp0 = (unsigned long) (childregs+1); + + p->thread.eip = (unsigned long) ret_from_fork; + + savesegment(fs,p->thread.fs); + savesegment(gs,p->thread.gs); + + unlazy_fpu(current); + struct_cpy(&p->thread.i387, ¤t->thread.i387); + + + __asm__ __volatile__ ( "pushfl; popl %0" : "=r" (eflags) : ); + p->thread.io_pl = (eflags >> 12) & 3; + + return 0; +} + +/* + * fill in the user structure for a core dump.. + */ +void dump_thread(struct pt_regs * regs, struct user * dump) +{ + int i; + +/* changed the size calculations - should hopefully work better. lbt */ + dump->magic = CMAGIC; + dump->start_code = 0; + dump->start_stack = regs->esp & ~(PAGE_SIZE - 1); + dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; + dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; + dump->u_dsize -= dump->u_tsize; + dump->u_ssize = 0; + for (i = 0; i < 8; i++) + dump->u_debugreg[i] = current->thread.debugreg[i]; + + if (dump->start_stack < TASK_SIZE) + dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; + + dump->regs.ebx = regs->ebx; + dump->regs.ecx = regs->ecx; + dump->regs.edx = regs->edx; + dump->regs.esi = regs->esi; + dump->regs.edi = regs->edi; + dump->regs.ebp = regs->ebp; + dump->regs.eax = regs->eax; + dump->regs.ds = regs->xds; + dump->regs.es = regs->xes; + savesegment(fs,dump->regs.fs); + savesegment(gs,dump->regs.gs); + dump->regs.orig_eax = regs->orig_eax; + dump->regs.eip = regs->eip; + dump->regs.cs = regs->xcs; + dump->regs.eflags = regs->eflags; + dump->regs.esp = regs->esp; + dump->regs.ss = regs->xss; + + dump->u_fpvalid = dump_fpu (regs, &dump->i387); +} + +/* + * switch_to(x,yn) should switch tasks from x to y. + * + * We fsave/fwait so that an exception goes off at the right time + * (as a call from the fsave or fwait in effect) rather than to + * the wrong process. Lazy FP saving no longer makes any sense + * with modern CPU's, and this simplifies a lot of things (SMP + * and UP become the same). + * + * NOTE! We used to use the x86 hardware context switching. The + * reason for not using it any more becomes apparent when you + * try to recover gracefully from saved state that is no longer + * valid (stale segment register values in particular). With the + * hardware task-switch, there is no way to fix up bad state in + * a reasonable manner. + * + * The fact that Intel documents the hardware task-switching to + * be slow is a fairly red herring - this code is not noticeably + * faster. However, there _is_ some room for improvement here, + * so the performance issues may eventually be a valid point. + * More important, however, is the fact that this allows us much + * more flexibility. + */ +void __switch_to(struct task_struct *prev_p, struct task_struct *next_p) +{ + struct thread_struct *next = &next_p->thread; + + __cli(); + + /* + * We clobber FS and GS here so that we avoid a GPF when restoring previous + * task's FS/GS values in Xen when the LDT is switched. If we don't do this + * then we can end up erroneously re-flushing the page-update queue when + * we 'execute_multicall_list'. + */ + __asm__ __volatile__ ( + "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs" : : : "eax" ); + + MULTICALL_flush_page_update_queue(); + + /* + * This is basically 'unlazy_fpu', except that we queue a multicall to + * indicate FPU task switch, rather than synchronously trapping to Xen. + */ + if ( prev_p->flags & PF_USEDFPU ) + { + if ( cpu_has_fxsr ) + asm volatile( "fxsave %0 ; fnclex" + : "=m" (prev_p->thread.i387.fxsave) ); + else + asm volatile( "fnsave %0 ; fwait" + : "=m" (prev_p->thread.i387.fsave) ); + prev_p->flags &= ~PF_USEDFPU; + queue_multicall0(__HYPERVISOR_fpu_taskswitch); + } + + queue_multicall2(__HYPERVISOR_stack_switch, __KERNEL_DS, next->esp0); + if ( start_info.flags & SIF_PRIVILEGED ) + { + dom0_op_t op; + op.cmd = DOM0_IOPL; + op.u.iopl.domain = DOMID_SELF; + op.u.iopl.iopl = next->io_pl; + queue_multicall1(__HYPERVISOR_dom0_op, (unsigned long)&op); + } + + /* EXECUTE ALL TASK SWITCH XEN SYSCALLS AT THIS POINT. */ + execute_multicall_list(); + __sti(); + + /* + * Restore %fs and %gs. + */ + loadsegment(fs, next->fs); + loadsegment(gs, next->gs); + + /* + * Now maybe reload the debug registers + */ + if ( next->debugreg[7] != 0 ) + { + HYPERVISOR_set_debugreg(0, next->debugreg[0]); + HYPERVISOR_set_debugreg(1, next->debugreg[1]); + HYPERVISOR_set_debugreg(2, next->debugreg[2]); + HYPERVISOR_set_debugreg(3, next->debugreg[3]); + /* no 4 and 5 */ + HYPERVISOR_set_debugreg(6, next->debugreg[6]); + HYPERVISOR_set_debugreg(7, next->debugreg[7]); + } +} + +asmlinkage int sys_fork(struct pt_regs regs) +{ + return do_fork(SIGCHLD, regs.esp, ®s, 0); +} + +asmlinkage int sys_clone(struct pt_regs regs) +{ + unsigned long clone_flags; + unsigned long newsp; + + clone_flags = regs.ebx; + newsp = regs.ecx; + if (!newsp) + newsp = regs.esp; + return do_fork(clone_flags, newsp, ®s, 0); +} + +/* + * This is trivial, and on the face of it looks like it + * could equally well be done in user mode. + * + * Not so, for quite unobvious reasons - register pressure. + * In user mode vfork() cannot have a stack frame, and if + * done by calling the "clone()" system call directly, you + * do not have enough call-clobbered registers to hold all + * the information you need. + */ +asmlinkage int sys_vfork(struct pt_regs regs) +{ + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, ®s, 0); +} + +/* + * sys_execve() executes a new program. + */ +asmlinkage int sys_execve(struct pt_regs regs) +{ + int error; + char * filename; + + filename = getname((char *) regs.ebx); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + error = do_execve(filename, (char **) regs.ecx, (char **) regs.edx, ®s); + if (error == 0) + current->ptrace &= ~PT_DTRACE; + putname(filename); + out: + return error; +} + +/* + * These bracket the sleeping functions.. + */ +extern void scheduling_functions_start_here(void); +extern void scheduling_functions_end_here(void); +#define first_sched ((unsigned long) scheduling_functions_start_here) +#define last_sched ((unsigned long) scheduling_functions_end_here) + +unsigned long get_wchan(struct task_struct *p) +{ + unsigned long ebp, esp, eip; + unsigned long stack_page; + int count = 0; + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + stack_page = (unsigned long)p; + esp = p->thread.esp; + if (!stack_page || esp < stack_page || esp > 8188+stack_page) + return 0; + /* include/asm-i386/system.h:switch_to() pushes ebp last. */ + ebp = *(unsigned long *) esp; + do { + if (ebp < stack_page || ebp > 8184+stack_page) + return 0; + eip = *(unsigned long *) (ebp+4); + if (eip < first_sched || eip >= last_sched) + return eip; + ebp = *(unsigned long *) ebp; + } while (count++ < 16); + return 0; +} +#undef last_sched +#undef first_sched diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/setup.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/setup.c new file mode 100644 index 0000000000..c593bddec7 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/setup.c @@ -0,0 +1,1257 @@ +/* + * linux/arch/i386/kernel/setup.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +/* + * This file handles the architecture-dependent parts of initialization + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_BLK_DEV_RAM +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* dev_(de)activate */ + +/* + * Point at the empty zero page to start with. We map the real shared_info + * page as soon as fixmap is up and running. + */ +shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; + +unsigned long *phys_to_machine_mapping; + +/* + * Machine setup.. + */ + +char ignore_irq13; /* set if exception 16 works */ +struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; + +unsigned long mmu_cr4_features; +//EXPORT_SYMBOL(mmu_cr4_features); + +unsigned char * vgacon_mmap; + +/* + * Bus types .. + */ +#ifdef CONFIG_EISA +int EISA_bus; +#endif +int MCA_bus; + +/* for MCA, but anyone else can use it if they want */ +unsigned int machine_id; +unsigned int machine_submodel_id; +unsigned int BIOS_revision; +unsigned int mca_pentium_flag; + +/* For PCI or other memory-mapped resources */ +unsigned long pci_mem_start = 0x10000000; + +/* + * Setup options + */ +struct drive_info_struct { char dummy[32]; } drive_info; +struct screen_info screen_info; +struct apm_info apm_info; +struct sys_desc_table_struct { + unsigned short length; + unsigned char table[0]; +}; + +unsigned char aux_device_present; + +extern int root_mountflags; +extern char _text, _etext, _edata, _end; + +int enable_acpi_smp_table; + +/* Raw start-of-day parameters from the hypervisor. */ +union start_info_union start_info_union; + +#define COMMAND_LINE_SIZE 256 +static char command_line[COMMAND_LINE_SIZE]; +char saved_command_line[COMMAND_LINE_SIZE]; + +static void __init parse_mem_cmdline (char ** cmdline_p) +{ + char c = ' ', *to = command_line, *from = saved_command_line; + int len = 0; + + /* Save unparsed command line copy for /proc/cmdline */ + memcpy(saved_command_line, start_info.cmd_line, COMMAND_LINE_SIZE); + saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; + + for (;;) { + /* + * "mem=nopentium" disables the 4MB page tables. + * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM + * to , overriding the bios size. + * "mem=XXX[KkmM]@XXX[KkmM]" defines a memory region from + * to +, overriding the bios size. + */ + if (c == ' ' && !memcmp(from, "mem=", 4)) { + if (to != command_line) + to--; + if (!memcmp(from+4, "nopentium", 9)) { + from += 9+4; + } else if (!memcmp(from+4, "exactmap", 8)) { + from += 8+4; + } else { + (void)memparse(from+4, &from); + if (*from == '@') + (void)memparse(from+1, &from); + } + } + + c = *(from++); + if (!c) + break; + if (COMMAND_LINE_SIZE <= ++len) + break; + *(to++) = c; + } + *to = '\0'; + *cmdline_p = command_line; +} + +void __init setup_arch(char **cmdline_p) +{ + unsigned long bootmap_size, start_pfn, max_low_pfn; + unsigned long i; + + extern void hypervisor_callback(void); + extern void failsafe_callback(void); + + extern unsigned long cpu0_pte_quicklist[]; + extern unsigned long cpu0_pgd_quicklist[]; + + HYPERVISOR_set_callbacks( + __KERNEL_CS, (unsigned long)hypervisor_callback, + __KERNEL_CS, (unsigned long)failsafe_callback); + + boot_cpu_data.pgd_quick = cpu0_pgd_quicklist; + boot_cpu_data.pte_quick = cpu0_pte_quicklist; + + ROOT_DEV = MKDEV(RAMDISK_MAJOR,0); + memset(&drive_info, 0, sizeof(drive_info)); + memset(&screen_info, 0, sizeof(screen_info)); + + /* This is drawn from a dump from vgacon:startup in standard Linux. */ + screen_info.orig_video_mode = 3; + screen_info.orig_video_isVGA = 1; + screen_info.orig_video_lines = 25; + screen_info.orig_video_cols = 80; + screen_info.orig_video_ega_bx = 3; + screen_info.orig_video_points = 16; + + memset(&apm_info.bios, 0, sizeof(apm_info.bios)); + aux_device_present = 0; +#ifdef CONFIG_BLK_DEV_RAM + rd_image_start = 0; + rd_prompt = 0; + rd_doload = 0; +#endif + + root_mountflags &= ~MS_RDONLY; + init_mm.start_code = (unsigned long) &_text; + init_mm.end_code = (unsigned long) &_etext; + init_mm.end_data = (unsigned long) &_edata; + init_mm.brk = (unsigned long) &_end; + + parse_mem_cmdline(cmdline_p); + +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) +#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) +#define PFN_PHYS(x) ((x) << PAGE_SHIFT) + +/* + * 128MB for vmalloc and initrd + */ +#define VMALLOC_RESERVE (unsigned long)(128 << 20) +#define MAXMEM (unsigned long)(HYPERVISOR_VIRT_START-PAGE_OFFSET-VMALLOC_RESERVE) +#define MAXMEM_PFN PFN_DOWN(MAXMEM) +#define MAX_NONPAE_PFN (1 << 20) + + /* + * partially used pages are not usable - thus + * we are rounding upwards: + */ +#ifdef CONFIG_BLK_DEV_INITRD + if ( start_info.mod_start ) + start_pfn = PFN_UP(__pa(start_info.mod_start + start_info.mod_len)); + else +#endif + start_pfn = PFN_UP(__pa(&_end)); + max_pfn = start_info.nr_pages; + + /* + * Determine low and high memory ranges: + */ + max_low_pfn = max_pfn; + if (max_low_pfn > MAXMEM_PFN) { + max_low_pfn = MAXMEM_PFN; +#ifndef CONFIG_HIGHMEM + /* Maximum memory usable is what is directly addressable */ + printk(KERN_WARNING "Warning only %ldMB will be used.\n", + MAXMEM>>20); + if (max_pfn > MAX_NONPAE_PFN) + printk(KERN_WARNING "Use a PAE enabled kernel.\n"); + else + printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); +#else /* !CONFIG_HIGHMEM */ +#ifndef CONFIG_X86_PAE + if (max_pfn > MAX_NONPAE_PFN) { + max_pfn = MAX_NONPAE_PFN; + printk(KERN_WARNING "Warning only 4GB will be used.\n"); + printk(KERN_WARNING "Use a PAE enabled kernel.\n"); + } +#endif /* !CONFIG_X86_PAE */ +#endif /* !CONFIG_HIGHMEM */ + } + +#ifdef CONFIG_HIGHMEM + highstart_pfn = highend_pfn = max_pfn; + if (max_pfn > MAXMEM_PFN) { + highstart_pfn = MAXMEM_PFN; + printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", + pages_to_mb(highend_pfn - highstart_pfn)); + } +#endif + + /* + * Initialize the boot-time allocator, and free up all RAM. + * Then reserve space for OS image, and the bootmem bitmap. + */ + bootmap_size = init_bootmem(start_pfn, max_low_pfn); + free_bootmem(0, PFN_PHYS(max_low_pfn)); + reserve_bootmem(0, PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1); + + /* Now reserve space for the hypervisor-provided page tables. */ + { + unsigned long *pgd = (unsigned long *)start_info.pt_base; + unsigned long pte; + int i; + reserve_bootmem(__pa(pgd), PAGE_SIZE); + for ( i = 0; i < (HYPERVISOR_VIRT_START>>22); i++ ) + { + unsigned long pgde = *pgd++; + if ( !(pgde & 1) ) continue; + pte = machine_to_phys(pgde & PAGE_MASK); + reserve_bootmem(pte, PAGE_SIZE); + } + } + cur_pgd = init_mm.pgd = (pgd_t *)start_info.pt_base; + + /* Now initialise the physical->machine mapping table. */ + phys_to_machine_mapping = alloc_bootmem(max_pfn * sizeof(unsigned long)); + for ( i = 0; i < max_pfn; i++ ) + { + unsigned long pgde, *ppte; + unsigned long pfn = i + (PAGE_OFFSET >> PAGE_SHIFT); + pgde = *((unsigned long *)start_info.pt_base + (pfn >> 10)); + ppte = (unsigned long *)machine_to_phys(pgde & PAGE_MASK) + (pfn&1023); + phys_to_machine_mapping[i] = + (*(unsigned long *)__va(ppte)) >> PAGE_SHIFT; + } + +#ifdef CONFIG_BLK_DEV_INITRD + if (start_info.mod_start) { + if ((__pa(start_info.mod_start) + start_info.mod_len) <= + (max_low_pfn << PAGE_SHIFT)) { + initrd_start = start_info.mod_start; + initrd_end = initrd_start + start_info.mod_len; + initrd_below_start_ok = 1; + } + else { + printk(KERN_ERR "initrd extends beyond end of memory " + "(0x%08lx > 0x%08lx)\ndisabling initrd\n", + __pa(start_info.mod_start) + start_info.mod_len, + max_low_pfn << PAGE_SHIFT); + initrd_start = 0; + } + } +#endif + + paging_init(); + + /* We are privileged guest os - should have IO privileges. */ + if ( start_info.flags & SIF_PRIVILEGED ) + { + dom0_op_t op; + op.cmd = DOM0_IOPL; + op.u.iopl.domain = DOMID_SELF; + op.u.iopl.iopl = 1; + if( HYPERVISOR_dom0_op(&op) != 0 ) + panic("Unable to obtain IOPL, despite being SIF_PRIVILEGED"); + current->thread.io_pl = 1; + } + + if (start_info.flags & SIF_INITDOMAIN ) + { + if( !(start_info.flags & SIF_PRIVILEGED) ) + panic("Xen granted us console access but not privileged status"); + +#if defined(CONFIG_VT) +#if defined(CONFIG_VGA_CONSOLE) + conswitchp = &vga_con; +#elif defined(CONFIG_DUMMY_CONSOLE) + conswitchp = &dummy_con; +#endif +#endif + } +} + +static int cachesize_override __initdata = -1; +static int __init cachesize_setup(char *str) +{ + get_option (&str, &cachesize_override); + return 1; +} +__setup("cachesize=", cachesize_setup); + + +static int __init get_model_name(struct cpuinfo_x86 *c) +{ + unsigned int *v; + char *p, *q; + + if (cpuid_eax(0x80000000) < 0x80000004) + return 0; + + v = (unsigned int *) c->x86_model_id; + cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); + cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); + cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); + c->x86_model_id[48] = 0; + + /* Intel chips right-justify this string for some dumb reason; + undo that brain damage */ + p = q = &c->x86_model_id[0]; + while ( *p == ' ' ) + p++; + if ( p != q ) { + while ( *p ) + *q++ = *p++; + while ( q <= &c->x86_model_id[48] ) + *q++ = '\0'; /* Zero-pad the rest */ + } + + return 1; +} + + +static void __init display_cacheinfo(struct cpuinfo_x86 *c) +{ + unsigned int n, dummy, ecx, edx, l2size; + + n = cpuid_eax(0x80000000); + + if (n >= 0x80000005) { + cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); + printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + c->x86_cache_size=(ecx>>24)+(edx>>24); + } + + if (n < 0x80000006) /* Some chips just has a large L1. */ + return; + + ecx = cpuid_ecx(0x80000006); + l2size = ecx >> 16; + + /* AMD errata T13 (order #21922) */ + if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { + if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ + l2size = 64; + if (c->x86_model == 4 && + (c->x86_mask==0 || c->x86_mask==1)) /* Tbird rev A1/A2 */ + l2size = 256; + } + + /* Intel PIII Tualatin. This comes in two flavours. + * One has 256kb of cache, the other 512. We have no way + * to determine which, so we use a boottime override + * for the 512kb model, and assume 256 otherwise. + */ + if ((c->x86_vendor == X86_VENDOR_INTEL) && (c->x86 == 6) && + (c->x86_model == 11) && (l2size == 0)) + l2size = 256; + + if (c->x86_vendor == X86_VENDOR_CENTAUR) { + /* VIA C3 CPUs (670-68F) need further shifting. */ + if ((c->x86 == 6) && + ((c->x86_model == 7) || (c->x86_model == 8))) { + l2size >>= 8; + } + + /* VIA also screwed up Nehemiah stepping 1, and made + it return '65KB' instead of '64KB' + - Note, it seems this may only be in engineering samples. */ + if ((c->x86==6) && (c->x86_model==9) && + (c->x86_mask==1) && (l2size==65)) + l2size -= 1; + } + + /* Allow user to override all this if necessary. */ + if (cachesize_override != -1) + l2size = cachesize_override; + + if ( l2size == 0 ) + return; /* Again, no L2 cache is possible */ + + c->x86_cache_size = l2size; + + printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", + l2size, ecx & 0xFF); +} + +static void __init init_c3(struct cpuinfo_x86 *c) +{ + /* Test for Centaur Extended Feature Flags presence */ + if (cpuid_eax(0xC0000000) >= 0xC0000001) { + /* store Centaur Extended Feature Flags as + * word 5 of the CPU capability bit array + */ + c->x86_capability[5] = cpuid_edx(0xC0000001); + } + + switch (c->x86_model) { + case 9: /* Nehemiah */ + default: + get_model_name(c); + display_cacheinfo(c); + break; + } +} + +static void __init init_centaur(struct cpuinfo_x86 *c) +{ + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*3231) anyway */ + clear_bit(0*32+31, &c->x86_capability); + + switch (c->x86) { + case 6: + init_c3(c); + break; + default: + panic("Unsupported Centaur CPU (%i)\n", c->x86); + } +} + +static int __init init_amd(struct cpuinfo_x86 *c) +{ + int r; + + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_bit(0*32+31, &c->x86_capability); + + r = get_model_name(c); + + switch(c->x86) + { + case 5: /* We don't like AMD K6 */ + panic("Unsupported AMD processor\n"); + case 6: /* An Athlon/Duron. We can trust the BIOS probably */ + break; + } + + display_cacheinfo(c); + return r; +} + + +static void __init init_intel(struct cpuinfo_x86 *c) +{ + char *p = NULL; + unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ + + if (c->cpuid_level > 1) { + /* supports eax=2 call */ + int i, j, n; + int regs[4]; + unsigned char *dp = (unsigned char *)regs; + + /* Number of times to iterate */ + n = cpuid_eax(2) & 0xFF; + + for ( i = 0 ; i < n ; i++ ) { + cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); + + /* If bit 31 is set, this is an unknown format */ + for ( j = 0 ; j < 3 ; j++ ) { + if ( regs[j] < 0 ) regs[j] = 0; + } + + /* Byte 0 is level count, not a descriptor */ + for ( j = 1 ; j < 16 ; j++ ) { + unsigned char des = dp[j]; + unsigned char dl, dh; + unsigned int cs; + + dh = des >> 4; + dl = des & 0x0F; + + /* Black magic... */ + + switch ( dh ) + { + case 0: + switch ( dl ) { + case 6: + /* L1 I cache */ + l1i += 8; + break; + case 8: + /* L1 I cache */ + l1i += 16; + break; + case 10: + /* L1 D cache */ + l1d += 8; + break; + case 12: + /* L1 D cache */ + l1d += 16; + break; + default:; + /* TLB, or unknown */ + } + break; + case 2: + if ( dl ) { + /* L3 cache */ + cs = (dl-1) << 9; + l3 += cs; + } + break; + case 4: + if ( c->x86 > 6 && dl ) { + /* P4 family */ + /* L3 cache */ + cs = 128 << (dl-1); + l3 += cs; + break; + } + /* else same as 8 - fall through */ + case 8: + if ( dl ) { + /* L2 cache */ + cs = 128 << (dl-1); + l2 += cs; + } + break; + case 6: + if (dl > 5) { + /* L1 D cache */ + cs = 8<<(dl-6); + l1d += cs; + } + break; + case 7: + if ( dl >= 8 ) + { + /* L2 cache */ + cs = 64<<(dl-8); + l2 += cs; + } else { + /* L0 I cache, count as L1 */ + cs = dl ? (16 << (dl-1)) : 12; + l1i += cs; + } + break; + default: + /* TLB, or something else we don't know about */ + break; + } + } + } + if ( l1i || l1d ) + printk(KERN_INFO "CPU: L1 I cache: %dK, L1 D cache: %dK\n", + l1i, l1d); + if ( l2 ) + printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); + if ( l3 ) + printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); + + /* + * This assumes the L3 cache is shared; it typically lives in + * the northbridge. The L1 caches are included by the L2 + * cache, and so should not be included for the purpose of + * SMP switching weights. + */ + c->x86_cache_size = l2 ? l2 : (l1i+l1d); + } + + /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */ + if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 ) + clear_bit(X86_FEATURE_SEP, &c->x86_capability); + + /* Names for the Pentium II/Celeron processors + detectable only by also checking the cache size. + Dixon is NOT a Celeron. */ + if (c->x86 == 6) { + switch (c->x86_model) { + case 5: + if (l2 == 0) + p = "Celeron (Covington)"; + if (l2 == 256) + p = "Mobile Pentium II (Dixon)"; + break; + + case 6: + if (l2 == 128) + p = "Celeron (Mendocino)"; + break; + + case 8: + if (l2 == 128) + p = "Celeron (Coppermine)"; + break; + } + } + + if ( p ) + strcpy(c->x86_model_id, p); +} + +void __init get_cpu_vendor(struct cpuinfo_x86 *c) +{ + char *v = c->x86_vendor_id; + + if (!strcmp(v, "GenuineIntel")) + c->x86_vendor = X86_VENDOR_INTEL; + else if (!strcmp(v, "AuthenticAMD")) + c->x86_vendor = X86_VENDOR_AMD; + else if (!strcmp(v, "CentaurHauls")) + c->x86_vendor = X86_VENDOR_CENTAUR; + else + c->x86_vendor = X86_VENDOR_UNKNOWN; +} + +struct cpu_model_info { + int vendor; + int family; + char *model_names[16]; +}; + +/* Naming convention should be: [()] */ +/* This table only is used unless init_() below doesn't set it; */ +/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ +static struct cpu_model_info cpu_models[] __initdata = { + { X86_VENDOR_INTEL, 6, + { "Pentium Pro A-step", "Pentium Pro", NULL, "Pentium II (Klamath)", + NULL, "Pentium II (Deschutes)", "Mobile Pentium II", + "Pentium III (Katmai)", "Pentium III (Coppermine)", NULL, + "Pentium III (Cascades)", NULL, NULL, NULL, NULL }}, + { X86_VENDOR_AMD, 6, /* Is this this really necessary?? */ + { "Athlon", "Athlon", + "Athlon", NULL, "Athlon", NULL, + NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL }} +}; + +/* Look up CPU names by table lookup. */ +static char __init *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info = cpu_models; + int i; + + if ( c->x86_model >= 16 ) + return NULL; /* Range check */ + + for ( i = 0 ; i < sizeof(cpu_models)/sizeof(struct cpu_model_info) ; i++ ) { + if ( info->vendor == c->x86_vendor && + info->family == c->x86 ) { + return info->model_names[c->x86_model]; + } + info++; + } + return NULL; /* Not found */ +} + + + +/* Standard macro to see if a specific flag is changeable */ +static inline int flag_is_changeable_p(u32 flag) +{ + u32 f1, f2; + + asm("pushfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl %2,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "popfl\n\t" + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); + + return ((f1^f2) & flag) != 0; +} + + +/* Probe for the CPUID instruction */ +static int __init have_cpuid_p(void) +{ + return flag_is_changeable_p(X86_EFLAGS_ID); +} + + + +#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) +unsigned char eddnr; +struct edd_info edd[EDDMAXNR]; +unsigned int edd_disk80_sig; +/** + * copy_edd() - Copy the BIOS EDD information + * from empty_zero_page into a safe place. + * + */ +static inline void copy_edd(void) +{ + eddnr = EDD_NR; + memcpy(edd, EDD_BUF, sizeof(edd)); + edd_disk80_sig = DISK80_SIGNATURE_BUFFER; +} +#else +static inline void copy_edd(void) {} +#endif + +/* + * This does the hard work of actually picking apart the CPU stuff... + */ +void __init identify_cpu(struct cpuinfo_x86 *c) +{ + int junk, i; + u32 xlvl, tfms; + + c->loops_per_jiffy = loops_per_jiffy; + c->x86_cache_size = -1; + c->x86_vendor = X86_VENDOR_UNKNOWN; + c->cpuid_level = -1; /* CPUID not detected */ + c->x86_model = c->x86_mask = 0; /* So far unknown... */ + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_model_id[0] = '\0'; /* Unset */ + memset(&c->x86_capability, 0, sizeof c->x86_capability); + c->hard_math = 1; + + if ( !have_cpuid_p() ) { + panic("Processor must support CPUID\n"); + } else { + /* CPU does have CPUID */ + + /* Get vendor name */ + cpuid(0x00000000, &c->cpuid_level, + (int *)&c->x86_vendor_id[0], + (int *)&c->x86_vendor_id[8], + (int *)&c->x86_vendor_id[4]); + + get_cpu_vendor(c); + /* Initialize the standard set of capabilities */ + /* Note that the vendor-specific code below might override */ + + /* Intel-defined flags: level 0x00000001 */ + if ( c->cpuid_level >= 0x00000001 ) { + u32 capability, excap; + cpuid(0x00000001, &tfms, &junk, &excap, &capability); + c->x86_capability[0] = capability; + c->x86_capability[4] = excap; + c->x86 = (tfms >> 8) & 15; + c->x86_model = (tfms >> 4) & 15; + if (c->x86 == 0xf) { + c->x86 += (tfms >> 20) & 0xff; + c->x86_model += ((tfms >> 16) & 0xF) << 4; + } + c->x86_mask = tfms & 15; + } else { + /* Have CPUID level 0 only - unheard of */ + c->x86 = 4; + } + + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + if ( (xlvl & 0xffff0000) == 0x80000000 ) { + if ( xlvl >= 0x80000001 ) + c->x86_capability[1] = cpuid_edx(0x80000001); + if ( xlvl >= 0x80000004 ) + get_model_name(c); /* Default name */ + } + + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ( (xlvl & 0xffff0000) == 0x80860000 ) { + if ( xlvl >= 0x80860001 ) + c->x86_capability[2] = cpuid_edx(0x80860001); + } + } + + printk(KERN_DEBUG "CPU: Before vendor init, caps: %08x %08x %08x, vendor = %d\n", + c->x86_capability[0], + c->x86_capability[1], + c->x86_capability[2], + c->x86_vendor); + + /* + * Vendor-specific initialization. In this section we + * canonicalize the feature flags, meaning if there are + * features a certain CPU supports which CPUID doesn't + * tell us, CPUID claiming incorrect flags, or other bugs, + * we handle them here. + * + * At the end of this section, c->x86_capability better + * indicate the features this CPU genuinely supports! + */ + switch ( c->x86_vendor ) { + case X86_VENDOR_AMD: + init_amd(c); + break; + + case X86_VENDOR_INTEL: + init_intel(c); + break; + + case X86_VENDOR_CENTAUR: + init_centaur(c); + break; + + default: + printk("Unsupported CPU vendor (%d) -- please report!\n"); + } + + printk(KERN_DEBUG "CPU: After vendor init, caps: %08x %08x %08x %08x\n", + c->x86_capability[0], + c->x86_capability[1], + c->x86_capability[2], + c->x86_capability[3]); + + + /* If the model name is still unset, do table lookup. */ + if ( !c->x86_model_id[0] ) { + char *p; + p = table_lookup_model(c); + if ( p ) + strcpy(c->x86_model_id, p); + else + /* Last resort... */ + sprintf(c->x86_model_id, "%02x/%02x", + c->x86_vendor, c->x86_model); + } + + /* Now the feature flags better reflect actual CPU features! */ + + printk(KERN_DEBUG "CPU: After generic, caps: %08x %08x %08x %08x\n", + c->x86_capability[0], + c->x86_capability[1], + c->x86_capability[2], + c->x86_capability[3]); + + /* + * On SMP, boot_cpu_data holds the common feature set between + * all CPUs; so make sure that we indicate which features are + * common between the CPUs. The first time this routine gets + * executed, c == &boot_cpu_data. + */ + if ( c != &boot_cpu_data ) { + /* AND the already accumulated flags with these */ + for ( i = 0 ; i < NCAPINTS ; i++ ) + boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; + } + + printk(KERN_DEBUG "CPU: Common caps: %08x %08x %08x %08x\n", + boot_cpu_data.x86_capability[0], + boot_cpu_data.x86_capability[1], + boot_cpu_data.x86_capability[2], + boot_cpu_data.x86_capability[3]); +} + + +/* These need to match */ +static char *cpu_vendor_names[] __initdata = { + "Intel", "Cyrix", "AMD", "UMC", "NexGen", "Centaur", "Rise", "Transmeta" }; + + +void __init print_cpu_info(struct cpuinfo_x86 *c) +{ + char *vendor = NULL; + + if (c->x86_vendor < sizeof(cpu_vendor_names)/sizeof(char *)) + vendor = cpu_vendor_names[c->x86_vendor]; + else if (c->cpuid_level >= 0) + vendor = c->x86_vendor_id; + + if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) + printk("%s ", vendor); + + if (!c->x86_model_id[0]) + printk("%d86", c->x86); + else + printk("%s", c->x86_model_id); + + if (c->x86_mask || c->cpuid_level >= 0) + printk(" stepping %02x\n", c->x86_mask); + else + printk("\n"); +} + +/* + * Get CPU information for use by the procfs. + */ +static int show_cpuinfo(struct seq_file *m, void *v) +{ + /* + * These flag bits must match the definitions in . + * NULL means this bit is undefined or reserved; either way it doesn't + * have meaning as far as Linux is concerned. Note that it's important + * to realize there is a difference between this table and CPUID -- if + * applications want to get the raw CPUID data, they should access + * /dev/cpu//cpuid instead. + */ + static char *x86_cap_flags[] = { + /* Intel-defined */ + "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", + "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", + "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", + "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", + + /* AMD-defined */ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "mp", NULL, NULL, "mmxext", NULL, + NULL, NULL, NULL, NULL, NULL, "lm", "3dnowext", "3dnow", + + /* Transmeta-defined */ + "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Other (Linux-defined) */ + "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Intel-defined (#2) */ + "pni", NULL, NULL, "monitor", "ds_cpl", NULL, NULL, "tm2", + "est", NULL, "cid", NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* VIA/Cyrix/Centaur-defined */ + NULL, NULL, "xstore", NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + }; + struct cpuinfo_x86 *c = v; + int i, n = c - cpu_data; + int fpu_exception; + +#ifdef CONFIG_SMP + if (!(cpu_online_map & (1<x86_vendor_id[0] ? c->x86_vendor_id : "unknown", + c->x86, + c->x86_model, + c->x86_model_id[0] ? c->x86_model_id : "unknown"); + + if (c->x86_mask || c->cpuid_level >= 0) + seq_printf(m, "stepping\t: %d\n", c->x86_mask); + else + seq_printf(m, "stepping\t: unknown\n"); + + if ( test_bit(X86_FEATURE_TSC, &c->x86_capability) ) { + seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n", + cpu_khz / 1000, (cpu_khz % 1000)); + } + + /* Cache size */ + if (c->x86_cache_size >= 0) + seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); + + /* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */ + fpu_exception = c->hard_math && (ignore_irq13 || cpu_has_fpu); + seq_printf(m, "fdiv_bug\t: %s\n" + "hlt_bug\t\t: %s\n" + "f00f_bug\t: %s\n" + "coma_bug\t: %s\n" + "fpu\t\t: %s\n" + "fpu_exception\t: %s\n" + "cpuid level\t: %d\n" + "wp\t\t: %s\n" + "flags\t\t:", + c->fdiv_bug ? "yes" : "no", + c->hlt_works_ok ? "no" : "yes", + c->f00f_bug ? "yes" : "no", + c->coma_bug ? "yes" : "no", + c->hard_math ? "yes" : "no", + fpu_exception ? "yes" : "no", + c->cpuid_level, + c->wp_works_ok ? "yes" : "no"); + + for ( i = 0 ; i < 32*NCAPINTS ; i++ ) + if ( test_bit(i, &c->x86_capability) && + x86_cap_flags[i] != NULL ) + seq_printf(m, " %s", x86_cap_flags[i]); + + seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n", + c->loops_per_jiffy/(500000/HZ), + (c->loops_per_jiffy/(5000/HZ)) % 100); + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < NR_CPUS ? cpu_data + *pos : NULL; +} +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} +static void c_stop(struct seq_file *m, void *v) +{ +} +struct seq_operations cpuinfo_op = { + start: c_start, + next: c_next, + stop: c_stop, + show: show_cpuinfo, +}; + +unsigned long cpu_initialized __initdata = 0; + +/* + * cpu_init() initializes state that is per-CPU. Some data is already + * initialized (naturally) in the bootstrap process, such as the GDT + * and IDT. We reload them nevertheless, this function acts as a + * 'CPU state barrier', nothing should get across. + */ +void __init cpu_init (void) +{ + int nr = smp_processor_id(); + + if (test_and_set_bit(nr, &cpu_initialized)) { + printk(KERN_WARNING "CPU#%d already initialized!\n", nr); + for (;;) __sti(); + } + printk(KERN_INFO "Initializing CPU#%d\n", nr); + + /* + * set up and load the per-CPU TSS and LDT + */ + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + if(current->mm) + BUG(); + enter_lazy_tlb(&init_mm, current, nr); + + HYPERVISOR_stack_switch(__KERNEL_DS, current->thread.esp0); + + load_LDT(&init_mm.context); + flush_page_update_queue(); + + /* Force FPU initialization. */ + current->flags &= ~PF_USEDFPU; + current->used_math = 0; + stts(); +} + + +/****************************************************************************** + * Time-to-die callback handling. + */ + +static void die_irq(int irq, void *unused, struct pt_regs *regs) +{ + extern void ctrl_alt_del(void); + ctrl_alt_del(); +} + +static int __init setup_die_event(void) +{ + (void)request_irq(HYPEREVENT_IRQ(_EVENT_DIE), die_irq, 0, "die", NULL); + return 0; +} + +__initcall(setup_die_event); + + +/****************************************************************************** + * Stop/pickle callback handling. + */ + +#include + +static void stop_task(void *unused) +{ + /* Hmmm... a cleaner interface to suspend/resume blkdevs would be nice. */ + extern void blkdev_suspend(void); + extern void blkdev_resume(void); + + unsigned long *pfn_to_mfn_frame_list = NULL; + suspend_record_t *suspend_record = NULL; + struct net_device *dev; + char name[6]; + int i, j; + + if ( (pfn_to_mfn_frame_list = (unsigned long *)__get_free_page(GFP_KERNEL)) + == NULL ) + goto out; + if ( (suspend_record = (suspend_record_t *)__get_free_page(GFP_KERNEL)) + == NULL ) + goto out; + + suspend_record->pfn_to_mfn_frame_list = + virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT; + suspend_record->nr_pfns = max_pfn; + + j = 0; + for ( i = 0; i < max_pfn; i += (PAGE_SIZE / sizeof(unsigned long)) ) + pfn_to_mfn_frame_list[j++] = + virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT; + + /* + * NB. This is /not/ a full dev_close() as that loses route information! + * Instead we do essentialy the same as dev_close() but without notifying + * various registered subsystems about the NETDEV_DOWN event. + */ + rtnl_lock(); + for ( i = 0; i < 10; i++ ) + { + sprintf(name, "eth%d", i); + if ( ((dev = __dev_get_by_name(name)) != NULL) && + (dev->flags & IFF_UP) ) + { + dev_deactivate(dev); + clear_bit(__LINK_STATE_START, &dev->state); + if ( dev->stop != NULL ) + dev->stop(dev); + dev->flags &= ~IFF_UP; + } + } + rtnl_unlock(); + + blkdev_suspend(); + + __cli(); + + HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; + clear_fixmap(FIX_SHARED_INFO); + + memcpy(&suspend_record->resume_info, &start_info, sizeof(start_info)); + + HYPERVISOR_stop(virt_to_machine(suspend_record) >> PAGE_SHIFT); + + memcpy(&start_info, &suspend_record->resume_info, sizeof(start_info)); + + set_fixmap(FIX_SHARED_INFO, start_info.shared_info); + HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); + memset(empty_zero_page, 0, PAGE_SIZE); + + __sti(); + + blkdev_resume(); + + /* + * We now do the opposite of the network suspend code. Basically it's + * dev_open() but without notifying anyone about NETDEV_UP. + */ + rtnl_lock(); + for ( i = 0; i < 10; i++ ) + { + sprintf(name, "eth%d", i); + if ( ((dev = __dev_get_by_name(name)) != NULL) && + !(dev->flags & IFF_UP) ) + { + set_bit(__LINK_STATE_START, &dev->state); + if ( (dev->open == NULL) || (dev->open(dev) == 0) ) + { + dev->flags |= IFF_UP; + dev_activate(dev); + } + else + { + clear_bit(__LINK_STATE_START, &dev->state); + } + } + } + rtnl_unlock(); + + out: + if ( pfn_to_mfn_frame_list != NULL ) + free_page((unsigned long)pfn_to_mfn_frame_list); + if ( suspend_record != NULL ) + free_page((unsigned long)suspend_record); +} + +static struct tq_struct stop_tq; + +static void stop_irq(int irq, void *unused, struct pt_regs *regs) +{ + stop_tq.routine = stop_task; + schedule_task(&stop_tq); +} + +static int __init setup_stop_event(void) +{ + (void)request_irq(HYPEREVENT_IRQ(_EVENT_STOP), stop_irq, 0, "stop", NULL); + return 0; +} + +__initcall(setup_stop_event); + diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/signal.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/signal.c new file mode 100644 index 0000000000..f646c5c0ca --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/signal.c @@ -0,0 +1,717 @@ +/* + * linux/arch/i386/kernel/signal.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson + * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG_SIG 0 + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +int FASTCALL(do_signal(struct pt_regs *regs, sigset_t *oldset)); + +int copy_siginfo_to_user(siginfo_t *to, siginfo_t *from) +{ + if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t))) + return -EFAULT; + if (from->si_code < 0) + return __copy_to_user(to, from, sizeof(siginfo_t)); + else { + int err; + + /* If you change siginfo_t structure, please be sure + this code is fixed accordingly. + It should never copy any pad contained in the structure + to avoid security leaks, but must copy the generic + 3 ints plus the relevant union member. */ + err = __put_user(from->si_signo, &to->si_signo); + err |= __put_user(from->si_errno, &to->si_errno); + err |= __put_user((short)from->si_code, &to->si_code); + /* First 32bits of unions are always present. */ + err |= __put_user(from->si_pid, &to->si_pid); + switch (from->si_code >> 16) { + case __SI_FAULT >> 16: + break; + case __SI_CHLD >> 16: + err |= __put_user(from->si_utime, &to->si_utime); + err |= __put_user(from->si_stime, &to->si_stime); + err |= __put_user(from->si_status, &to->si_status); + default: + err |= __put_user(from->si_uid, &to->si_uid); + break; + /* case __SI_RT: This is not generated by the kernel as of now. */ + } + return err; + } +} + +/* + * Atomically swap in the new signal mask, and wait for a signal. + */ +asmlinkage int +sys_sigsuspend(int history0, int history1, old_sigset_t mask) +{ + struct pt_regs * regs = (struct pt_regs *) &history0; + sigset_t saveset; + + mask &= _BLOCKABLE; + spin_lock_irq(¤t->sigmask_lock); + saveset = current->blocked; + siginitset(¤t->blocked, mask); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + regs->eax = -EINTR; + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (do_signal(regs, &saveset)) + return -EINTR; + } +} + +asmlinkage int +sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize) +{ + struct pt_regs * regs = (struct pt_regs *) &unewset; + sigset_t saveset, newset; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + + if (copy_from_user(&newset, unewset, sizeof(newset))) + return -EFAULT; + sigdelsetmask(&newset, ~_BLOCKABLE); + + spin_lock_irq(¤t->sigmask_lock); + saveset = current->blocked; + current->blocked = newset; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + regs->eax = -EINTR; + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (do_signal(regs, &saveset)) + return -EINTR; + } +} + +asmlinkage int +sys_sigaction(int sig, const struct old_sigaction *act, + struct old_sigaction *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + if (act) { + old_sigset_t mask; + if (verify_area(VERIFY_READ, act, sizeof(*act)) || + __get_user(new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) + return -EFAULT; + __get_user(new_ka.sa.sa_flags, &act->sa_flags); + __get_user(mask, &act->sa_mask); + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || + __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) + return -EFAULT; + __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + } + + return ret; +} + +asmlinkage int +sys_sigaltstack(const stack_t *uss, stack_t *uoss) +{ + struct pt_regs *regs = (struct pt_regs *) &uss; + return do_sigaltstack(uss, uoss, regs->esp); +} + + +/* + * Do a signal return; undo the signal stack. + */ + +struct sigframe +{ + char *pretcode; + int sig; + struct sigcontext sc; + struct _fpstate fpstate; + unsigned long extramask[_NSIG_WORDS-1]; + char retcode[8]; +}; + +struct rt_sigframe +{ + char *pretcode; + int sig; + struct siginfo *pinfo; + void *puc; + struct siginfo info; + struct ucontext uc; + struct _fpstate fpstate; + char retcode[8]; +}; + +static int +restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc, int *peax) +{ + unsigned int err = 0; + +#define COPY(x) err |= __get_user(regs->x, &sc->x) + +#define COPY_SEG(seg) \ + { unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + regs->x##seg = tmp; } + +#define COPY_SEG_STRICT(seg) \ + { unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + regs->x##seg = tmp|3; } + +#define GET_SEG(seg) \ + { unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + loadsegment(seg,tmp); } + + GET_SEG(gs); + GET_SEG(fs); + COPY_SEG(es); + COPY_SEG(ds); + COPY(edi); + COPY(esi); + COPY(ebp); + COPY(esp); + COPY(ebx); + COPY(edx); + COPY(ecx); + COPY(eip); + COPY_SEG_STRICT(cs); + COPY_SEG_STRICT(ss); + + { + unsigned int tmpflags; + err |= __get_user(tmpflags, &sc->eflags); + regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5); + regs->orig_eax = -1; /* disable syscall checks */ + } + + { + struct _fpstate * buf; + err |= __get_user(buf, &sc->fpstate); + if (buf) { + if (verify_area(VERIFY_READ, buf, sizeof(*buf))) + goto badframe; + err |= restore_i387(buf); + } + } + + err |= __get_user(*peax, &sc->eax); + return err; + +badframe: + return 1; +} + +asmlinkage int sys_sigreturn(unsigned long __unused) +{ + struct pt_regs *regs = (struct pt_regs *) &__unused; + struct sigframe *frame = (struct sigframe *)(regs->esp - 8); + sigset_t set; + int eax; + + if (verify_area(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__get_user(set.sig[0], &frame->sc.oldmask) + || (_NSIG_WORDS > 1 + && __copy_from_user(&set.sig[1], &frame->extramask, + sizeof(frame->extramask)))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sigmask_lock); + current->blocked = set; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + if (restore_sigcontext(regs, &frame->sc, &eax)) + goto badframe; + return eax; + +badframe: + force_sig(SIGSEGV, current); + return 0; +} + +asmlinkage int sys_rt_sigreturn(unsigned long __unused) +{ + struct pt_regs *regs = (struct pt_regs *) &__unused; + struct rt_sigframe *frame = (struct rt_sigframe *)(regs->esp - 4); + sigset_t set; + stack_t st; + int eax; + + if (verify_area(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sigmask_lock); + current->blocked = set; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax)) + goto badframe; + + if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st))) + goto badframe; + /* It is more difficult to avoid calling this function than to + call it and ignore errors. */ + do_sigaltstack(&st, NULL, regs->esp); + + return eax; + +badframe: + force_sig(SIGSEGV, current); + return 0; +} + +/* + * Set up a signal frame. + */ + +static int +setup_sigcontext(struct sigcontext *sc, struct _fpstate *fpstate, + struct pt_regs *regs, unsigned long mask) +{ + int tmp, err = 0; + + tmp = 0; + __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); + err |= __put_user(tmp, (unsigned int *)&sc->gs); + __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp)); + err |= __put_user(tmp, (unsigned int *)&sc->fs); + + err |= __put_user(regs->xes, (unsigned int *)&sc->es); + err |= __put_user(regs->xds, (unsigned int *)&sc->ds); + err |= __put_user(regs->edi, &sc->edi); + err |= __put_user(regs->esi, &sc->esi); + err |= __put_user(regs->ebp, &sc->ebp); + err |= __put_user(regs->esp, &sc->esp); + err |= __put_user(regs->ebx, &sc->ebx); + err |= __put_user(regs->edx, &sc->edx); + err |= __put_user(regs->ecx, &sc->ecx); + err |= __put_user(regs->eax, &sc->eax); + err |= __put_user(current->thread.trap_no, &sc->trapno); + err |= __put_user(current->thread.error_code, &sc->err); + err |= __put_user(regs->eip, &sc->eip); + err |= __put_user(regs->xcs, (unsigned int *)&sc->cs); + err |= __put_user(regs->eflags, &sc->eflags); + err |= __put_user(regs->esp, &sc->esp_at_signal); + err |= __put_user(regs->xss, (unsigned int *)&sc->ss); + + tmp = save_i387(fpstate); + if (tmp < 0) + err = 1; + else + err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); + + /* non-iBCS2 extensions.. */ + err |= __put_user(mask, &sc->oldmask); + err |= __put_user(current->thread.cr2, &sc->cr2); + + return err; +} + +/* + * Determine which stack to use.. + */ +static inline void * +get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) +{ + unsigned long esp; + + /* Default to using normal stack */ + esp = regs->esp; + + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa.sa_flags & SA_ONSTACK) { + if (sas_ss_flags(esp) == 0) + esp = current->sas_ss_sp + current->sas_ss_size; + } + + /* This is the legacy signal stack switching. */ + else if ((regs->xss & 0xffff) != __USER_DS && + !(ka->sa.sa_flags & SA_RESTORER) && + ka->sa.sa_restorer) { + esp = (unsigned long) ka->sa.sa_restorer; + } + + return (void *)((esp - frame_size) & -8ul); +} + +static void setup_frame(int sig, struct k_sigaction *ka, + sigset_t *set, struct pt_regs * regs) +{ + struct sigframe *frame; + int err = 0; + + frame = get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + err |= __put_user((current->exec_domain + && current->exec_domain->signal_invmap + && sig < 32 + ? current->exec_domain->signal_invmap[sig] + : sig), + &frame->sig); + if (err) + goto give_sigsegv; + + err |= setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); + if (err) + goto give_sigsegv; + + if (_NSIG_WORDS > 1) { + err |= __copy_to_user(frame->extramask, &set->sig[1], + sizeof(frame->extramask)); + } + if (err) + goto give_sigsegv; + + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + if (ka->sa.sa_flags & SA_RESTORER) { + err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); + } else { + err |= __put_user(frame->retcode, &frame->pretcode); + /* This is popl %eax ; movl $,%eax ; int $0x80 */ + err |= __put_user(0xb858, (short *)(frame->retcode+0)); + err |= __put_user(__NR_sigreturn, (int *)(frame->retcode+2)); + err |= __put_user(0x80cd, (short *)(frame->retcode+6)); + } + + if (err) + goto give_sigsegv; + + /* Set up registers for signal handler */ + regs->esp = (unsigned long) frame; + regs->eip = (unsigned long) ka->sa.sa_handler; + + set_fs(USER_DS); + regs->xds = __USER_DS; + regs->xes = __USER_DS; + regs->xss = __USER_DS; + regs->xcs = __USER_CS; + regs->eflags &= ~TF_MASK; + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", + current->comm, current->pid, frame, regs->eip, frame->pretcode); +#endif + + return; + +give_sigsegv: + if (sig == SIGSEGV) + ka->sa.sa_handler = SIG_DFL; + force_sig(SIGSEGV, current); +} + +static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs * regs) +{ + struct rt_sigframe *frame; + int err = 0; + + frame = get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + err |= __put_user((current->exec_domain + && current->exec_domain->signal_invmap + && sig < 32 + ? current->exec_domain->signal_invmap[sig] + : sig), + &frame->sig); + err |= __put_user(&frame->info, &frame->pinfo); + err |= __put_user(&frame->uc, &frame->puc); + err |= copy_siginfo_to_user(&frame->info, info); + if (err) + goto give_sigsegv; + + /* Create the ucontext. */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_link); + err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(regs->esp), + &frame->uc.uc_stack.ss_flags); + err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto give_sigsegv; + + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + if (ka->sa.sa_flags & SA_RESTORER) { + err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); + } else { + err |= __put_user(frame->retcode, &frame->pretcode); + /* This is movl $,%eax ; int $0x80 */ + err |= __put_user(0xb8, (char *)(frame->retcode+0)); + err |= __put_user(__NR_rt_sigreturn, (int *)(frame->retcode+1)); + err |= __put_user(0x80cd, (short *)(frame->retcode+5)); + } + + if (err) + goto give_sigsegv; + + /* Set up registers for signal handler */ + regs->esp = (unsigned long) frame; + regs->eip = (unsigned long) ka->sa.sa_handler; + + set_fs(USER_DS); + regs->xds = __USER_DS; + regs->xes = __USER_DS; + regs->xss = __USER_DS; + regs->xcs = __USER_CS; + regs->eflags &= ~TF_MASK; + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", + current->comm, current->pid, frame, regs->eip, frame->pretcode); +#endif + + return; + +give_sigsegv: + if (sig == SIGSEGV) + ka->sa.sa_handler = SIG_DFL; + force_sig(SIGSEGV, current); +} + +/* + * OK, we're invoking a handler + */ + +static void +handle_signal(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) +{ + /* Are we from a system call? */ + if (regs->orig_eax >= 0) { + /* If so, check system call restarting.. */ + switch (regs->eax) { + case -ERESTARTNOHAND: + regs->eax = -EINTR; + break; + + case -ERESTARTSYS: + if (!(ka->sa.sa_flags & SA_RESTART)) { + regs->eax = -EINTR; + break; + } + /* fallthrough */ + case -ERESTARTNOINTR: + regs->eax = regs->orig_eax; + regs->eip -= 2; + } + } + + /* Set up the stack frame */ + if (ka->sa.sa_flags & SA_SIGINFO) + setup_rt_frame(sig, ka, info, oldset, regs); + else + setup_frame(sig, ka, oldset, regs); + + if (ka->sa.sa_flags & SA_ONESHOT) + ka->sa.sa_handler = SIG_DFL; + + if (!(ka->sa.sa_flags & SA_NODEFER)) { + spin_lock_irq(¤t->sigmask_lock); + sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); + sigaddset(¤t->blocked,sig); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + } +} + +/* + * Note that 'init' is a special process: it doesn't get signals it doesn't + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. + */ +int do_signal(struct pt_regs *regs, sigset_t *oldset) +{ + siginfo_t info; + struct k_sigaction *ka; + + /* + * We want the common case to go fast, which + * is why we may in certain cases get here from + * kernel mode. Just return without doing anything + * if so. + */ + if ((regs->xcs & 2) != 2) + return 1; + + if (!oldset) + oldset = ¤t->blocked; + + for (;;) { + unsigned long signr; + + spin_lock_irq(¤t->sigmask_lock); + signr = dequeue_signal(¤t->blocked, &info); + spin_unlock_irq(¤t->sigmask_lock); + + if (!signr) + break; + + if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { + /* Let the debugger run. */ + current->exit_code = signr; + current->state = TASK_STOPPED; + notify_parent(current, SIGCHLD); + schedule(); + + /* We're back. Did the debugger cancel the sig? */ + if (!(signr = current->exit_code)) + continue; + current->exit_code = 0; + + /* The debugger continued. Ignore SIGSTOP. */ + if (signr == SIGSTOP) + continue; + + /* Update the siginfo structure. Is this good? */ + if (signr != info.si_signo) { + info.si_signo = signr; + info.si_errno = 0; + info.si_code = SI_USER; + info.si_pid = current->p_pptr->pid; + info.si_uid = current->p_pptr->uid; + } + + /* If the (new) signal is now blocked, requeue it. */ + if (sigismember(¤t->blocked, signr)) { + send_sig_info(signr, &info, current); + continue; + } + } + + ka = ¤t->sig->action[signr-1]; + if (ka->sa.sa_handler == SIG_IGN) { + if (signr != SIGCHLD) + continue; + /* Check for SIGCHLD: it's special. */ + while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0) + /* nothing */; + continue; + } + + if (ka->sa.sa_handler == SIG_DFL) { + int exit_code = signr; + + /* Init gets no signals it doesn't want. */ + if (current->pid == 1) + continue; + + switch (signr) { + case SIGCONT: case SIGCHLD: case SIGWINCH: case SIGURG: + continue; + + case SIGTSTP: case SIGTTIN: case SIGTTOU: + if (is_orphaned_pgrp(current->pgrp)) + continue; + /* FALLTHRU */ + + case SIGSTOP: { + struct signal_struct *sig; + current->state = TASK_STOPPED; + current->exit_code = signr; + sig = current->p_pptr->sig; + if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) + notify_parent(current, SIGCHLD); + schedule(); + continue; + } + + case SIGQUIT: case SIGILL: case SIGTRAP: + case SIGABRT: case SIGFPE: case SIGSEGV: + case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ: + if (do_coredump(signr, regs)) + exit_code |= 0x80; + /* FALLTHRU */ + + default: + sig_exit(signr, exit_code, &info); + /* NOTREACHED */ + } + } + + /* Reenable any watchpoints before delivering the + * signal to user space. The processor register will + * have been cleared if the watchpoint triggered + * inside the kernel. + */ + if ( current->thread.debugreg[7] != 0 ) + HYPERVISOR_set_debugreg(7, current->thread.debugreg[7]); + + /* Whee! Actually deliver the signal. */ + handle_signal(signr, ka, &info, oldset, regs); + return 1; + } + + /* Did we come from a system call? */ + if (regs->orig_eax >= 0) { + /* Restart the system call - no handlers present */ + if (regs->eax == -ERESTARTNOHAND || + regs->eax == -ERESTARTSYS || + regs->eax == -ERESTARTNOINTR) { + regs->eax = regs->orig_eax; + regs->eip -= 2; + } + } + return 0; +} diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/time.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/time.c new file mode 100644 index 0000000000..bff0f26b4f --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/time.c @@ -0,0 +1,654 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge + * (C) 2002-2003 - Keir Fraser - University of Cambridge + **************************************************************************** + * + * File: arch/xen/kernel/time.c + * Author: Rolf Neugebauer and Keir Fraser + * + * Description: Interface with Xen to get correct notion of time + */ + +/* + * linux/arch/i386/kernel/time.c + * + * Copyright (C) 1991, 1992, 1995 Linus Torvalds + * + * This file contains the PC-specific time handling details: + * reading the RTC at bootup, etc.. + * 1994-07-02 Alan Modra + * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime + * 1995-03-26 Markus Kuhn + * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887 + * precision CMOS clock update + * 1996-05-03 Ingo Molnar + * fixed time warps in do_[slow|fast]_gettimeoffset() + * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 + * "A Kernel Model for Precision Timekeeping" by Dave Mills + * 1998-09-05 (Various) + * More robust do_fast_gettimeoffset() algorithm implemented + * (works with APM, Cyrix 6x86MX and Centaur C6), + * monotonic gettimeofday() with fast_get_timeoffset(), + * drift-proof precision TSC calibration on boot + * (C. Scott Ananian , Andrew D. + * Balsa , Philip Gladstone ; + * ported from 2.0.35 Jumbo-9 by Michael Krause ). + * 1998-12-16 Andrea Arcangeli + * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy + * because was not accounting lost_ticks. + * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli + * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to + * serialize accesses to xtime/lost_ticks). + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; +extern rwlock_t xtime_lock; +extern unsigned long wall_jiffies; + +unsigned long cpu_khz; /* get this from Xen, used elsewhere */ + +static unsigned int rdtsc_bitshift; +static u32 st_scale_f; /* convert ticks -> usecs */ +static u32 st_scale_i; /* convert ticks -> usecs */ + +/* These are peridically updated in shared_info, and then copied here. */ +static u32 shadow_tsc_stamp; +static u64 shadow_system_time; +static u32 shadow_time_version; +static struct timeval shadow_tv; + +/* + * We use this to ensure that gettimeofday() is monotonically increasing. We + * only break this guarantee if the wall clock jumps backwards "a long way". + */ +static struct timeval last_seen_tv = {0,0}; + +#ifdef CONFIG_XEN_PRIVILEGED_GUEST +/* Periodically propagate synchronised time base to the RTC and to Xen. */ +static long last_update_to_rtc, last_update_to_xen; +#endif + +/* Periodically take synchronised time base from Xen, if we need it. */ +static long last_update_from_xen; /* UTC seconds when last read Xen clock. */ + +/* Keep track of last time we did processing/updating of jiffies and xtime. */ +static u64 processed_system_time; /* System time (ns) at last processing. */ + +#define NS_PER_TICK (1000000000ULL/HZ) + +#define HANDLE_USEC_UNDERFLOW(_tv) \ + do { \ + while ( (_tv).tv_usec < 0 ) \ + { \ + (_tv).tv_usec += 1000000; \ + (_tv).tv_sec--; \ + } \ + } while ( 0 ) +#define HANDLE_USEC_OVERFLOW(_tv) \ + do { \ + while ( (_tv).tv_usec >= 1000000 ) \ + { \ + (_tv).tv_usec -= 1000000; \ + (_tv).tv_sec++; \ + } \ + } while ( 0 ) + + +/* Does this guest OS track Xen time, or set its wall clock independently? */ +static int independent_wallclock = 0; +static int __init __independent_wallclock(char *str) +{ + independent_wallclock = 1; + return 1; +} +__setup("independent_wallclock", __independent_wallclock); + + +#ifdef CONFIG_XEN_PRIVILEGED_GUEST +/* + * In order to set the CMOS clock precisely, set_rtc_mmss has to be + * called 500 ms after the second nowtime has started, because when + * nowtime is written into the registers of the CMOS clock, it will + * jump to the next second precisely 500 ms later. Check the Motorola + * MC146818A or Dallas DS12887 data sheet for details. + * + * BUG: This routine does not handle hour overflow properly; it just + * sets the minutes. Usually you'll only notice that after reboot! + */ +static int set_rtc_mmss(unsigned long nowtime) +{ + int retval = 0; + int real_seconds, real_minutes, cmos_minutes; + unsigned char save_control, save_freq_select; + + /* gets recalled with irq locally disabled */ + spin_lock(&rtc_lock); + save_control = CMOS_READ(RTC_CONTROL); + CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); + + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); + CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + + cmos_minutes = CMOS_READ(RTC_MINUTES); + if ( !(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD ) + BCD_TO_BIN(cmos_minutes); + + /* + * since we're only adjusting minutes and seconds, don't interfere with + * hour overflow. This avoids messing with unknown time zones but requires + * your RTC not to be off by more than 15 minutes + */ + real_seconds = nowtime % 60; + real_minutes = nowtime / 60; + if ( ((abs(real_minutes - cmos_minutes) + 15)/30) & 1 ) + real_minutes += 30; /* correct for half hour time zone */ + real_minutes %= 60; + + if ( abs(real_minutes - cmos_minutes) < 30 ) + { + if ( !(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD ) + { + BIN_TO_BCD(real_seconds); + BIN_TO_BCD(real_minutes); + } + CMOS_WRITE(real_seconds,RTC_SECONDS); + CMOS_WRITE(real_minutes,RTC_MINUTES); + } + else + { + printk(KERN_WARNING + "set_rtc_mmss: can't update from %d to %d\n", + cmos_minutes, real_minutes); + retval = -1; + } + + /* The following flags have to be released exactly in this order, + * otherwise the DS12887 (popular MC146818A clone with integrated + * battery and quartz) will not reset the oscillator and will not + * update precisely 500 ms later. You won't find this mentioned in + * the Dallas Semiconductor data sheets, but who believes data + * sheets anyway ... -- Markus Kuhn + */ + CMOS_WRITE(save_control, RTC_CONTROL); + CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + spin_unlock(&rtc_lock); + + return retval; +} +#endif + + +/* + * Reads a consistent set of time-base values from Xen, into a shadow data + * area. Must be called with the xtime_lock held for writing. + */ +static void __get_time_values_from_xen(void) +{ + do { + shadow_time_version = HYPERVISOR_shared_info->time_version2; + rmb(); + shadow_tv.tv_sec = HYPERVISOR_shared_info->wc_sec; + shadow_tv.tv_usec = HYPERVISOR_shared_info->wc_usec; + shadow_tsc_stamp = HYPERVISOR_shared_info->tsc_timestamp.tsc_bits; + shadow_system_time = HYPERVISOR_shared_info->system_time; + rmb(); + } + while ( shadow_time_version != HYPERVISOR_shared_info->time_version1 ); +} + +#define TIME_VALUES_UP_TO_DATE \ + (shadow_time_version == HYPERVISOR_shared_info->time_version2) + + +/* + * Returns the system time elapsed, in ns, since the current shadow_timestamp + * was calculated. Must be called with the xtime_lock held for reading. + */ +static inline unsigned long __get_time_delta_usecs(void) +{ + s32 delta_tsc; + u32 low; + u64 delta, tsc; + + rdtscll(tsc); + low = (u32)(tsc >> rdtsc_bitshift); + delta_tsc = (s32)(low - shadow_tsc_stamp); + if ( unlikely(delta_tsc < 0) ) delta_tsc = 0; + delta = ((u64)delta_tsc * st_scale_f); + delta >>= 32; + delta += ((u64)delta_tsc * st_scale_i); + + return (unsigned long)delta; +} + + +/* + * Returns the current time-of-day in UTC timeval format. + */ +void do_gettimeofday(struct timeval *tv) +{ + unsigned long flags, lost; + struct timeval _tv; + + again: + read_lock_irqsave(&xtime_lock, flags); + + _tv.tv_usec = __get_time_delta_usecs(); + if ( (lost = (jiffies - wall_jiffies)) != 0 ) + _tv.tv_usec += lost * (1000000 / HZ); + _tv.tv_sec = xtime.tv_sec; + _tv.tv_usec += xtime.tv_usec; + + if ( unlikely(!TIME_VALUES_UP_TO_DATE) ) + { + /* + * We may have blocked for a long time, rendering our calculations + * invalid (e.g. the time delta may have overflowed). Detect that + * and recalculate with fresh values. + */ + read_unlock_irqrestore(&xtime_lock, flags); + write_lock_irqsave(&xtime_lock, flags); + __get_time_values_from_xen(); + write_unlock_irqrestore(&xtime_lock, flags); + goto again; + } + + HANDLE_USEC_OVERFLOW(_tv); + + /* Ensure that time-of-day is monotonically increasing. */ + if ( (_tv.tv_sec < last_seen_tv.tv_sec) || + ((_tv.tv_sec == last_seen_tv.tv_sec) && + (_tv.tv_usec < last_seen_tv.tv_usec)) ) + _tv = last_seen_tv; + last_seen_tv = _tv; + + read_unlock_irqrestore(&xtime_lock, flags); + + *tv = _tv; +} + + +/* + * Sets the current time-of-day based on passed-in UTC timeval parameter. + */ +void do_settimeofday(struct timeval *tv) +{ + struct timeval newtv; + + if ( !independent_wallclock && !(start_info.flags & SIF_INITDOMAIN) ) + return; + + write_lock_irq(&xtime_lock); + + /* + * Ensure we don't get blocked for a long time so that our time delta + * overflows. If that were to happen then our shadow time values would + * be stale, so we can retry with fresh ones. + */ + again: + tv->tv_usec -= __get_time_delta_usecs(); + if ( unlikely(!TIME_VALUES_UP_TO_DATE) ) + { + __get_time_values_from_xen(); + goto again; + } + + HANDLE_USEC_UNDERFLOW(*tv); + + newtv = *tv; + + tv->tv_usec -= (jiffies - wall_jiffies) * (1000000 / HZ); + HANDLE_USEC_UNDERFLOW(*tv); + + xtime = *tv; + time_adjust = 0; /* stop active adjtime() */ + time_status |= STA_UNSYNC; + time_maxerror = NTP_PHASE_LIMIT; + time_esterror = NTP_PHASE_LIMIT; + + /* Reset all our running time counts. They make no sense now. */ + last_seen_tv.tv_sec = 0; + last_update_from_xen = 0; + +#ifdef CONFIG_XEN_PRIVILEGED_GUEST + if ( start_info.flags & SIF_INITDOMAIN ) + { + dom0_op_t op; + last_update_to_rtc = last_update_to_xen = 0; + op.cmd = DOM0_SETTIME; + op.u.settime.secs = newtv.tv_sec; + op.u.settime.usecs = newtv.tv_usec; + op.u.settime.system_time = shadow_system_time; + write_unlock_irq(&xtime_lock); + HYPERVISOR_dom0_op(&op); + } + else +#endif + { + write_unlock_irq(&xtime_lock); + } +} + + +asmlinkage long sys_stime(int *tptr) +{ + int value; + struct timeval tv; + + if ( !capable(CAP_SYS_TIME) ) + return -EPERM; + + if ( get_user(value, tptr) ) + return -EFAULT; + + tv.tv_sec = value; + tv.tv_usec = 0; + + do_settimeofday(&tv); + + return 0; +} + + +/* Convert jiffies to system time. Call with xtime_lock held for reading. */ +static inline u64 __jiffies_to_st(unsigned long j) +{ + return processed_system_time + ((j - jiffies) * NS_PER_TICK); +} + + +static inline void do_timer_interrupt(int irq, void *dev_id, + struct pt_regs *regs) +{ + s64 delta; + unsigned long ticks = 0; + long sec_diff; + + __get_time_values_from_xen(); + + if ( (delta = (s64)(shadow_system_time - processed_system_time)) < 0 ) + { + printk("Timer ISR: Time went backwards: %lld\n", delta); + return; + } + + /* Process elapsed jiffies since last call. */ + while ( delta >= NS_PER_TICK ) + { + ticks++; + delta -= NS_PER_TICK; + processed_system_time += NS_PER_TICK; + } + + if ( ticks != 0 ) + { + do_timer_ticks(ticks); + + if ( user_mode(regs) ) + update_process_times_us(ticks, 0); + else + update_process_times_us(0, ticks); + } + + /* + * Take synchronised time from Xen once a minute if we're not + * synchronised ourselves, and we haven't chosen to keep an independent + * time base. + */ + if ( !independent_wallclock && + ((time_status & STA_UNSYNC) != 0) && + (xtime.tv_sec > (last_update_from_xen + 60)) ) + { + /* Adjust shadow timeval for jiffies that haven't updated xtime yet. */ + shadow_tv.tv_usec -= (jiffies - wall_jiffies) * (1000000/HZ); + HANDLE_USEC_UNDERFLOW(shadow_tv); + + /* + * Reset our running time counts if they are invalidated by a warp + * backwards of more than 500ms. + */ + sec_diff = xtime.tv_sec - shadow_tv.tv_sec; + if ( unlikely(abs(sec_diff) > 1) || + unlikely(((sec_diff * 1000000) + + xtime.tv_usec - shadow_tv.tv_usec) > 500000) ) + { + last_update_to_rtc = last_update_to_xen = 0; + last_seen_tv.tv_sec = 0; + } + + /* Update our unsynchronised xtime appropriately. */ + xtime = shadow_tv; + + last_update_from_xen = xtime.tv_sec; + } + +#ifdef CONFIG_XEN_PRIVILEGED_GUEST + if ( (start_info.flags & SIF_INITDOMAIN) && + ((time_status & STA_UNSYNC) == 0) ) + { + /* Send synchronised time to Xen approximately every minute. */ + if ( xtime.tv_sec > (last_update_to_xen + 60) ) + { + dom0_op_t op; + struct timeval tv = xtime; + + tv.tv_usec += (jiffies - wall_jiffies) * (1000000/HZ); + HANDLE_USEC_OVERFLOW(tv); + + op.cmd = DOM0_SETTIME; + op.u.settime.secs = tv.tv_sec; + op.u.settime.usecs = tv.tv_usec; + op.u.settime.system_time = shadow_system_time; + HYPERVISOR_dom0_op(&op); + + last_update_to_xen = xtime.tv_sec; + } + + /* + * If we have an externally synchronized Linux clock, then update CMOS + * clock accordingly every ~11 minutes. Set_rtc_mmss() has to be called + * as close as possible to 500 ms before the new second starts. + */ + if ( (xtime.tv_sec > (last_update_to_rtc + 660)) && + (xtime.tv_usec >= (500000 - ((unsigned) tick) / 2)) && + (xtime.tv_usec <= (500000 + ((unsigned) tick) / 2)) ) + { + if ( set_rtc_mmss(xtime.tv_sec) == 0 ) + last_update_to_rtc = xtime.tv_sec; + else + last_update_to_rtc = xtime.tv_sec - 600; + } + } +#endif +} + + +static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + write_lock(&xtime_lock); + while ( !TIME_VALUES_UP_TO_DATE ) + do_timer_interrupt(irq, NULL, regs); + write_unlock(&xtime_lock); +} + +static struct irqaction irq_timer = { + timer_interrupt, + SA_INTERRUPT, + 0, + "timer", + NULL, + NULL +}; + + +/* + * This function works out when the the next timer function has to be + * executed (by looking at the timer list) and sets the Xen one-shot + * domain timer to the appropriate value. This is typically called in + * cpu_idle() before the domain blocks. + * + * The function returns a non-0 value on error conditions. + * + * It must be called with interrupts disabled. + */ +extern spinlock_t timerlist_lock; +int set_timeout_timer(void) +{ + struct timer_list *timer; + u64 alarm = 0; + int ret = 0; + + spin_lock(&timerlist_lock); + + /* + * This is safe against long blocking (since calculations are not based on + * TSC deltas). It is also safe against warped system time since + * suspend-resume is cooperative and we would first get locked out. It is + * safe against normal updates of jiffies since interrupts are off. + */ + if ( (timer = next_timer_event()) != NULL ) + alarm = __jiffies_to_st(timer->expires); + + /* Tasks on the timer task queue expect to be executed on the next tick. */ + if ( TQ_ACTIVE(tq_timer) ) + alarm = __jiffies_to_st(jiffies + 1); + + /* Failure is pretty bad, but we'd best soldier on. */ + if ( HYPERVISOR_set_timer_op(alarm) != 0 ) + ret = -1; + + spin_unlock(&timerlist_lock); + + return ret; +} + + +/* Time debugging. */ +static void dbg_time_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + unsigned long flags, j; + u64 s_now, j_st; + struct timeval s_tv, tv; + + struct timer_list *timer; + u64 t_st; + + read_lock_irqsave(&xtime_lock, flags); + s_tv.tv_sec = shadow_tv.tv_sec; + s_tv.tv_usec = shadow_tv.tv_usec; + s_now = shadow_system_time; + read_unlock_irqrestore(&xtime_lock, flags); + + do_gettimeofday(&tv); + + j = jiffies; + j_st = __jiffies_to_st(j); + + timer = next_timer_event(); + t_st = __jiffies_to_st(timer->expires); + + printk(KERN_ALERT "time: shadow_st=0x%X:%08X\n", + (u32)(s_now>>32), (u32)s_now); + printk(KERN_ALERT "time: wct=%lds %ldus shadow_wct=%lds %ldus\n", + tv.tv_sec, tv.tv_usec, s_tv.tv_sec, s_tv.tv_usec); + printk(KERN_ALERT "time: jiffies=%lu(0x%X:%08X) timeout=%lu(0x%X:%08X)\n", + jiffies,(u32)(j_st>>32), (u32)j_st, + timer->expires,(u32)(t_st>>32), (u32)t_st); + printk(KERN_ALERT "time: processed_system_time=0x%X:%08X\n", + (u32)(processed_system_time>>32), (u32)processed_system_time); +} + +static struct irqaction dbg_time = { + dbg_time_int, + SA_SHIRQ, + 0, + "timer_dbg", + &dbg_time_int, + NULL +}; + +void __init time_init(void) +{ + unsigned long long alarm; + u64 __cpu_khz, cpu_freq, scale, scale2; + + __cpu_khz = HYPERVISOR_shared_info->cpu_freq; + do_div(__cpu_khz, 1000); + cpu_khz = (u32)__cpu_khz; + printk("Xen reported: %lu.%03lu MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); + + xtime.tv_sec = HYPERVISOR_shared_info->wc_sec; + xtime.tv_usec = HYPERVISOR_shared_info->wc_usec; + processed_system_time = shadow_system_time; + + rdtsc_bitshift = HYPERVISOR_shared_info->tsc_timestamp.tsc_bitshift; + cpu_freq = HYPERVISOR_shared_info->cpu_freq; + + scale = 1000000LL << (32 + rdtsc_bitshift); + do_div(scale, (u32)cpu_freq); + + if ( (cpu_freq >> 32) != 0 ) + { + scale2 = 1000000LL << rdtsc_bitshift; + do_div(scale2, (u32)(cpu_freq>>32)); + scale += scale2; + } + + st_scale_f = scale & 0xffffffff; + st_scale_i = scale >> 32; + + __get_time_values_from_xen(); + processed_system_time = shadow_system_time; + + (void)setup_irq(HYPEREVENT_IRQ(_EVENT_TIMER), &irq_timer); + + (void)setup_irq(HYPEREVENT_IRQ(_EVENT_DEBUG), &dbg_time); + + rdtscll(alarm); + + clear_bit(_EVENT_TIMER, &HYPERVISOR_shared_info->events); +} + + +/* + * /proc/sys/xen: This really belongs in another file. It can stay here for + * now however. + */ +static ctl_table xen_subtable[] = { + {1, "independent_wallclock", &independent_wallclock, + sizeof(independent_wallclock), 0644, NULL, proc_dointvec}, + {0} +}; +static ctl_table xen_table[] = { + {123, "xen", NULL, 0, 0555, xen_subtable}, + {0} +}; +static int __init xen_sysctl_init(void) +{ + (void)register_sysctl_table(xen_table, 0); + return 0; +} +__initcall(xen_sysctl_init); diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/traps.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/traps.c new file mode 100644 index 0000000000..63288fc282 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/traps.c @@ -0,0 +1,684 @@ +/* + * linux/arch/i386/traps.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + */ + +/* + * 'Traps.c' handles hardware traps and faults after we have saved some + * state in 'asm.s'. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include + +asmlinkage int system_call(void); +asmlinkage void lcall7(void); +asmlinkage void lcall27(void); + +asmlinkage void divide_error(void); +asmlinkage void debug(void); +asmlinkage void int3(void); +asmlinkage void overflow(void); +asmlinkage void bounds(void); +asmlinkage void invalid_op(void); +asmlinkage void device_not_available(void); +asmlinkage void double_fault(void); +asmlinkage void coprocessor_segment_overrun(void); +asmlinkage void invalid_TSS(void); +asmlinkage void segment_not_present(void); +asmlinkage void stack_segment(void); +asmlinkage void general_protection(void); +asmlinkage void page_fault(void); +asmlinkage void safe_page_fault(void); +asmlinkage void coprocessor_error(void); +asmlinkage void simd_coprocessor_error(void); +asmlinkage void alignment_check(void); +asmlinkage void spurious_interrupt_bug(void); +asmlinkage void machine_check(void); + +int kstack_depth_to_print = 24; + + +/* + * If the address is either in the .text section of the + * kernel, or in the vmalloc'ed module regions, it *may* + * be the address of a calling routine + */ + +#ifdef CONFIG_MODULES + +extern struct module *module_list; +extern struct module kernel_module; + +static inline int kernel_text_address(unsigned long addr) +{ + int retval = 0; + struct module *mod; + + if (addr >= (unsigned long) &_stext && + addr <= (unsigned long) &_etext) + return 1; + + for (mod = module_list; mod != &kernel_module; mod = mod->next) { + /* mod_bound tests for addr being inside the vmalloc'ed + * module area. Of course it'd be better to test only + * for the .text subset... */ + if (mod_bound(addr, 0, mod)) { + retval = 1; + break; + } + } + + return retval; +} + +#else + +static inline int kernel_text_address(unsigned long addr) +{ + return (addr >= (unsigned long) &_stext && + addr <= (unsigned long) &_etext); +} + +#endif + +void show_trace(unsigned long * stack) +{ + int i; + unsigned long addr; + + if (!stack) + stack = (unsigned long*)&stack; + + printk("Call Trace: "); + i = 1; + while (((long) stack & (THREAD_SIZE-1)) != 0) { + addr = *stack++; + if (kernel_text_address(addr)) { + if (i && ((i % 6) == 0)) + printk("\n "); + printk("[<%08lx>] ", addr); + i++; + } + } + printk("\n"); +} + +void show_trace_task(struct task_struct *tsk) +{ + unsigned long esp = tsk->thread.esp; + + /* User space on another CPU? */ + if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) + return; + show_trace((unsigned long *)esp); +} + +void show_stack(unsigned long * esp) +{ + unsigned long *stack; + int i; + + // debugging aid: "show_stack(NULL);" prints the + // back trace for this cpu. + + if(esp==NULL) + esp=(unsigned long*)&esp; + + stack = esp; + for(i=0; i < kstack_depth_to_print; i++) { + if (((long) stack & (THREAD_SIZE-1)) == 0) + break; + if (i && ((i % 8) == 0)) + printk("\n "); + printk("%08lx ", *stack++); + } + printk("\n"); + show_trace(esp); +} + +void show_registers(struct pt_regs *regs) +{ + int in_kernel = 1; + unsigned long esp; + unsigned short ss; + + esp = (unsigned long) (®s->esp); + ss = __KERNEL_DS; + if (regs->xcs & 2) { + in_kernel = 0; + esp = regs->esp; + ss = regs->xss & 0xffff; + } + printk(KERN_ALERT "CPU: %d\n", smp_processor_id() ); + printk(KERN_ALERT "EIP: %04x:[<%08lx>] %s\n", + 0xffff & regs->xcs, regs->eip, print_tainted()); + printk(KERN_ALERT "EFLAGS: %08lx\n",regs->eflags); + printk(KERN_ALERT "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", + regs->eax, regs->ebx, regs->ecx, regs->edx); + printk(KERN_ALERT "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + regs->esi, regs->edi, regs->ebp, esp); + printk(KERN_ALERT "ds: %04x es: %04x ss: %04x\n", + regs->xds & 0xffff, regs->xes & 0xffff, ss); + printk(KERN_ALERT "Process %s (pid: %d, stackpage=%08lx)", + current->comm, current->pid, 4096+(unsigned long)current); + /* + * When in-kernel, we also print out the stack and code at the + * time of the fault.. + */ + if (in_kernel) { + + printk(KERN_ALERT "\nStack: "); + show_stack((unsigned long*)esp); + +#if 0 + { + int i; + printk(KERN_ALERT "\nCode: "); + if(regs->eip < PAGE_OFFSET) + goto bad; + + for(i=0;i<20;i++) + { + unsigned char c; + if(__get_user(c, &((unsigned char*)regs->eip)[i])) { +bad: + printk(KERN_ALERT " Bad EIP value."); + break; + } + printk("%02x ", c); + } + } +#endif + } + printk(KERN_ALERT "\n"); +} + +spinlock_t die_lock = SPIN_LOCK_UNLOCKED; + +void die(const char * str, struct pt_regs * regs, long err) +{ + console_verbose(); + spin_lock_irq(&die_lock); + bust_spinlocks(1); + printk("%s: %04lx\n", str, err & 0xffff); + show_registers(regs); + bust_spinlocks(0); + spin_unlock_irq(&die_lock); + do_exit(SIGSEGV); +} + +static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) +{ + if (!(2 & regs->xcs)) + die(str, regs, err); +} + + +static void inline do_trap(int trapnr, int signr, char *str, + struct pt_regs * regs, long error_code, + siginfo_t *info) +{ + if (!(regs->xcs & 2)) + goto kernel_trap; + + /*trap_signal:*/ { + struct task_struct *tsk = current; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = trapnr; + if (info) + force_sig_info(signr, info, tsk); + else + force_sig(signr, tsk); + return; + } + + kernel_trap: { + unsigned long fixup = search_exception_table(regs->eip); + if (fixup) + regs->eip = fixup; + else + die(str, regs, error_code); + return; + } +} + +#define DO_ERROR(trapnr, signr, str, name) \ +asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +{ \ + do_trap(trapnr, signr, str, regs, error_code, NULL); \ +} + +#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ +asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +{ \ + siginfo_t info; \ + info.si_signo = signr; \ + info.si_errno = 0; \ + info.si_code = sicode; \ + info.si_addr = (void *)siaddr; \ + do_trap(trapnr, signr, str, regs, error_code, &info); \ +} + +DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip) +DO_ERROR( 3, SIGTRAP, "int3", int3) +DO_ERROR( 4, SIGSEGV, "overflow", overflow) +DO_ERROR( 5, SIGSEGV, "bounds", bounds) +DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip) +DO_ERROR( 7, SIGSEGV, "device not available", device_not_available) +DO_ERROR( 8, SIGSEGV, "double fault", double_fault) +DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) +DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) +DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) +DO_ERROR(12, SIGBUS, "stack segment", stack_segment) +DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) +DO_ERROR(18, SIGBUS, "machine check", machine_check) + +asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) +{ + /* + * If we trapped on an LDT access then ensure that the default_ldt is + * loaded, if nothing else. We load default_ldt lazily because LDT + * switching costs time and many applications don't need it. + */ + if ( unlikely((error_code & 6) == 4) ) + { + unsigned long ldt; + __asm__ __volatile__ ( "sldt %0" : "=r" (ldt) ); + if ( ldt == 0 ) + { + mmu_update_t u; + u.ptr = MMU_EXTENDED_COMMAND; + u.ptr |= (unsigned long)&default_ldt[0]; + u.val = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT); + HYPERVISOR_mmu_update(&u, 1); + return; + } + } + + if (!(regs->xcs & 2)) + goto gp_in_kernel; + + current->thread.error_code = error_code; + current->thread.trap_no = 13; + force_sig(SIGSEGV, current); + return; + +gp_in_kernel: + { + unsigned long fixup; + fixup = search_exception_table(regs->eip); + if (fixup) { + regs->eip = fixup; + return; + } + die("general protection fault", regs, error_code); + } +} + + +asmlinkage void do_debug(struct pt_regs * regs, long error_code) +{ + unsigned int condition; + struct task_struct *tsk = current; + siginfo_t info; + + condition = HYPERVISOR_get_debugreg(6); + + /* Mask out spurious debug traps due to lazy DR7 setting */ + if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { + if (!tsk->thread.debugreg[7]) + goto clear_dr7; + } + + /* Save debug status register where ptrace can see it */ + tsk->thread.debugreg[6] = condition; + + /* Mask out spurious TF errors due to lazy TF clearing */ + if (condition & DR_STEP) { + /* + * The TF error should be masked out only if the current + * process is not traced and if the TRAP flag has been set + * previously by a tracing process (condition detected by + * the PT_DTRACE flag); remember that the i386 TRAP flag + * can be modified by the process itself in user mode, + * allowing programs to debug themselves without the ptrace() + * interface. + */ + if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) + goto clear_TF; + } + + /* Ok, finally something we can handle */ + tsk->thread.trap_no = 1; + tsk->thread.error_code = error_code; + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_BRKPT; + + /* If this is a kernel mode trap, save the user PC on entry to + * the kernel, that's what the debugger can make sense of. + */ + info.si_addr = ((regs->xcs & 2) == 0) ? (void *)tsk->thread.eip : + (void *)regs->eip; + force_sig_info(SIGTRAP, &info, tsk); + + /* Disable additional traps. They'll be re-enabled when + * the signal is delivered. + */ + clear_dr7: + HYPERVISOR_set_debugreg(7, 0); + return; + + clear_TF: + regs->eflags &= ~TF_MASK; + return; +} + + +/* + * Note that we play around with the 'TS' bit in an attempt to get + * the correct behaviour even in the presence of the asynchronous + * IRQ13 behaviour + */ +void math_error(void *eip) +{ + struct task_struct * task; + siginfo_t info; + unsigned short cwd, swd; + + /* + * Save the info for the exception handler and clear the error. + */ + task = current; + save_init_fpu(task); + task->thread.trap_no = 16; + task->thread.error_code = 0; + info.si_signo = SIGFPE; + info.si_errno = 0; + info.si_code = __SI_FAULT; + info.si_addr = eip; + /* + * (~cwd & swd) will mask out exceptions that are not set to unmasked + * status. 0x3f is the exception bits in these regs, 0x200 is the + * C1 reg you need in case of a stack fault, 0x040 is the stack + * fault bit. We should only be taking one exception at a time, + * so if this combination doesn't produce any single exception, + * then we have a bad program that isn't syncronizing its FPU usage + * and it will suffer the consequences since we won't be able to + * fully reproduce the context of the exception + */ + cwd = get_fpu_cwd(task); + swd = get_fpu_swd(task); + switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) { + case 0x000: + default: + break; + case 0x001: /* Invalid Op */ + case 0x040: /* Stack Fault */ + case 0x240: /* Stack Fault | Direction */ + info.si_code = FPE_FLTINV; + break; + case 0x002: /* Denormalize */ + case 0x010: /* Underflow */ + info.si_code = FPE_FLTUND; + break; + case 0x004: /* Zero Divide */ + info.si_code = FPE_FLTDIV; + break; + case 0x008: /* Overflow */ + info.si_code = FPE_FLTOVF; + break; + case 0x020: /* Precision */ + info.si_code = FPE_FLTRES; + break; + } + force_sig_info(SIGFPE, &info, task); +} + +asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code) +{ + ignore_irq13 = 1; + math_error((void *)regs->eip); +} + +void simd_math_error(void *eip) +{ + struct task_struct * task; + siginfo_t info; + unsigned short mxcsr; + + /* + * Save the info for the exception handler and clear the error. + */ + task = current; + save_init_fpu(task); + task->thread.trap_no = 19; + task->thread.error_code = 0; + info.si_signo = SIGFPE; + info.si_errno = 0; + info.si_code = __SI_FAULT; + info.si_addr = eip; + /* + * The SIMD FPU exceptions are handled a little differently, as there + * is only a single status/control register. Thus, to determine which + * unmasked exception was caught we must mask the exception mask bits + * at 0x1f80, and then use these to mask the exception bits at 0x3f. + */ + mxcsr = get_fpu_mxcsr(task); + switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { + case 0x000: + default: + break; + case 0x001: /* Invalid Op */ + info.si_code = FPE_FLTINV; + break; + case 0x002: /* Denormalize */ + case 0x010: /* Underflow */ + info.si_code = FPE_FLTUND; + break; + case 0x004: /* Zero Divide */ + info.si_code = FPE_FLTDIV; + break; + case 0x008: /* Overflow */ + info.si_code = FPE_FLTOVF; + break; + case 0x020: /* Precision */ + info.si_code = FPE_FLTRES; + break; + } + force_sig_info(SIGFPE, &info, task); +} + +asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs, + long error_code) +{ + if (cpu_has_xmm) { + /* Handle SIMD FPU exceptions on PIII+ processors. */ + ignore_irq13 = 1; + simd_math_error((void *)regs->eip); + } else { + die_if_kernel("cache flush denied", regs, error_code); + current->thread.trap_no = 19; + current->thread.error_code = error_code; + force_sig(SIGSEGV, current); + } +} + +asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs, + long error_code) +{ +} + +/* + * 'math_state_restore()' saves the current math information in the + * old math state array, and gets the new ones from the current task + * + * Careful.. There are problems with IBM-designed IRQ13 behaviour. + * Don't touch unless you *really* know how it works. + */ +asmlinkage void math_state_restore(struct pt_regs regs) +{ + if (current->used_math) { + restore_fpu(current); + } else { + init_fpu(); + } + current->flags |= PF_USEDFPU; /* So we fnsave on switch_to() */ +} + + +#define _set_gate(gate_addr,type,dpl,addr) \ +do { \ + int __d0, __d1; \ + __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \ + "movw %4,%%dx\n\t" \ + "movl %%eax,%0\n\t" \ + "movl %%edx,%1" \ + :"=m" (*((long *) (gate_addr))), \ + "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \ + :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \ + "3" ((char *) (addr)),"2" (__KERNEL_CS << 16)); \ +} while (0) + +static void __init set_call_gate(void *a, void *addr) +{ + _set_gate(a,12,3,addr); +} + + +/* NB. All these are "trap gates" (i.e. events_mask isn't cleared). */ +static trap_info_t trap_table[] = { + { 0, 0, __KERNEL_CS, (unsigned long)divide_error }, + { 1, 0, __KERNEL_CS, (unsigned long)debug }, + { 3, 3, __KERNEL_CS, (unsigned long)int3 }, + { 4, 3, __KERNEL_CS, (unsigned long)overflow }, + { 5, 3, __KERNEL_CS, (unsigned long)bounds }, + { 6, 0, __KERNEL_CS, (unsigned long)invalid_op }, + { 7, 0, __KERNEL_CS, (unsigned long)device_not_available }, + { 8, 0, __KERNEL_CS, (unsigned long)double_fault }, + { 9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun }, + { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS }, + { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present }, + { 12, 0, __KERNEL_CS, (unsigned long)stack_segment }, + { 13, 0, __KERNEL_CS, (unsigned long)general_protection }, + { 14, 0, __KERNEL_CS, (unsigned long)page_fault }, + { 15, 0, __KERNEL_CS, (unsigned long)spurious_interrupt_bug }, + { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error }, + { 17, 0, __KERNEL_CS, (unsigned long)alignment_check }, + { 18, 0, __KERNEL_CS, (unsigned long)machine_check }, + { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error }, + { SYSCALL_VECTOR, + 3, __KERNEL_CS, (unsigned long)system_call }, + { 0, 0, 0, 0 } +}; + + +void __init trap_init(void) +{ + HYPERVISOR_set_trap_table(trap_table); + HYPERVISOR_set_fast_trap(SYSCALL_VECTOR); + + /* + * The default LDT is a single-entry callgate to lcall7 for iBCS and a + * callgate to lcall27 for Solaris/x86 binaries. + */ + clear_page(&default_ldt[0]); + set_call_gate(&default_ldt[0],lcall7); + set_call_gate(&default_ldt[4],lcall27); + __make_page_readonly(&default_ldt[0]); + + cpu_init(); +} + + +/* + * install_safe_pf_handler / install_normal_pf_handler: + * + * These are used within the failsafe_callback handler in entry.S to avoid + * taking a full page fault when reloading FS and GS. This is because FS and + * GS could be invalid at pretty much any point while Xenolinux executes (we + * don't set them to safe values on entry to the kernel). At *any* point Xen + * may be entered due to a hardware interrupt --- on exit from Xen an invalid + * FS/GS will cause our failsafe_callback to be executed. This could occur, + * for example, while the mmmu_update_queue is in an inconsistent state. This + * is disastrous because the normal page-fault handler touches the update + * queue! + * + * Fortunately, within the failsafe handler it is safe to force DS/ES/FS/GS + * to zero if they cannot be reloaded -- at this point executing a normal + * page fault would not change this effect. The safe page-fault handler + * ensures this end result (blow away the selector value) without the dangers + * of the normal page-fault handler. + * + * NB. Perhaps this can all go away after we have implemented writeable + * page tables. :-) + */ + +asmlinkage void do_safe_page_fault(struct pt_regs *regs, + unsigned long error_code, + unsigned long address) +{ + unsigned long fixup; + + if ( (fixup = search_exception_table(regs->eip)) != 0 ) + { + regs->eip = fixup; + return; + } + + die("Unhandleable 'safe' page fault!", regs, error_code); +} + +unsigned long install_safe_pf_handler(void) +{ + static trap_info_t safe_pf[] = { + { 14, 0, __KERNEL_CS, (unsigned long)safe_page_fault }, + { 0, 0, 0, 0 } + }; + unsigned long flags; + local_irq_save(flags); + HYPERVISOR_set_trap_table(safe_pf); + return flags; /* This is returned in %%eax */ +} + +__attribute__((regparm(3))) /* This function take its arg in %%eax */ +void install_normal_pf_handler(unsigned long flags) +{ + static trap_info_t normal_pf[] = { + { 14, 0, __KERNEL_CS, (unsigned long)page_fault }, + { 0, 0, 0, 0 } + }; + HYPERVISOR_set_trap_table(normal_pf); + local_irq_restore(flags); +} diff --git a/xenolinux-2.4.25-sparse/arch/xen/lib/Makefile b/xenolinux-2.4.25-sparse/arch/xen/lib/Makefile new file mode 100644 index 0000000000..5e00fdc135 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/lib/Makefile @@ -0,0 +1,15 @@ + +.S.o: + $(CC) $(AFLAGS) -c $< -o $*.o + +L_TARGET = lib.a + +obj-y = checksum.o old-checksum.o delay.o \ + usercopy.o getuser.o \ + memcpy.o strstr.o xen_proc.o + +obj-$(CONFIG_X86_USE_3DNOW) += mmx.o +obj-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o +obj-$(CONFIG_DEBUG_IOVIRT) += iodebug.o + +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xen/lib/delay.c b/xenolinux-2.4.25-sparse/arch/xen/lib/delay.c new file mode 100644 index 0000000000..0035bed074 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/lib/delay.c @@ -0,0 +1,52 @@ +/* + * Precise Delay Loops for i386 + * + * Copyright (C) 1993 Linus Torvalds + * Copyright (C) 1997 Martin Mares + * + * The __delay function must _NOT_ be inlined as its execution time + * depends wildly on alignment on many x86 processors. The additional + * jump magic is needed to get the timing stable on all the CPU's + * we have to worry about. + */ + +#include +#include +#include +#include +#include + +#ifdef CONFIG_SMP +#include +#endif + +void __delay(unsigned long loops) +{ + unsigned long bclock, now; + + rdtscl(bclock); + do + { + rep_nop(); + rdtscl(now); + } while ((now-bclock) < loops); +} + +inline void __const_udelay(unsigned long xloops) +{ + int d0; + __asm__("mull %0" + :"=d" (xloops), "=&a" (d0) + :"1" (xloops),"0" (current_cpu_data.loops_per_jiffy)); + __delay(xloops * HZ); +} + +void __udelay(unsigned long usecs) +{ + __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */ +} + +void __ndelay(unsigned long nsecs) +{ + __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ +} diff --git a/xenolinux-2.4.25-sparse/arch/xen/lib/xeno_proc.c b/xenolinux-2.4.25-sparse/arch/xen/lib/xeno_proc.c new file mode 100644 index 0000000000..9c06dcdd89 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/lib/xeno_proc.c @@ -0,0 +1,18 @@ + +#include +#include + +static struct proc_dir_entry *xen_base; + +struct proc_dir_entry *create_xen_proc_entry(const char *name, mode_t mode) +{ + if ( xen_base == NULL ) + if ( (xen_base = proc_mkdir("xen", &proc_root)) == NULL ) + panic("Couldn't create /proc/xen"); + return create_proc_entry(name, mode, xen_base); +} + +void remove_xen_proc_entry(const char *name) +{ + remove_proc_entry(name, xen_base); +} diff --git a/xenolinux-2.4.25-sparse/arch/xen/mm/Makefile b/xenolinux-2.4.25-sparse/arch/xen/mm/Makefile new file mode 100644 index 0000000000..d0d16114b6 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/mm/Makefile @@ -0,0 +1,16 @@ +# +# Makefile for the linux i386-specific parts of the memory manager. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... + +O_TARGET := mm.o + +obj-y := init.o fault.o extable.o pageattr.o hypervisor.o ioremap.o + +export-objs := pageattr.o + +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xen/mm/fault.c b/xenolinux-2.4.25-sparse/arch/xen/mm/fault.c new file mode 100644 index 0000000000..496e974487 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/mm/fault.c @@ -0,0 +1,326 @@ +/* + * linux/arch/i386/mm/fault.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* For unblank_screen() */ + +#include +#include +#include +#include + +extern void die(const char *,struct pt_regs *,long); + +pgd_t *cur_pgd; + +extern spinlock_t timerlist_lock; + +/* + * Unlock any spinlocks which will prevent us from getting the + * message out (timerlist_lock is acquired through the + * console unblank code) + */ +void bust_spinlocks(int yes) +{ + spin_lock_init(&timerlist_lock); + if (yes) { + oops_in_progress = 1; + } else { + int loglevel_save = console_loglevel; +#ifdef CONFIG_VT + unblank_screen(); +#endif + oops_in_progress = 0; + /* + * OK, the message is on the console. Now we call printk() + * without oops_in_progress set so that printk will give klogd + * a poke. Hold onto your hats... + */ + console_loglevel = 15; /* NMI oopser may have shut the console up */ + printk(" "); + console_loglevel = loglevel_save; + } +} + +/* + * This routine handles page faults. It determines the address, + * and the problem, and then passes it off to one of the appropriate + * routines. + * + * error_code: + * bit 0 == 0 means no page found, 1 means protection fault + * bit 1 == 0 means read, 1 means write + * bit 2 == 0 means kernel, 1 means user-mode + */ +asmlinkage void do_page_fault(struct pt_regs *regs, + unsigned long error_code, + unsigned long address) +{ + struct task_struct *tsk = current; + struct mm_struct *mm; + struct vm_area_struct * vma; + unsigned long page; + unsigned long fixup; + int write; + siginfo_t info; + + /* Set the "privileged fault" bit to something sane. */ + error_code &= 3; + error_code |= (regs->xcs & 2) << 1; + +#if MMU_UPDATE_DEBUG > 0 + if ( (error_code == 0) && (address >= TASK_SIZE) ) + { + unsigned long paddr = __pa(address); + int i; + for ( i = 0; i < mmu_update_queue_idx; i++ ) + { + if ( update_debug_queue[i].ptr == paddr ) + { + printk("XXX now(EIP=%08lx:ptr=%08lx) " + "then(%s/%d:p/v=%08lx/%08lx)\n", + regs->eip, address, + update_debug_queue[i].file, + update_debug_queue[i].line, + update_debug_queue[i].ptr, + update_debug_queue[i].val); + } + } + } +#endif + + if ( flush_page_update_queue() != 0 ) return; + + /* + * We fault-in kernel-space virtual memory on-demand. The + * 'reference' page table is init_mm.pgd. + * + * NOTE! We MUST NOT take any locks for this case. We may + * be in an interrupt or a critical region, and should + * only copy the information from the master page table, + * nothing more. + * + * This verifies that the fault happens in kernel space + * (error_code & 4) == 0, and that the fault was not a + * protection error (error_code & 1) == 0. + */ + if (address >= TASK_SIZE && !(error_code & 5)) + goto vmalloc_fault; + + mm = tsk->mm; + info.si_code = SEGV_MAPERR; + + /* + * If we're in an interrupt or have no user + * context, we must not take the fault.. + */ + if (in_interrupt() || !mm) + goto no_context; + + down_read(&mm->mmap_sem); + + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (error_code & 4) { + /* + * accessing the stack below %esp is always a bug. + * The "+ 32" is there due to some instructions (like + * pusha) doing post-decrement on the stack and that + * doesn't show up until later.. + */ + if (address + 32 < regs->esp) + goto bad_area; + } + if (expand_stack(vma, address)) + goto bad_area; +/* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ +good_area: + info.si_code = SEGV_ACCERR; + write = 0; + switch (error_code & 3) { + default: /* 3: write, present */ + /* fall through */ + case 2: /* write, not present */ + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + write++; + break; + case 1: /* read, present */ + goto bad_area; + case 0: /* read, not present */ + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + + survive: + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + switch (handle_mm_fault(mm, vma, address, write)) { + case 1: + tsk->min_flt++; + break; + case 2: + tsk->maj_flt++; + break; + case 0: + goto do_sigbus; + default: + goto out_of_memory; + } + + up_read(&mm->mmap_sem); + return; + +/* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. + */ +bad_area: + up_read(&mm->mmap_sem); + + /* User mode accesses just cause a SIGSEGV */ + if (error_code & 4) { + tsk->thread.cr2 = address; + /* Kernel addresses are always protection faults */ + tsk->thread.error_code = error_code | (address >= TASK_SIZE); + tsk->thread.trap_no = 14; + info.si_signo = SIGSEGV; + info.si_errno = 0; + /* info.si_code has been set above */ + info.si_addr = (void *)address; + force_sig_info(SIGSEGV, &info, tsk); + return; + } + +no_context: + /* Are we prepared to handle this kernel fault? */ + if ((fixup = search_exception_table(regs->eip)) != 0) { + regs->eip = fixup; + return; + } + +/* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ + + bust_spinlocks(1); + + if (address < PAGE_SIZE) + printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); + else + printk(KERN_ALERT "Unable to handle kernel paging request"); + printk(" at virtual address %08lx\n",address); + printk(" printing eip:\n"); + printk("%08lx\n", regs->eip); + page = ((unsigned long *) cur_pgd)[address >> 22]; + printk(KERN_ALERT "*pde=%08lx(%08lx)\n", page, machine_to_phys(page)); + if (page & 1) { + page &= PAGE_MASK; + address &= 0x003ff000; + page = machine_to_phys(page); + page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; + printk(KERN_ALERT "*pte=%08lx(%08lx)\n", page, + machine_to_phys(page)); + } + die("Oops", regs, error_code); + bust_spinlocks(0); + do_exit(SIGKILL); + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. + */ +out_of_memory: + if (tsk->pid == 1) { + yield(); + goto survive; + } + up_read(&mm->mmap_sem); + printk("VM: killing process %s\n", tsk->comm); + if (error_code & 4) + do_exit(SIGKILL); + goto no_context; + +do_sigbus: + up_read(&mm->mmap_sem); + + /* + * Send a sigbus, regardless of whether we were in kernel + * or user mode. + */ + tsk->thread.cr2 = address; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 14; + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void *)address; + force_sig_info(SIGBUS, &info, tsk); + + /* Kernel mode? Handle exceptions or die */ + if (!(error_code & 4)) + goto no_context; + return; + +vmalloc_fault: + { + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. + * + * Do _not_ use "tsk" here. We might be inside + * an interrupt in the middle of a task switch.. + */ + int offset = __pgd_offset(address); + pgd_t *pgd, *pgd_k; + pmd_t *pmd, *pmd_k; + pte_t *pte_k; + + pgd = offset + cur_pgd; + pgd_k = init_mm.pgd + offset; + + if (!pgd_present(*pgd_k)) + goto no_context; + set_pgd(pgd, *pgd_k); + + pmd = pmd_offset(pgd, address); + pmd_k = pmd_offset(pgd_k, address); + if (!pmd_present(*pmd_k)) + goto no_context; + set_pmd(pmd, *pmd_k); + XEN_flush_page_update_queue(); /* flush PMD update */ + + pte_k = pte_offset(pmd_k, address); + if (!pte_present(*pte_k)) + goto no_context; + return; + } +} diff --git a/xenolinux-2.4.25-sparse/arch/xen/mm/hypervisor.c b/xenolinux-2.4.25-sparse/arch/xen/mm/hypervisor.c new file mode 100644 index 0000000000..39f6863d66 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/mm/hypervisor.c @@ -0,0 +1,243 @@ +/****************************************************************************** + * xen/mm/hypervisor.c + * + * Update page tables via the hypervisor. + * + * Copyright (c) 2002, K A Fraser + */ + +#include +#include +#include +#include +#include +#include + +/* + * This suffices to protect us if we ever move to SMP domains. + * Further, it protects us against interrupts. At the very least, this is + * required for the network driver which flushes the update queue before + * pushing new receive buffers. + */ +static spinlock_t update_lock = SPIN_LOCK_UNLOCKED; + +#define QUEUE_SIZE 2048 +static mmu_update_t update_queue[QUEUE_SIZE]; +unsigned int mmu_update_queue_idx = 0; +#define idx mmu_update_queue_idx + +#if MMU_UPDATE_DEBUG > 0 +page_update_debug_t update_debug_queue[QUEUE_SIZE] = {{0}}; +#undef queue_l1_entry_update +#undef queue_l2_entry_update +static void DEBUG_allow_pt_reads(void) +{ + pte_t *pte; + mmu_update_t update; + int i; + for ( i = idx-1; i >= 0; i-- ) + { + pte = update_debug_queue[i].ptep; + if ( pte == NULL ) continue; + update_debug_queue[i].ptep = NULL; + update.ptr = virt_to_machine(pte); + update.val = update_debug_queue[i].pteval; + HYPERVISOR_mmu_update(&update, 1); + } +} +static void DEBUG_disallow_pt_read(unsigned long va) +{ + pte_t *pte; + pmd_t *pmd; + pgd_t *pgd; + unsigned long pteval; + /* + * We may fault because of an already outstanding update. + * That's okay -- it'll get fixed up in the fault handler. + */ + mmu_update_t update; + pgd = pgd_offset_k(va); + pmd = pmd_offset(pgd, va); + pte = pte_offset(pmd, va); + update.ptr = virt_to_machine(pte); + pteval = *(unsigned long *)pte; + update.val = pteval & ~_PAGE_PRESENT; + HYPERVISOR_mmu_update(&update, 1); + update_debug_queue[idx].ptep = pte; + update_debug_queue[idx].pteval = pteval; +} +#endif + +#if MMU_UPDATE_DEBUG > 1 +#undef queue_pt_switch +#undef queue_tlb_flush +#undef queue_invlpg +#undef queue_pgd_pin +#undef queue_pgd_unpin +#undef queue_pte_pin +#undef queue_pte_unpin +#endif + + +/* + * MULTICALL_flush_page_update_queue: + * This is a version of the flush which queues as part of a multicall. + */ +void MULTICALL_flush_page_update_queue(void) +{ + unsigned long flags; + unsigned int _idx; + spin_lock_irqsave(&update_lock, flags); + if ( (_idx = idx) != 0 ) + { +#if MMU_UPDATE_DEBUG > 1 + printk("Flushing %d entries from pt update queue\n", idx); +#endif +#if MMU_UPDATE_DEBUG > 0 + DEBUG_allow_pt_reads(); +#endif + idx = 0; + wmb(); /* Make sure index is cleared first to avoid double updates. */ + queue_multicall2(__HYPERVISOR_mmu_update, + (unsigned long)update_queue, + _idx); + } + spin_unlock_irqrestore(&update_lock, flags); +} + +static inline void __flush_page_update_queue(void) +{ + unsigned int _idx = idx; +#if MMU_UPDATE_DEBUG > 1 + printk("Flushing %d entries from pt update queue\n", idx); +#endif +#if MMU_UPDATE_DEBUG > 0 + DEBUG_allow_pt_reads(); +#endif + idx = 0; + wmb(); /* Make sure index is cleared first to avoid double updates. */ + HYPERVISOR_mmu_update(update_queue, _idx); +} + +void _flush_page_update_queue(void) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); + if ( idx != 0 ) __flush_page_update_queue(); + spin_unlock_irqrestore(&update_lock, flags); +} + +static inline void increment_index(void) +{ + idx++; + if ( unlikely(idx == QUEUE_SIZE) ) __flush_page_update_queue(); +} + +void queue_l1_entry_update(pte_t *ptr, unsigned long val) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); +#if MMU_UPDATE_DEBUG > 0 + DEBUG_disallow_pt_read((unsigned long)ptr); +#endif + update_queue[idx].ptr = virt_to_machine(ptr); + update_queue[idx].val = val; + increment_index(); + spin_unlock_irqrestore(&update_lock, flags); +} + +void queue_l2_entry_update(pmd_t *ptr, unsigned long val) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); + update_queue[idx].ptr = virt_to_machine(ptr); + update_queue[idx].val = val; + increment_index(); + spin_unlock_irqrestore(&update_lock, flags); +} + +void queue_pt_switch(unsigned long ptr) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); + update_queue[idx].ptr = phys_to_machine(ptr); + update_queue[idx].ptr |= MMU_EXTENDED_COMMAND; + update_queue[idx].val = MMUEXT_NEW_BASEPTR; + increment_index(); + spin_unlock_irqrestore(&update_lock, flags); +} + +void queue_tlb_flush(void) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); + update_queue[idx].ptr = MMU_EXTENDED_COMMAND; + update_queue[idx].val = MMUEXT_TLB_FLUSH; + increment_index(); + spin_unlock_irqrestore(&update_lock, flags); +} + +void queue_invlpg(unsigned long ptr) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); + update_queue[idx].ptr = MMU_EXTENDED_COMMAND; + update_queue[idx].val = ptr & PAGE_MASK; + update_queue[idx].val |= MMUEXT_INVLPG; + increment_index(); + spin_unlock_irqrestore(&update_lock, flags); +} + +void queue_pgd_pin(unsigned long ptr) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); + update_queue[idx].ptr = phys_to_machine(ptr); + update_queue[idx].ptr |= MMU_EXTENDED_COMMAND; + update_queue[idx].val = MMUEXT_PIN_L2_TABLE; + increment_index(); + spin_unlock_irqrestore(&update_lock, flags); +} + +void queue_pgd_unpin(unsigned long ptr) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); + update_queue[idx].ptr = phys_to_machine(ptr); + update_queue[idx].ptr |= MMU_EXTENDED_COMMAND; + update_queue[idx].val = MMUEXT_UNPIN_TABLE; + increment_index(); + spin_unlock_irqrestore(&update_lock, flags); +} + +void queue_pte_pin(unsigned long ptr) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); + update_queue[idx].ptr = phys_to_machine(ptr); + update_queue[idx].ptr |= MMU_EXTENDED_COMMAND; + update_queue[idx].val = MMUEXT_PIN_L1_TABLE; + increment_index(); + spin_unlock_irqrestore(&update_lock, flags); +} + +void queue_pte_unpin(unsigned long ptr) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); + update_queue[idx].ptr = phys_to_machine(ptr); + update_queue[idx].ptr |= MMU_EXTENDED_COMMAND; + update_queue[idx].val = MMUEXT_UNPIN_TABLE; + increment_index(); + spin_unlock_irqrestore(&update_lock, flags); +} + +void queue_set_ldt(unsigned long ptr, unsigned long len) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); + update_queue[idx].ptr = MMU_EXTENDED_COMMAND | ptr; + update_queue[idx].val = MMUEXT_SET_LDT | (len << MMUEXT_CMD_SHIFT); + increment_index(); + spin_unlock_irqrestore(&update_lock, flags); +} diff --git a/xenolinux-2.4.25-sparse/arch/xen/mm/init.c b/xenolinux-2.4.25-sparse/arch/xen/mm/init.c new file mode 100644 index 0000000000..0bb2d173e4 --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/mm/init.c @@ -0,0 +1,390 @@ +/* + * linux/arch/i386/mm/init.c + * + * Copyright (C) 1995 Linus Torvalds + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_BLK_DEV_INITRD +#include +#endif +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +mmu_gather_t mmu_gathers[NR_CPUS]; +unsigned long highstart_pfn, highend_pfn; +static unsigned long totalram_pages; +static unsigned long totalhigh_pages; + +int do_check_pgt_cache(int low, int high) +{ + int freed = 0; + if(pgtable_cache_size > high) { + do { + if (!QUICKLIST_EMPTY(pgd_quicklist)) { + free_pgd_slow(get_pgd_fast()); + freed++; + } + if (!QUICKLIST_EMPTY(pte_quicklist)) { + pte_free_slow(pte_alloc_one_fast(NULL, 0)); + freed++; + } + } while(pgtable_cache_size > low); + } + return freed; +} + +void show_mem(void) +{ + int i, total = 0, reserved = 0; + int shared = 0, cached = 0; + int highmem = 0; + + printk("Mem-info:\n"); + show_free_areas(); + printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); + i = max_mapnr; + while (i-- > 0) { + total++; + if (PageHighMem(mem_map+i)) + highmem++; + if (PageReserved(mem_map+i)) + reserved++; + else if (PageSwapCache(mem_map+i)) + cached++; + else if (page_count(mem_map+i)) + shared += page_count(mem_map+i) - 1; + } + printk("%d pages of RAM\n", total); + printk("%d pages of HIGHMEM\n",highmem); + printk("%d reserved pages\n",reserved); + printk("%d pages shared\n",shared); + printk("%d pages swap cached\n",cached); + printk("%ld pages in page table cache\n",pgtable_cache_size); + show_buffers(); +} + +/* References to section boundaries */ + +extern char _text, _etext, _edata, __bss_start, _end; +extern char __init_begin, __init_end; + +static inline void set_pte_phys (unsigned long vaddr, + unsigned long phys, pgprot_t prot) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + + pgd = init_mm.pgd + __pgd_offset(vaddr); + if (pgd_none(*pgd)) { + printk("PAE BUG #00!\n"); + return; + } + pmd = pmd_offset(pgd, vaddr); + if (pmd_none(*pmd)) { + printk("PAE BUG #01!\n"); + return; + } + pte = pte_offset(pmd, vaddr); + + queue_l1_entry_update(pte, phys | pgprot_val(prot)); + + /* + * It's enough to flush this one mapping. + * (PGE mappings get flushed as well) + */ + __flush_tlb_one(vaddr); +} + +void __set_fixmap(enum fixed_addresses idx, unsigned long phys, + pgprot_t flags) +{ + unsigned long address = __fix_to_virt(idx); + + if (idx >= __end_of_fixed_addresses) { + printk("Invalid __set_fixmap\n"); + return; + } + set_pte_phys(address, phys, flags); +} + +void clear_fixmap(enum fixed_addresses idx) +{ + set_pte_phys(__fix_to_virt(idx), 0, __pgprot(0)); +} + +static void __init fixrange_init (unsigned long start, + unsigned long end, pgd_t *pgd_base) +{ + pgd_t *pgd, *kpgd; + pmd_t *pmd, *kpmd; + pte_t *pte, *kpte; + int i, j; + unsigned long vaddr; + + vaddr = start; + i = __pgd_offset(vaddr); + j = __pmd_offset(vaddr); + pgd = pgd_base + i; + + for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { +#if CONFIG_X86_PAE + if (pgd_none(*pgd)) { + pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); + set_pgd(pgd, __pgd(__pa(pmd) + 0x1)); + if (pmd != pmd_offset(pgd, 0)) + printk("PAE BUG #02!\n"); + } + pmd = pmd_offset(pgd, vaddr); +#else + pmd = (pmd_t *)pgd; +#endif + for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) { + if (pmd_none(*pmd)) { + pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + clear_page(pte); + kpgd = pgd_offset_k((unsigned long)pte); + kpmd = pmd_offset(kpgd, (unsigned long)pte); + kpte = pte_offset(kpmd, (unsigned long)pte); + queue_l1_entry_update(kpte, + (*(unsigned long *)kpte)&~_PAGE_RW); + + set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte))); + } + vaddr += PMD_SIZE; + } + j = 0; + } + + XEN_flush_page_update_queue(); +} + + +static void __init zone_sizes_init(void) +{ + unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned int max_dma, high, low; + + max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + low = max_low_pfn; + high = highend_pfn; + + if (low < max_dma) + zones_size[ZONE_DMA] = low; + else { + zones_size[ZONE_DMA] = max_dma; + zones_size[ZONE_NORMAL] = low - max_dma; +#ifdef CONFIG_HIGHMEM + zones_size[ZONE_HIGHMEM] = high - low; +#endif + } + free_area_init(zones_size); +} + +/* + * paging_init() sets up the page tables - note that the first 8MB are + * already mapped by head.S. + * + * This routines also unmaps the page at virtual kernel address 0, so + * that we can trap those pesky NULL-reference errors in the kernel. + */ +void __init paging_init(void) +{ + unsigned long vaddr; + + zone_sizes_init(); + + /* + * Fixed mappings, only the page table structure has to be created - + * mappings will be set by set_fixmap(): + */ + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; + fixrange_init(vaddr, HYPERVISOR_VIRT_START, init_mm.pgd); + + /* Switch to the real shared_info page, and clear the dummy page. */ + set_fixmap(FIX_SHARED_INFO, start_info.shared_info); + HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); + memset(empty_zero_page, 0, sizeof(empty_zero_page)); + +#ifdef CONFIG_HIGHMEM +#error + kmap_init(); +#endif +} + +static inline int page_is_ram (unsigned long pagenr) +{ + return 1; +} + +#ifdef CONFIG_HIGHMEM +void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) +{ + if (!page_is_ram(pfn)) { + SetPageReserved(page); + return; + } + + if (bad_ppro && page_kills_ppro(pfn)) { + SetPageReserved(page); + return; + } + + ClearPageReserved(page); + set_bit(PG_highmem, &page->flags); + atomic_set(&page->count, 1); + __free_page(page); + totalhigh_pages++; +} +#endif /* CONFIG_HIGHMEM */ + +static void __init set_max_mapnr_init(void) +{ +#ifdef CONFIG_HIGHMEM + highmem_start_page = mem_map + highstart_pfn; + max_mapnr = num_physpages = highend_pfn; + num_mappedpages = max_low_pfn; +#else + max_mapnr = num_mappedpages = num_physpages = max_low_pfn; +#endif +} + +static int __init free_pages_init(void) +{ +#ifdef CONFIG_HIGHMEM +#error Where is this supposed to be initialised? + int bad_ppro; +#endif + int reservedpages, pfn; + + /* this will put all low memory onto the freelists */ + totalram_pages += free_all_bootmem(); + + reservedpages = 0; + for (pfn = 0; pfn < max_low_pfn; pfn++) { + /* + * Only count reserved RAM pages + */ + if (page_is_ram(pfn) && PageReserved(mem_map+pfn)) + reservedpages++; + } +#ifdef CONFIG_HIGHMEM + for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--) + one_highpage_init((struct page *) (mem_map + pfn), pfn, bad_ppro); + totalram_pages += totalhigh_pages; +#endif + return reservedpages; +} + +void __init mem_init(void) +{ + int codesize, reservedpages, datasize, initsize; + + if (!mem_map) + BUG(); + + set_max_mapnr_init(); + + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); + + /* clear the zero-page */ + memset(empty_zero_page, 0, PAGE_SIZE); + + reservedpages = free_pages_init(); + + codesize = (unsigned long) &_etext - (unsigned long) &_text; + datasize = (unsigned long) &_edata - (unsigned long) &_etext; + initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; + + printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", + (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), + max_mapnr << (PAGE_SHIFT-10), + codesize >> 10, + reservedpages << (PAGE_SHIFT-10), + datasize >> 10, + initsize >> 10, + (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) + ); + + boot_cpu_data.wp_works_ok = 1; +} + +void free_initmem(void) +{ + unsigned long addr; + + addr = (unsigned long)(&__init_begin); + for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { + ClearPageReserved(virt_to_page(addr)); + set_page_count(virt_to_page(addr), 1); + free_page(addr); + totalram_pages++; + } + printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10); +} + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (start < end) + printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + set_page_count(virt_to_page(start), 1); + free_page(start); + totalram_pages++; + } +} +#endif + +void si_meminfo(struct sysinfo *val) +{ + val->totalram = totalram_pages; + val->sharedram = 0; + val->freeram = nr_free_pages(); + val->bufferram = atomic_read(&buffermem_pages); + val->totalhigh = totalhigh_pages; + val->freehigh = nr_free_highpages(); + val->mem_unit = PAGE_SIZE; + return; +} + +#if defined(CONFIG_X86_PAE) +struct kmem_cache_s *pae_pgd_cachep; +void __init pgtable_cache_init(void) +{ + /* + * PAE pgds must be 16-byte aligned: + */ + pae_pgd_cachep = kmem_cache_create("pae_pgd", 32, 0, + SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, NULL, NULL); + if (!pae_pgd_cachep) + panic("init_pae(): Cannot alloc pae_pgd SLAB cache"); +} +#endif /* CONFIG_X86_PAE */ diff --git a/xenolinux-2.4.25-sparse/arch/xen/mm/ioremap.c b/xenolinux-2.4.25-sparse/arch/xen/mm/ioremap.c new file mode 100644 index 0000000000..7b1162de9c --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/mm/ioremap.c @@ -0,0 +1,226 @@ +/* + * arch/xen/mm/ioremap.c + * + * Re-map IO memory to kernel address space so that we can access it. + * + * (C) Copyright 1995 1996 Linus Torvalds + * + * Modifications for Xenolinux (c) 2003 Keir Fraser + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(CONFIG_XEN_PRIVILEGED_GUEST) + +/* These hacky macros avoid phys->machine translations. */ +#define __direct_pte(x) ((pte_t) { (x) } ) +#define __direct_mk_pte(page_nr,pgprot) \ + __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) +#define direct_mk_pte_phys(physpage, pgprot) \ + __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot) + +static inline void direct_remap_area_pte(pte_t *pte, + unsigned long address, + unsigned long size, + unsigned long machine_addr, + pgprot_t prot) +{ + unsigned long end; + + address &= ~PMD_MASK; + end = address + size; + if (end > PMD_SIZE) + end = PMD_SIZE; + if (address >= end) + BUG(); + do { + if (!pte_none(*pte)) { + printk("direct_remap_area_pte: page already exists\n"); + BUG(); + } + set_pte(pte, pte_mkio(direct_mk_pte_phys(machine_addr, prot))); + address += PAGE_SIZE; + machine_addr += PAGE_SIZE; + pte++; + } while (address && (address < end)); +} + +static inline int direct_remap_area_pmd(struct mm_struct *mm, + pmd_t *pmd, + unsigned long address, + unsigned long size, + unsigned long machine_addr, + pgprot_t prot) +{ + unsigned long end; + + address &= ~PGDIR_MASK; + end = address + size; + if (end > PGDIR_SIZE) + end = PGDIR_SIZE; + machine_addr -= address; + if (address >= end) + BUG(); + do { + pte_t * pte = pte_alloc(mm, pmd, address); + if (!pte) + return -ENOMEM; + direct_remap_area_pte(pte, address, end - address, + address + machine_addr, prot); + address = (address + PMD_SIZE) & PMD_MASK; + pmd++; + } while (address && (address < end)); + return 0; +} + +int direct_remap_area_pages(struct mm_struct *mm, + unsigned long address, + unsigned long machine_addr, + unsigned long size, + pgprot_t prot) +{ + int error = 0; + pgd_t * dir; + unsigned long end = address + size; + + machine_addr -= address; + dir = pgd_offset(mm, address); + flush_cache_all(); + if (address >= end) + BUG(); + spin_lock(&mm->page_table_lock); + do { + pmd_t *pmd = pmd_alloc(mm, dir, address); + error = -ENOMEM; + if (!pmd) + break; + error = direct_remap_area_pmd(mm, pmd, address, end - address, + machine_addr + address, prot); + if (error) + break; + address = (address + PGDIR_SIZE) & PGDIR_MASK; + dir++; + } while (address && (address < end)); + spin_unlock(&mm->page_table_lock); + flush_tlb_all(); + return error; +} + +#endif /* CONFIG_XEN_PRIVILEGED_GUEST */ + + +/* + * Remap an arbitrary machine address space into the kernel virtual + * address space. Needed when a privileged instance of Xenolinux wants + * to access space outside its world directly. + * + * NOTE! We need to allow non-page-aligned mappings too: we will obviously + * have to convert them into an offset in a page-aligned mapping, but the + * caller shouldn't need to know that small detail. + */ +void * __ioremap(unsigned long machine_addr, + unsigned long size, + unsigned long flags) +{ +#if defined(CONFIG_XEN_PRIVILEGED_GUEST) + void * addr; + struct vm_struct * area; + unsigned long offset, last_addr; + pgprot_t prot; + + /* Only privileged Xenolinux can make unchecked pagetable updates. */ + if ( !(start_info.flags & SIF_PRIVILEGED) ) + return NULL; + + /* Don't allow wraparound or zero size */ + last_addr = machine_addr + size - 1; + if (!size || last_addr < machine_addr) + return NULL; + + /* Mappings have to be page-aligned */ + offset = machine_addr & ~PAGE_MASK; + machine_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr+1) - machine_addr; + + /* Ok, go for it */ + area = get_vm_area(size, VM_IOREMAP); + if (!area) + return NULL; + addr = area->addr; + prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | + _PAGE_ACCESSED | flags); + if (direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(addr), + machine_addr, size, prot)) { + vfree(addr); + return NULL; + } + return (void *) (offset + (char *)addr); +#else + return NULL; +#endif +} + +void iounmap(void *addr) +{ + vfree((void *)((unsigned long)addr & PAGE_MASK)); +} + +/* implementation of boot time ioremap for purpose of provising access +to the vga console for privileged domains. Unlike boot time ioremap on +other architectures, ours is permanent and not reclaimed when then vmalloc +infrastructure is started */ + +void __init *bt_ioremap(unsigned long machine_addr, unsigned long size) +{ + unsigned long offset, last_addr; + unsigned int nrpages; + enum fixed_addresses idx; + + /* Don't allow wraparound or zero size */ + last_addr = machine_addr + size - 1; + if (!size || last_addr < machine_addr) + return NULL; + + /* + * Mappings have to be page-aligned + */ + offset = machine_addr & ~PAGE_MASK; + machine_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr) - machine_addr; + + /* + * Mappings have to fit in the FIX_BTMAP area. + */ + nrpages = size >> PAGE_SHIFT; + if (nrpages > NR_FIX_BTMAPS) + return NULL; + + /* + * Ok, go for it.. + */ + idx = FIX_BTMAP_BEGIN; + while (nrpages > 0) { + __set_fixmap(idx, machine_addr, + __pgprot(__PAGE_KERNEL|_PAGE_IO)); + machine_addr += PAGE_SIZE; + --idx; + --nrpages; + } + + flush_tlb_all(); + + return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN)); +} + + +#if 0 /* We don't support these functions. They shouldn't be required. */ +void __init bt_iounmap(void *addr, unsigned long size) {} +#endif diff --git a/xenolinux-2.4.25-sparse/arch/xen/vmlinux.lds b/xenolinux-2.4.25-sparse/arch/xen/vmlinux.lds new file mode 100644 index 0000000000..7c4c4f8e9c --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xen/vmlinux.lds @@ -0,0 +1,82 @@ +/* ld script to make i386 Linux kernel + * Written by Martin Mares ; + */ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start) +SECTIONS +{ + . = 0xC0000000 + 0x000000; + _text = .; /* Text and read-only data */ + .text : { + *(.text) + *(.fixup) + *(.gnu.warning) + } = 0x9090 + + _etext = .; /* End of text section */ + + .rodata : { *(.rodata) *(.rodata.*) } + .kstrtab : { *(.kstrtab) } + + . = ALIGN(16); /* Exception table */ + __start___ex_table = .; + __ex_table : { *(__ex_table) } + __stop___ex_table = .; + + __start___ksymtab = .; /* Kernel symbol table */ + __ksymtab : { *(__ksymtab) } + __stop___ksymtab = .; + + .data : { /* Data */ + *(.data) + CONSTRUCTORS + } + + _edata = .; /* End of data section */ + + . = ALIGN(8192); /* init_task */ + .data.init_task : { *(.data.init_task) } + + . = ALIGN(4096); /* Init code and data */ + __init_begin = .; + .text.init : { *(.text.init) } + .data.init : { *(.data.init) } + . = ALIGN(16); + __setup_start = .; + .setup.init : { *(.setup.init) } + __setup_end = .; + __initcall_start = .; + .initcall.init : { *(.initcall.init) } + __initcall_end = .; + . = ALIGN(4096); + __init_end = .; + + . = ALIGN(4096); + .data.page_aligned : { *(.data.idt) } + + . = ALIGN(32); + .data.cacheline_aligned : { *(.data.cacheline_aligned) } + + __bss_start = .; /* BSS */ + .bss : { + *(.bss) + } + _end = . ; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.text.exit) + *(.data.exit) + *(.exitcall.exit) + } + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } +} diff --git a/xenolinux-2.4.25-sparse/arch/xeno/Makefile b/xenolinux-2.4.25-sparse/arch/xeno/Makefile deleted file mode 100644 index a2e6a0e0cd..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/Makefile +++ /dev/null @@ -1,120 +0,0 @@ -# -# xeno/Makefile -# -# This file is included by the global makefile so that you can add your own -# architecture-specific flags and dependencies. Remember to do have actions -# for "archclean" and "archdep" for cleaning up and making dependencies for -# this architecture -# -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# -# Copyright (C) 1994 by Linus Torvalds -# -# 19990713 Artur Skawina -# Added '-march' and '-mpreferred-stack-boundary' support -# - -override EXTRAVERSION := -xeno$(EXTRAVERSION) - -LD=$(CROSS_COMPILE)ld -m elf_i386 -OBJCOPY=$(CROSS_COMPILE)objcopy -O binary -R .note -R .comment -S -LDFLAGS=-e stext -LINKFLAGS =-T $(TOPDIR)/arch/xeno/vmlinux.lds $(LDFLAGS) - -CFLAGS += -pipe - -check_gcc = $(shell if $(CC) $(1) -S -o /dev/null -xc /dev/null > /dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi) - -# prevent gcc from keeping the stack 16 byte aligned -CFLAGS += $(call check_gcc,-mpreferred-stack-boundary=2,) - -ifdef CONFIG_M686 -CFLAGS += -march=i686 -endif - -ifdef CONFIG_MPENTIUMIII -CFLAGS += -march=i686 -endif - -ifdef CONFIG_MPENTIUM4 -CFLAGS += -march=i686 -endif - -ifdef CONFIG_MK7 -CFLAGS += $(call check_gcc,-march=athlon,-march=i686 -malign-functions=4) -endif - -HEAD := arch/xeno/kernel/head.o arch/xeno/kernel/init_task.o - -SUBDIRS += arch/xeno/kernel arch/xeno/mm arch/xeno/lib -SUBDIRS += arch/xeno/drivers/console arch/xeno/drivers/network -SUBDIRS += arch/xeno/drivers/evtchn arch/xeno/drivers/block -SUBDIRS += arch/xeno/drivers/balloon arch/xeno/drivers/vnetif -ifdef CONFIG_XENO_PRIV -SUBDIRS += arch/xeno/drivers/dom0 -endif - -CORE_FILES += arch/xeno/kernel/kernel.o arch/xeno/mm/mm.o -CORE_FILES += arch/xeno/drivers/evtchn/drv.o -CORE_FILES += arch/xeno/drivers/console/drv.o -CORE_FILES += arch/xeno/drivers/block/drv.o -CORE_FILES += arch/xeno/drivers/network/drv.o -CORE_FILES += arch/xeno/drivers/vnetif/drv.o -ifdef CONFIG_XENO_PRIV -CORE_FILES += arch/xeno/drivers/dom0/drv.o -endif -CORE_FILES += arch/xeno/drivers/balloon/drv.o -LIBS := $(TOPDIR)/arch/xeno/lib/lib.a $(LIBS) $(TOPDIR)/arch/xeno/lib/lib.a - -arch/xeno/kernel: dummy - $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/kernel - -arch/xeno/mm: dummy - $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/mm - -arch/xeno/drivers/console: dummy - $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/console - -arch/xeno/drivers/network: dummy - $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/network - -arch/xeno/drivers/block: dummy - $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/block - -arch/xeno/drivers/dom0: dummy - $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/dom0 - -arch/xeno/drivers/balloon: dummy - $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/balloon - -MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot - -vmlinux: arch/xeno/vmlinux.lds - -FORCE: ; - -.PHONY: bzImage compressed clean archclean archmrproper archdep - -bzImage: vmlinux - @$(MAKEBOOT) xenolinux.gz - -install: bzImage - mkdir -p $(prefix)/boot - install -m0644 arch/$(ARCH)/boot/xenolinux.gz $(prefix)/boot/xenolinux.gz - -dist: bzImage - mkdir -p ../install/boot - install -m0644 arch/$(ARCH)/boot/xenolinux.gz ../install/boot/xenolinux.gz - -archclean: - @$(MAKEBOOT) clean - -archmrproper: - rm -f include/asm-xeno/hypervisor-ifs/arch - -archdep: - rm -f include/asm-xeno/hypervisor-ifs/arch - ( cd include/asm-xeno/hypervisor-ifs ; rm -rf arch ; ln -sf arch-$(SUBARCH) arch) - @$(MAKEBOOT) dep diff --git a/xenolinux-2.4.25-sparse/arch/xeno/boot/Makefile b/xenolinux-2.4.25-sparse/arch/xeno/boot/Makefile deleted file mode 100644 index 0cd954ea7a..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/boot/Makefile +++ /dev/null @@ -1,22 +0,0 @@ -# -# arch/xeno/boot/Makefile -# - -xenolinux.gz: xenolinux - gzip -f -9 < $< > $@ - -xenolinux: $(TOPDIR)/vmlinux - # Guest OS header -- first 8 bytes are identifier 'XenGuest'. - echo -e -n 'XenGuest' >$@ - # Guest OS header -- next 4 bytes are load address (0xC0000000). - echo -e -n '\000\000\000\300' >>$@ - $(OBJCOPY) $< xenolinux.body - # Guest OS header is immediately followed by raw OS image. - # Start address must be at byte 0. - cat xenolinux.body >>$@ - rm -f xenolinux.body - -dep: - -clean: - rm -f xenolinux xenolinux.gz \ No newline at end of file diff --git a/xenolinux-2.4.25-sparse/arch/xeno/config.in b/xenolinux-2.4.25-sparse/arch/xeno/config.in deleted file mode 100644 index 209bbe6d51..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/config.in +++ /dev/null @@ -1,190 +0,0 @@ -# -# For a description of the syntax of this configuration file, -# see Documentation/kbuild/config-language.txt. -# -mainmenu_name "Linux Kernel Configuration" - -define_bool CONFIG_XENO y - -define_bool CONFIG_X86 y -define_bool CONFIG_ISA y -define_bool CONFIG_SBUS n - -define_bool CONFIG_UID16 y - -mainmenu_option next_comment -comment 'Xenolinux' -bool 'Support for privileged operations (domain 0)' CONFIG_XENO_PRIV -endmenu -# The IBM S/390 patch needs this. -define_bool CONFIG_NO_IDLE_HZ y - -mainmenu_option next_comment -comment 'Code maturity level options' -bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL -endmenu - -mainmenu_option next_comment -comment 'Loadable module support' -bool 'Enable loadable module support' CONFIG_MODULES -if [ "$CONFIG_MODULES" = "y" ]; then - bool ' Set version information on all module symbols' CONFIG_MODVERSIONS - bool ' Kernel module loader' CONFIG_KMOD -fi -endmenu - -mainmenu_option next_comment -comment 'Processor type and features' -choice 'Processor family' \ - "Pentium-Pro/Celeron/Pentium-II CONFIG_M686 \ - Pentium-III/Celeron(Coppermine) CONFIG_MPENTIUMIII \ - Pentium-4 CONFIG_MPENTIUM4 \ - Athlon/Duron/K7 CONFIG_MK7 \ - Opteron/Athlon64/Hammer/K8 CONFIG_MK8 \ - VIA-C3-2 CONFIG_MVIAC3_2" Pentium-Pro - - define_bool CONFIG_X86_WP_WORKS_OK y - define_bool CONFIG_X86_INVLPG y - define_bool CONFIG_X86_CMPXCHG y - define_bool CONFIG_X86_XADD y - define_bool CONFIG_X86_BSWAP y - define_bool CONFIG_X86_POPAD_OK y - define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n - define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y - - define_bool CONFIG_X86_GOOD_APIC y - define_bool CONFIG_X86_PGE y - define_bool CONFIG_X86_USE_PPRO_CHECKSUM y - define_bool CONFIG_X86_TSC y - -if [ "$CONFIG_M686" = "y" ]; then - define_int CONFIG_X86_L1_CACHE_SHIFT 5 -fi -if [ "$CONFIG_MPENTIUMIII" = "y" ]; then - define_int CONFIG_X86_L1_CACHE_SHIFT 5 -fi -if [ "$CONFIG_MPENTIUM4" = "y" ]; then - define_int CONFIG_X86_L1_CACHE_SHIFT 7 -fi -if [ "$CONFIG_MK8" = "y" ]; then - define_bool CONFIG_MK7 y -fi -if [ "$CONFIG_MK7" = "y" ]; then - define_int CONFIG_X86_L1_CACHE_SHIFT 6 - define_bool CONFIG_X86_USE_3DNOW y -fi -if [ "$CONFIG_MVIAC3_2" = "y" ]; then - define_int CONFIG_X86_L1_CACHE_SHIFT 5 -fi - -if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then - tristate 'BIOS Enhanced Disk Drive calls determine boot disk (EXPERIMENTAL)' CONFIG_EDD -fi - -choice 'High Memory Support' \ - "off CONFIG_NOHIGHMEM \ - 4GB CONFIG_HIGHMEM4G \ - 64GB CONFIG_HIGHMEM64G" off -if [ "$CONFIG_HIGHMEM4G" = "y" ]; then - define_bool CONFIG_HIGHMEM y -fi -if [ "$CONFIG_HIGHMEM64G" = "y" ]; then - define_bool CONFIG_HIGHMEM y - define_bool CONFIG_X86_PAE y -fi - -#bool 'Symmetric multi-processing support' CONFIG_SMP -#if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then -# define_bool CONFIG_HAVE_DEC_LOCK y -#fi -endmenu - -mainmenu_option next_comment -comment 'General setup' - -bool 'Networking support' CONFIG_NET - -bool 'PCI support' CONFIG_PCI -if [ "$CONFIG_PCI" = "y" ]; then - tristate ' 3c590/3c900 series (592/595/597) "Vortex/Boomerang" support' CONFIG_VORTEX - tristate 'Intel(R) PRO/1000 Gigabit Ethernet support' CONFIG_E1000 - if [ "$CONFIG_E1000" != "n" ]; then - bool ' Use Rx Polling (NAPI)' CONFIG_E1000_NAPI - fi -fi -source drivers/pci/Config.in - -bool 'System V IPC' CONFIG_SYSVIPC -bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT -bool 'Sysctl support' CONFIG_SYSCTL -if [ "$CONFIG_PROC_FS" = "y" ]; then - choice 'Kernel core (/proc/kcore) format' \ - "ELF CONFIG_KCORE_ELF \ - A.OUT CONFIG_KCORE_AOUT" ELF -fi -tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT -tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF -tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC -bool 'Select task to kill on out of memory condition' CONFIG_OOM_KILLER - -endmenu - -if [ "$CONFIG_NET" = "y" ]; then - source net/Config.in -fi - - -# -# Block device driver configuration -# -mainmenu_option next_comment -comment 'Block devices' -tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP -dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET -tristate 'RAM disk support' CONFIG_BLK_DEV_RAM -if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then - int ' Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096 -fi -dep_bool ' Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM -bool 'Per partition statistics in /proc/partitions' CONFIG_BLK_STATS -bool 'XenoLinux virtual block device support' CONFIG_XENOLINUX_BLOCK -#endmenu -define_bool CONFIG_BLK_DEV_HD n -endmenu - -source drivers/char/Config.in - -source fs/Config.in - -mainmenu_option next_comment -comment 'Console drivers' - -bool 'Xen console support' CONFIG_XEN_CONSOLE - -if [ "$CONFIG_VT" = "y" ]; then - bool 'VGA text console' CONFIG_VGA_CONSOLE - bool 'Dummy console' CONFIG_DUMMY_CONSOLE -fi -endmenu - -mainmenu_option next_comment -comment 'Kernel hacking' - -bool 'Kernel debugging' CONFIG_DEBUG_KERNEL -if [ "$CONFIG_DEBUG_KERNEL" != "n" ]; then - bool ' Debug high memory support' CONFIG_DEBUG_HIGHMEM - bool ' Debug memory allocations' CONFIG_DEBUG_SLAB - bool ' Memory mapped I/O debugging' CONFIG_DEBUG_IOVIRT - bool ' Magic SysRq key' CONFIG_MAGIC_SYSRQ - bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK - bool ' Verbose BUG() reporting (adds 70K)' CONFIG_DEBUG_BUGVERBOSE - bool ' Load all symbols for debugging' CONFIG_KALLSYMS - bool ' Compile the kernel with frame pointers' CONFIG_FRAME_POINTER -fi - -int 'Kernel messages buffer length shift (0 = default)' CONFIG_LOG_BUF_SHIFT 0 - -endmenu - -source crypto/Config.in -source lib/Config.in diff --git a/xenolinux-2.4.25-sparse/arch/xeno/defconfig b/xenolinux-2.4.25-sparse/arch/xeno/defconfig deleted file mode 100644 index b7bedbcf3d..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/defconfig +++ /dev/null @@ -1,457 +0,0 @@ -# -# Automatically generated make config: don't edit -# -CONFIG_XENO=y -CONFIG_X86=y -CONFIG_ISA=y -# CONFIG_SBUS is not set -CONFIG_UID16=y - -# -# Xenolinux options -# -# support for privileged domains -CONFIG_XENO_PRIV=y -# on-demand timer setting (taken from s390 patch set) -CONFIG_NO_IDLE_HZ=y - -# -# Code maturity level options -# -# CONFIG_EXPERIMENTAL is not set - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# Processor type and features -# -CONFIG_M686=y -# CONFIG_MPENTIUMIII is not set -# CONFIG_MPENTIUM4 is not set -# CONFIG_MK7 is not set -CONFIG_X86_WP_WORKS_OK=y -CONFIG_X86_INVLPG=y -CONFIG_X86_CMPXCHG=y -CONFIG_X86_XADD=y -CONFIG_X86_BSWAP=y -CONFIG_X86_POPAD_OK=y -# CONFIG_RWSEM_GENERIC_SPINLOCK is not set -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_X86_GOOD_APIC=y -CONFIG_X86_PGE=y -CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_TSC=y -CONFIG_X86_L1_CACHE_SHIFT=5 -CONFIG_NOHIGHMEM=y -# CONFIG_HIGHMEM4G is not set -# CONFIG_HIGHMEM64G is not set - -# -# General setup -# -CONFIG_NET=y -# CONFIG_PCI is not set -# CONFIG_PCI_NAMES is not set -CONFIG_SYSVIPC=y -# CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y -CONFIG_KCORE_ELF=y -# CONFIG_KCORE_AOUT is not set -CONFIG_BINFMT_AOUT=y -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set -# CONFIG_OOM_KILLER is not set - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -# CONFIG_NETLINK_DEV is not set -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -# CONFIG_IP_PNP_BOOTP is not set -# CONFIG_IP_PNP_RARP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_INET_ECN is not set -# CONFIG_SYN_COOKIES is not set - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=y -CONFIG_IP_NF_FTP=y -# CONFIG_IP_NF_AMANDA is not set -CONFIG_IP_NF_TFTP=y -CONFIG_IP_NF_IRC=y -CONFIG_IP_NF_IPTABLES=y -# CONFIG_IP_NF_MATCH_LIMIT is not set -# CONFIG_IP_NF_MATCH_MAC is not set -# CONFIG_IP_NF_MATCH_PKTTYPE is not set -# CONFIG_IP_NF_MATCH_MARK is not set -# CONFIG_IP_NF_MATCH_MULTIPORT is not set -# CONFIG_IP_NF_MATCH_TOS is not set -# CONFIG_IP_NF_MATCH_RECENT is not set -# CONFIG_IP_NF_MATCH_ECN is not set -# CONFIG_IP_NF_MATCH_DSCP is not set -# CONFIG_IP_NF_MATCH_AH_ESP is not set -# CONFIG_IP_NF_MATCH_LENGTH is not set -# CONFIG_IP_NF_MATCH_TTL is not set -# CONFIG_IP_NF_MATCH_TCPMSS is not set -# CONFIG_IP_NF_MATCH_HELPER is not set -CONFIG_IP_NF_MATCH_STATE=y -CONFIG_IP_NF_MATCH_CONNTRACK=y -CONFIG_IP_NF_FILTER=y -CONFIG_IP_NF_TARGET_REJECT=y -CONFIG_IP_NF_NAT=y -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=y -CONFIG_IP_NF_TARGET_REDIRECT=y -# CONFIG_IP_NF_NAT_LOCAL is not set -CONFIG_IP_NF_NAT_IRC=y -CONFIG_IP_NF_NAT_FTP=y -CONFIG_IP_NF_NAT_TFTP=y -# CONFIG_IP_NF_MANGLE is not set -CONFIG_IP_NF_TARGET_LOG=y -CONFIG_IP_NF_TARGET_ULOG=y -# CONFIG_IP_NF_TARGET_TCPMSS is not set -# CONFIG_IP_NF_ARPTABLES is not set - -# -# IP: Virtual Server Configuration -# -# CONFIG_IP_VS is not set -# CONFIG_VLAN_8021Q is not set - -# -# -# -# CONFIG_IPX is not set -# CONFIG_ATALK is not set - -# -# Appletalk devices -# -# CONFIG_DEV_APPLETALK is not set -# CONFIG_DECNET is not set -# CONFIG_BRIDGE is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set - -# -# Block devices -# -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_NBD=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_INITRD=y -# CONFIG_BLK_STATS is not set -CONFIG_XENOLINUX_BLOCK=y -# CONFIG_BLK_DEV_HD is not set - -# -# Character devices -# -CONFIG_VT=y -CONFIG_VT_CONSOLE=y -# CONFIG_SERIAL is not set -# CONFIG_SERIAL_EXTENDED is not set -# CONFIG_SERIAL_NONSTANDARD is not set -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=256 -# CONFIG_PRINTER is not set -# CONFIG_PPDEV is not set -# CONFIG_TIPAR is not set - -# -# I2C support -# -# CONFIG_I2C is not set - -# -# Mice -# -# CONFIG_BUSMOUSE is not set -CONFIG_MOUSE=y -CONFIG_PSMOUSE=y -# CONFIG_82C710_MOUSE is not set -# CONFIG_PC110_PAD is not set -# CONFIG_MK712_MOUSE is not set - -# -# Joysticks -# -# CONFIG_INPUT_GAMEPORT is not set -# CONFIG_INPUT_NS558 is not set -# CONFIG_INPUT_LIGHTNING is not set -# CONFIG_INPUT_PCIGAME is not set -# CONFIG_INPUT_CS461X is not set -# CONFIG_INPUT_EMU10K1 is not set -# CONFIG_INPUT_SERIO is not set -# CONFIG_INPUT_SERPORT is not set - -# -# Joysticks -# -# CONFIG_INPUT_ANALOG is not set -# CONFIG_INPUT_A3D is not set -# CONFIG_INPUT_ADI is not set -# CONFIG_INPUT_COBRA is not set -# CONFIG_INPUT_GF2K is not set -# CONFIG_INPUT_GRIP is not set -# CONFIG_INPUT_INTERACT is not set -# CONFIG_INPUT_TMDC is not set -# CONFIG_INPUT_SIDEWINDER is not set -# CONFIG_INPUT_IFORCE_USB is not set -# CONFIG_INPUT_IFORCE_232 is not set -# CONFIG_INPUT_WARRIOR is not set -# CONFIG_INPUT_MAGELLAN is not set -# CONFIG_INPUT_SPACEORB is not set -# CONFIG_INPUT_SPACEBALL is not set -# CONFIG_INPUT_STINGER is not set -# CONFIG_INPUT_DB9 is not set -# CONFIG_INPUT_GAMECON is not set -# CONFIG_INPUT_TURBOGRAFX is not set -# CONFIG_QIC02_TAPE is not set -# CONFIG_IPMI_HANDLER is not set -# CONFIG_IPMI_PANIC_EVENT is not set -# CONFIG_IPMI_DEVICE_INTERFACE is not set -# CONFIG_IPMI_KCS is not set -# CONFIG_IPMI_WATCHDOG is not set - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set -# CONFIG_SCx200 is not set -# CONFIG_SCx200_GPIO is not set -# CONFIG_AMD_RNG is not set -# CONFIG_INTEL_RNG is not set -# CONFIG_HW_RANDOM is not set -# CONFIG_AMD_PM768 is not set -# CONFIG_NVRAM is not set -# CONFIG_RTC is not set -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set -# CONFIG_APPLICOM is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_FTAPE is not set -# CONFIG_AGP is not set - -# -# Direct Rendering Manager (XFree86 DRI support) -# -# CONFIG_DRM is not set -# CONFIG_MWAVE is not set -# CONFIG_OBMOUSE is not set - -# -# File systems -# -# CONFIG_QUOTA is not set -# CONFIG_QFMT_V2 is not set -CONFIG_AUTOFS_FS=y -CONFIG_AUTOFS4_FS=y -# CONFIG_REISERFS_FS is not set -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set -# CONFIG_ADFS_FS is not set -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BEFS_DEBUG is not set -# CONFIG_BFS_FS is not set -CONFIG_EXT3_FS=y -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -CONFIG_FAT_FS=y -CONFIG_MSDOS_FS=y -CONFIG_UMSDOS_FS=y -CONFIG_VFAT_FS=y -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS is not set -# CONFIG_CRAMFS is not set -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -# CONFIG_JFS_FS is not set -# CONFIG_JFS_DEBUG is not set -# CONFIG_JFS_STATISTICS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -# CONFIG_ROMFS_FS is not set -CONFIG_EXT2_FS=y -# CONFIG_SYSV_FS is not set -# CONFIG_UDF_FS is not set -# CONFIG_UDF_RW is not set -# CONFIG_UFS_FS is not set -# CONFIG_UFS_FS_WRITE is not set -# CONFIG_XFS_FS is not set -# CONFIG_XFS_QUOTA is not set -# CONFIG_XFS_RT is not set -# CONFIG_XFS_TRACE is not set -# CONFIG_XFS_DEBUG is not set - -# -# Network File Systems -# -# CONFIG_CODA_FS is not set -# CONFIG_INTERMEZZO_FS is not set -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -# CONFIG_NFS_DIRECTIO is not set -CONFIG_ROOT_NFS=y -CONFIG_NFSD=y -CONFIG_NFSD_V3=y -# CONFIG_NFSD_TCP is not set -CONFIG_SUNRPC=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -# CONFIG_SMB_FS is not set -# CONFIG_NCP_FS is not set -# CONFIG_NCPFS_PACKET_SIGNING is not set -# CONFIG_NCPFS_IOCTL_LOCKING is not set -# CONFIG_NCPFS_STRONG is not set -# CONFIG_NCPFS_NFS_NS is not set -# CONFIG_NCPFS_OS2_NS is not set -# CONFIG_NCPFS_SMALLDOS is not set -# CONFIG_NCPFS_NLS is not set -# CONFIG_NCPFS_EXTRAS is not set -CONFIG_ZISOFS_FS=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -# CONFIG_MAC_PARTITION is not set -CONFIG_MSDOS_PARTITION=y -# CONFIG_BSD_DISKLABEL is not set -# CONFIG_MINIX_SUBPARTITION is not set -# CONFIG_SOLARIS_X86_PARTITION is not set -# CONFIG_UNIXWARE_DISKLABEL is not set -# CONFIG_LDM_PARTITION is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -# CONFIG_EFI_PARTITION is not set -# CONFIG_SMB_NLS is not set -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8559-1" -# CONFIG_NLS_CODEPAGE_437 is not set -# CONFIG_NLS_CODEPAGE_737 is not set -# CONFIG_NLS_CODEPAGE_775 is not set -# CONFIG_NLS_CODEPAGE_850 is not set -# CONFIG_NLS_CODEPAGE_852 is not set -# CONFIG_NLS_CODEPAGE_855 is not set -# CONFIG_NLS_CODEPAGE_857 is not set -# CONFIG_NLS_CODEPAGE_860 is not set -# CONFIG_NLS_CODEPAGE_861 is not set -# CONFIG_NLS_CODEPAGE_862 is not set -# CONFIG_NLS_CODEPAGE_863 is not set -# CONFIG_NLS_CODEPAGE_864 is not set -# CONFIG_NLS_CODEPAGE_865 is not set -# CONFIG_NLS_CODEPAGE_866 is not set -# CONFIG_NLS_CODEPAGE_869 is not set -# CONFIG_NLS_CODEPAGE_936 is not set -# CONFIG_NLS_CODEPAGE_950 is not set -# CONFIG_NLS_CODEPAGE_932 is not set -# CONFIG_NLS_CODEPAGE_949 is not set -# CONFIG_NLS_CODEPAGE_874 is not set -# CONFIG_NLS_ISO8859_8 is not set -# CONFIG_NLS_CODEPAGE_1250 is not set -# CONFIG_NLS_CODEPAGE_1251 is not set -CONFIG_NLS_ISO8859_1=y -# CONFIG_NLS_ISO8859_2 is not set -# CONFIG_NLS_ISO8859_3 is not set -# CONFIG_NLS_ISO8859_4 is not set -# CONFIG_NLS_ISO8859_5 is not set -# CONFIG_NLS_ISO8859_6 is not set -# CONFIG_NLS_ISO8859_7 is not set -# CONFIG_NLS_ISO8859_9 is not set -# CONFIG_NLS_ISO8859_13 is not set -# CONFIG_NLS_ISO8859_14 is not set -# CONFIG_NLS_ISO8859_15 is not set -# CONFIG_NLS_KOI8_R is not set -# CONFIG_NLS_KOI8_U is not set -# CONFIG_NLS_UTF8 is not set - -# -# Console drivers -# -CONFIG_XEN_CONSOLE=y -CONFIG_VGA_CONSOLE=y -CONFIG_DUMMY_CONSOLE=y - -# -# Kernel hacking -# -CONFIG_DEBUG_KERNEL=y -# CONFIG_DEBUG_HIGHMEM is not set -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_IOVIRT is not set -# CONFIG_MAGIC_SYSRQ is not set -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_DEBUG_BUGVERBOSE is not set -CONFIG_KALLSYMS=y -# CONFIG_FRAME_POINTER is not set -CONFIG_LOG_BUF_SHIFT=0 - -# -# Cryptographic options -# -# CONFIG_CRYPTO is not set - -# -# Library routines -# -# CONFIG_CRC32 is not set -CONFIG_ZLIB_INFLATE=y -# CONFIG_ZLIB_DEFLATE is not set diff --git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/balloon/Makefile b/xenolinux-2.4.25-sparse/arch/xeno/drivers/balloon/Makefile deleted file mode 100644 index 9fb2227978..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/balloon/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-y := balloon.o -include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/balloon/balloon.c b/xenolinux-2.4.25-sparse/arch/xeno/drivers/balloon/balloon.c deleted file mode 100644 index b7e6802077..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/balloon/balloon.c +++ /dev/null @@ -1,282 +0,0 @@ -/****************************************************************************** - * balloon.c - * - * Xeno balloon driver - enables returning/claiming memory to/from xen - * - * Copyright (c) 2003, B Dragovic - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include - -/* USER DEFINES -- THESE SHOULD BE COPIED TO USER-SPACE TOOLS */ -#define USER_INFLATE_BALLOON 1 /* return mem to hypervisor */ -#define USER_DEFLATE_BALLOON 2 /* claim mem from hypervisor */ -typedef struct user_balloon_op { - unsigned int op; - unsigned long size; -} user_balloon_op_t; -/* END OF USER DEFINE */ - -/* Dead entry written into ballon-owned entries in the PMT. */ -#define DEAD 0xdeadbeef - -static struct proc_dir_entry *balloon_pde; -unsigned long credit; - -static inline pte_t *get_ptep(unsigned long addr) -{ - pgd_t *pgd; pmd_t *pmd; pte_t *ptep; - pgd = pgd_offset_k(addr); - - if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG(); - - pmd = pmd_offset(pgd, addr); - if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG(); - - ptep = pte_offset(pmd, addr); - - return ptep; -} - -/* main function for relinquishing bit of memory */ -static unsigned long inflate_balloon(unsigned long num_pages) -{ - dom_mem_op_t dom_mem_op; - unsigned long *parray; - unsigned long *currp; - unsigned long curraddr; - unsigned long ret = 0; - unsigned long vaddr; - unsigned long i, j; - - parray = (unsigned long *)kmalloc(num_pages * - sizeof(unsigned long), GFP_KERNEL); - currp = parray; - - for ( i = 0; i < num_pages; i++ ) - { - /* try to obtain a free page, has to be done with GFP_ATOMIC - * as we do not want to sleep indefinately. - */ - vaddr = __get_free_page(GFP_ATOMIC); - - /* if allocation fails, free all reserved pages */ - if(!vaddr){ - printk("Unable to inflate balloon by %ld, only %ld pages free.", - num_pages, i); - currp = parray; - for(j = 0; j < i; j++){ - free_page(*currp++); - } - goto cleanup; - } - - *currp++ = vaddr; - } - - - currp = parray; - for ( i = 0; i < num_pages; i++ ) - { - curraddr = *currp; - *currp = virt_to_machine(*currp) >> PAGE_SHIFT; - queue_l1_entry_update(get_ptep(curraddr), 0); - phys_to_machine_mapping[__pa(curraddr) >> PAGE_SHIFT] = DEAD; - currp++; - } - - XENO_flush_page_update_queue(); - - dom_mem_op.op = MEMOP_RESERVATION_DECREASE; - dom_mem_op.u.decrease.size = num_pages; - dom_mem_op.u.decrease.pages = parray; - if ( (ret = HYPERVISOR_dom_mem_op(&dom_mem_op)) != num_pages ) - { - printk("Unable to inflate balloon, error %lx\n", ret); - goto cleanup; - } - - credit += num_pages; - ret = num_pages; - - cleanup: - kfree(parray); - - return ret; -} - -/* install new mem pages obtained by deflate_balloon. function walks - * phys->machine mapping table looking for DEAD entries and populates - * them. - */ -static unsigned long process_new_pages(unsigned long * parray, - unsigned long num) -{ - /* currently, this function is rather simplistic as - * it is assumed that domain reclaims only number of - * pages previously released. this is to change soon - * and the code to extend page tables etc. will be - * incorporated here. - */ - - unsigned long tot_pages = start_info.nr_pages; - unsigned long * curr = parray; - unsigned long num_installed; - unsigned long i; - - num_installed = 0; - for ( i = 0; (i < tot_pages) && (num_installed < num); i++ ) - { - if ( phys_to_machine_mapping[i] == DEAD ) - { - phys_to_machine_mapping[i] = *curr; - queue_l1_entry_update( - (pte_t *)((i << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE), i); - queue_l1_entry_update( - get_ptep((unsigned long)__va(i << PAGE_SHIFT)), - ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL)); - - *curr = (unsigned long)__va(i << PAGE_SHIFT); - curr++; - num_installed++; - } - } - - /* now, this is tricky (and will also change for machine addrs that - * are mapped to not previously released addresses). we free pages - * that were allocated by get_free_page (the mappings are different - * now, of course). - */ - curr = parray; - for ( i = 0; i < num_installed; i++ ) - { - free_page(*curr); - curr++; - } - - return num_installed; -} - -unsigned long deflate_balloon(unsigned long num_pages) -{ - dom_mem_op_t dom_mem_op; - unsigned long ret; - unsigned long * parray; - - printk(KERN_ALERT "bd240 debug: deflate balloon called for %lx pages\n", num_pages); - - if ( num_pages > credit ) - { - printk("Can not allocate more pages than previously released.\n"); - return -EAGAIN; - } - - parray = (unsigned long *)kmalloc(num_pages * sizeof(unsigned long), - GFP_KERNEL); - - dom_mem_op.op = MEMOP_RESERVATION_INCREASE; - dom_mem_op.u.increase.size = num_pages; - dom_mem_op.u.increase.pages = parray; - if((ret = HYPERVISOR_dom_mem_op(&dom_mem_op)) != num_pages){ - printk("Unable to deflate balloon, error %lx\n", ret); - goto cleanup; - } - - if((ret = process_new_pages(parray, num_pages)) < num_pages){ - printk("Unable to deflate balloon by specified %lx pages, only %lx.\n", - num_pages, ret); - goto cleanup; - } - - ret = num_pages; - credit -= num_pages; - - cleanup: - kfree(parray); - - return ret; -} - -static int balloon_write(struct file *file, const char *buffer, - u_long count, void *data) -{ - user_balloon_op_t bop; - - /* Only admin can play with the balloon :) */ - if ( !capable(CAP_SYS_ADMIN) ) - return -EPERM; - - if ( copy_from_user(&bop, buffer, sizeof(bop)) ) - return -EFAULT; - - switch ( bop.op ) - { - case USER_INFLATE_BALLOON: - if ( inflate_balloon(bop.size) < bop.size ) - return -EAGAIN; - break; - - case USER_DEFLATE_BALLOON: - deflate_balloon(bop.size); - break; - - default: - printk("Unknown command to balloon driver."); - return -EFAULT; - } - - return sizeof(bop); -} - -/* - * main balloon driver initialization function. - */ -static int __init init_module(void) -{ - printk(KERN_ALERT "Starting Xeno Balloon driver\n"); - - credit = 0; - - balloon_pde = create_xeno_proc_entry("balloon", 0600); - if ( balloon_pde == NULL ) - { - printk(KERN_ALERT "Unable to create balloon driver proc entry!"); - return -1; - } - - balloon_pde->write_proc = balloon_write; - - return 0; -} - -static void __exit cleanup_module(void) -{ - if ( balloon_pde != NULL ) - { - remove_xeno_proc_entry("balloon"); - balloon_pde = NULL; - } -} - -module_init(init_module); -module_exit(cleanup_module); - - diff --git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/block/Makefile b/xenolinux-2.4.25-sparse/arch/xeno/drivers/block/Makefile deleted file mode 100644 index 35986ca54a..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/block/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-y := block.o vbd.o -include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/block/block.c b/xenolinux-2.4.25-sparse/arch/xeno/drivers/block/block.c deleted file mode 100644 index c01a44d8f1..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/block/block.c +++ /dev/null @@ -1,621 +0,0 @@ -/****************************************************************************** - * block.c - * - * Xenolinux virtual block-device driver. - * - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge - */ - -#include "block.h" -#include -#include -#include -#include -#include - -#include - -typedef unsigned char byte; /* from linux/ide.h */ - -#define XLBLK_RESPONSE_IRQ HYPEREVENT_IRQ(_EVENT_BLKDEV) -#define XLBLK_UPDATE_IRQ HYPEREVENT_IRQ(_EVENT_VBD_UPD) -#define DEBUG_IRQ HYPEREVENT_IRQ(_EVENT_DEBUG) - -#define STATE_ACTIVE 0 -#define STATE_SUSPENDED 1 -#define STATE_CLOSED 2 -static unsigned int state = STATE_SUSPENDED; - -static blk_ring_t *blk_ring; -static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */ -static BLK_RING_IDX req_prod; /* Private request producer. */ - -/* We plug the I/O ring if the driver is suspended or if the ring is full. */ -#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \ - (state != STATE_ACTIVE)) - - -/* - * Request queues with outstanding work, but ring is currently full. - * We need no special lock here, as we always access this with the - * io_request_lock held. We only need a small maximum list. - */ -#define MAX_PENDING 8 -static request_queue_t *pending_queues[MAX_PENDING]; -static int nr_pending; - -static kdev_t sg_dev; -static int sg_operation = -1; -static unsigned long sg_next_sect; -#define DISABLE_SCATTERGATHER() (sg_operation = -1) - -static inline void signal_requests_to_xen(void) -{ - block_io_op_t op; - - DISABLE_SCATTERGATHER(); - blk_ring->req_prod = req_prod; - - op.cmd = BLOCK_IO_OP_SIGNAL; - HYPERVISOR_block_io_op(&op); - return; -} - - -/* - * xlblk_update_int/update-vbds_task - handle VBD update events from Xen - * - * Schedule a task for keventd to run, which will update the VBDs and perform - * the corresponding updates to our view of VBD state, so the XenoLinux will - * respond to changes / additions / deletions to the set of VBDs automatically. - */ -static struct tq_struct update_tq; -static void update_vbds_task(void *unused) -{ - xlvbd_update_vbds(); -} -static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs) -{ - update_tq.routine = update_vbds_task; - schedule_task(&update_tq); -} - - -int xenolinux_block_open(struct inode *inode, struct file *filep) -{ - short xldev = inode->i_rdev; - struct gendisk *gd = get_gendisk(xldev); - xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); - short minor = MINOR(xldev); - - if ( gd->part[minor].nr_sects == 0 ) - { - /* - * Device either doesn't exist, or has zero capacity; we use a few - * cheesy heuristics to return the relevant error code - */ - if ( (gd->sizes[minor >> gd->minor_shift] != 0) || - ((minor & (gd->max_p - 1)) != 0) ) - { - /* - * We have a real device, but no such partition, or we just have a - * partition number so guess this is the problem. - */ - return -ENXIO; /* no such device or address */ - } - else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE ) - { - /* This is a removable device => assume that media is missing. */ - return -ENOMEDIUM; /* media not present (this is a guess) */ - } - else - { - /* Just go for the general 'no such device' error. */ - return -ENODEV; /* no such device */ - } - } - - /* Update of usage count is protected by per-device semaphore. */ - disk->usage++; - - return 0; -} - - -int xenolinux_block_release(struct inode *inode, struct file *filep) -{ - xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); - - /* - * When usage drops to zero it may allow more VBD updates to occur. - * Update of usage count is protected by a per-device semaphore. - */ - if ( --disk->usage == 0 ) - { - update_tq.routine = update_vbds_task; - schedule_task(&update_tq); - } - - return 0; -} - - -int xenolinux_block_ioctl(struct inode *inode, struct file *filep, - unsigned command, unsigned long argument) -{ - kdev_t dev = inode->i_rdev; - struct hd_geometry *geo = (struct hd_geometry *)argument; - struct gendisk *gd; - struct hd_struct *part; - int i; - - /* NB. No need to check permissions. That is done for us. */ - - DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", - command, (long) argument, dev); - - gd = get_gendisk(dev); - part = &gd->part[MINOR(dev)]; - - switch ( command ) - { - case BLKGETSIZE: - DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); - return put_user(part->nr_sects, (unsigned long *) argument); - - case BLKGETSIZE64: - DPRINTK_IOCTL(" BLKGETSIZE64: %x %llx\n", BLKGETSIZE64, - (u64)part->nr_sects * 512); - return put_user((u64)part->nr_sects * 512, (u64 *) argument); - - case BLKRRPART: /* re-read partition table */ - DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART); - return xenolinux_block_revalidate(dev); - - case BLKSSZGET: - return hardsect_size[MAJOR(dev)][MINOR(dev)]; - - case BLKBSZGET: /* get block size */ - DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET); - break; - - case BLKBSZSET: /* set block size */ - DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET); - break; - - case BLKRASET: /* set read-ahead */ - DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET); - break; - - case BLKRAGET: /* get read-ahead */ - DPRINTK_IOCTL(" BLKRAFET: %x\n", BLKRAGET); - break; - - case HDIO_GETGEO: - /* note: these values are complete garbage */ - DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO); - if (!argument) return -EINVAL; - if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; - if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; - if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; - if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT; - return 0; - - case HDIO_GETGEO_BIG: - /* note: these values are complete garbage */ - DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); - if (!argument) return -EINVAL; - if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; - if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; - if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; - if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT; - return 0; - - case CDROMMULTISESSION: - DPRINTK("FIXME: support multisession CDs later\n"); - for ( i = 0; i < sizeof(struct cdrom_multisession); i++ ) - if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT; - return 0; - - case SCSI_IOCTL_GET_BUS_NUMBER: - DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in Xen blkdev"); - return -ENOSYS; - - default: - printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", command); - return -ENOSYS; - } - - return 0; -} - -/* check media change: should probably do something here in some cases :-) */ -int xenolinux_block_check(kdev_t dev) -{ - DPRINTK("xenolinux_block_check\n"); - return 0; -} - -int xenolinux_block_revalidate(kdev_t dev) -{ - struct block_device *bd; - struct gendisk *gd; - xl_disk_t *disk; - unsigned long capacity; - int i, rc = 0; - - if ( (bd = bdget(dev)) == NULL ) - return -EINVAL; - - /* - * Update of partition info, and check of usage count, is protected - * by the per-block-device semaphore. - */ - down(&bd->bd_sem); - - if ( ((gd = get_gendisk(dev)) == NULL) || - ((disk = xldev_to_xldisk(dev)) == NULL) || - ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) ) - { - rc = -EINVAL; - goto out; - } - - if ( disk->usage > 1 ) - { - rc = -EBUSY; - goto out; - } - - /* Only reread partition table if VBDs aren't mapped to partitions. */ - if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) ) - { - for ( i = gd->max_p - 1; i >= 0; i-- ) - { - invalidate_device(dev+i, 1); - gd->part[MINOR(dev+i)].start_sect = 0; - gd->part[MINOR(dev+i)].nr_sects = 0; - gd->sizes[MINOR(dev+i)] = 0; - } - - grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity); - } - - out: - up(&bd->bd_sem); - bdput(bd); - return rc; -} - - -/* - * hypervisor_request - * - * request block io - * - * id: for guest use only. - * operation: XEN_BLOCK_{READ,WRITE,PROBE,VBD*} - * buffer: buffer to read/write into. this should be a - * virtual address in the guest os. - */ -static int hypervisor_request(unsigned long id, - int operation, - char * buffer, - unsigned long sector_number, - unsigned short nr_sectors, - kdev_t device) -{ - unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); - struct gendisk *gd; - blk_ring_req_entry_t *req; - struct buffer_head *bh; - - if ( unlikely(nr_sectors >= (1<<9)) ) - BUG(); - if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) ) - BUG(); - - if ( unlikely(state == STATE_CLOSED) ) - return 1; - - switch ( operation ) - { - - case XEN_BLOCK_READ: - case XEN_BLOCK_WRITE: - gd = get_gendisk(device); - - /* - * Update the sector_number we'll pass down as appropriate; note that - * we could sanity check that resulting sector will be in this - * partition, but this will happen in xen anyhow. - */ - sector_number += gd->part[MINOR(device)].start_sect; - - /* - * If this unit doesn't consist of virtual (i.e., Xen-specified) - * partitions then we clear the partn bits from the device number. - */ - if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & - GENHD_FL_VIRT_PARTNS) ) - device &= ~(gd->max_p - 1); - - if ( (sg_operation == operation) && - (sg_dev == device) && - (sg_next_sect == sector_number) ) - { - req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req; - bh = (struct buffer_head *)id; - bh->b_reqnext = (struct buffer_head *)req->id; - req->id = id; - req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors; - if ( ++req->nr_segments < MAX_BLK_SEGS ) - sg_next_sect += nr_sectors; - else - DISABLE_SCATTERGATHER(); - return 0; - } - else if ( RING_PLUGGED ) - { - return 1; - } - else - { - sg_operation = operation; - sg_dev = device; - sg_next_sect = sector_number + nr_sectors; - } - break; - - default: - panic("unknown op %d\n", operation); - } - - /* Fill out a communications ring structure. */ - req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req; - req->id = id; - req->operation = operation; - req->sector_number = (xen_sector_t)sector_number; - req->device = device; - req->nr_segments = 1; - req->buffer_and_sects[0] = buffer_ma | nr_sectors; - req_prod++; - - return 0; -} - - -/* - * do_xlblk_request - * read a block; request is in a request queue - */ -void do_xlblk_request(request_queue_t *rq) -{ - struct request *req; - struct buffer_head *bh, *next_bh; - int rw, nsect, full, queued = 0; - - DPRINTK("xlblk.c::do_xlblk_request\n"); - - while ( !rq->plugged && !list_empty(&rq->queue_head)) - { - if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) - goto out; - - DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n", - req, req->cmd, req->sector, - req->current_nr_sectors, req->nr_sectors, req->bh); - - rw = req->cmd; - if ( rw == READA ) - rw = READ; - if ( unlikely((rw != READ) && (rw != WRITE)) ) - panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw); - - req->errors = 0; - - bh = req->bh; - while ( bh != NULL ) - { - next_bh = bh->b_reqnext; - bh->b_reqnext = NULL; - - full = hypervisor_request( - (unsigned long)bh, - (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, - bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev); - - if ( full ) - { - bh->b_reqnext = next_bh; - pending_queues[nr_pending++] = rq; - if ( unlikely(nr_pending >= MAX_PENDING) ) - BUG(); - goto out; - } - - queued++; - - /* Dequeue the buffer head from the request. */ - nsect = bh->b_size >> 9; - bh = req->bh = next_bh; - - if ( bh != NULL ) - { - /* There's another buffer head to do. Update the request. */ - req->hard_sector += nsect; - req->hard_nr_sectors -= nsect; - req->sector = req->hard_sector; - req->nr_sectors = req->hard_nr_sectors; - req->current_nr_sectors = bh->b_size >> 9; - req->buffer = bh->b_data; - } - else - { - /* That was the last buffer head. Finalise the request. */ - if ( unlikely(end_that_request_first(req, 1, "XenBlk")) ) - BUG(); - blkdev_dequeue_request(req); - end_that_request_last(req); - } - } - } - - out: - if ( queued != 0 ) signal_requests_to_xen(); -} - - -static void kick_pending_request_queues(void) -{ - /* We kick pending request queues if the ring is reasonably empty. */ - if ( (nr_pending != 0) && - ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) ) - { - /* Attempt to drain the queue, but bail if the ring becomes full. */ - while ( (nr_pending != 0) && !RING_PLUGGED ) - do_xlblk_request(pending_queues[--nr_pending]); - } -} - - -static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs) -{ - BLK_RING_IDX i; - unsigned long flags; - struct buffer_head *bh, *next_bh; - - if ( unlikely(state == STATE_CLOSED) ) - return; - - spin_lock_irqsave(&io_request_lock, flags); - - for ( i = resp_cons; i != blk_ring->resp_prod; i++ ) - { - blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp; - switch ( bret->operation ) - { - case XEN_BLOCK_READ: - case XEN_BLOCK_WRITE: - if ( unlikely(bret->status != 0) ) - DPRINTK("Bad return from blkdev data request: %lx\n", - bret->status); - for ( bh = (struct buffer_head *)bret->id; - bh != NULL; - bh = next_bh ) - { - next_bh = bh->b_reqnext; - bh->b_reqnext = NULL; - bh->b_end_io(bh, !bret->status); - } - break; - - default: - BUG(); - } - } - - resp_cons = i; - - kick_pending_request_queues(); - - spin_unlock_irqrestore(&io_request_lock, flags); -} - - -static void reset_xlblk_interface(void) -{ - block_io_op_t op; - - nr_pending = 0; - - op.cmd = BLOCK_IO_OP_RESET; - if ( HYPERVISOR_block_io_op(&op) != 0 ) - printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n"); - - op.cmd = BLOCK_IO_OP_RING_ADDRESS; - (void)HYPERVISOR_block_io_op(&op); - - set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT); - blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE); - blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0; - - wmb(); - state = STATE_ACTIVE; -} - - -int __init xlblk_init(void) -{ - int error; - - reset_xlblk_interface(); - - error = request_irq(XLBLK_RESPONSE_IRQ, xlblk_response_int, - SA_SAMPLE_RANDOM, "blkdev", NULL); - if ( error ) - { - printk(KERN_ALERT "Could not allocate receive interrupt\n"); - goto fail; - } - - error = request_irq(XLBLK_UPDATE_IRQ, xlblk_update_int, - SA_INTERRUPT, "blkdev", NULL); - - if ( error ) - { - printk(KERN_ALERT "Could not allocate block update interrupt\n"); - goto fail; - } - - (void)xlvbd_init(); - - return 0; - - fail: - return error; -} - - -static void __exit xlblk_cleanup(void) -{ - xlvbd_cleanup(); - free_irq(XLBLK_RESPONSE_IRQ, NULL); - free_irq(XLBLK_UPDATE_IRQ, NULL); -} - - -#ifdef MODULE -module_init(xlblk_init); -module_exit(xlblk_cleanup); -#endif - - -void blkdev_suspend(void) -{ - state = STATE_SUSPENDED; - wmb(); - - while ( resp_cons != blk_ring->req_prod ) - { - barrier(); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1); - } - - wmb(); - state = STATE_CLOSED; - wmb(); - - clear_fixmap(FIX_BLKRING_BASE); -} - - -void blkdev_resume(void) -{ - reset_xlblk_interface(); - spin_lock_irq(&io_request_lock); - kick_pending_request_queues(); - spin_unlock_irq(&io_request_lock); -} diff --git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/block/block.h b/xenolinux-2.4.25-sparse/arch/xeno/drivers/block/block.h deleted file mode 100644 index ef8c241387..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/block/block.h +++ /dev/null @@ -1,82 +0,0 @@ -/****************************************************************************** - * block.h - * - * Shared definitions between all levels of XenoLinux Virtual block devices. - */ - -#ifndef __XENO_DRIVERS_BLOCK_H__ -#define __XENO_DRIVERS_BLOCK_H__ - -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#if 0 -#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a ) -#else -#define DPRINTK(_f, _a...) ((void)0) -#endif - -#if 0 -#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a ) -#else -#define DPRINTK_IOCTL(_f, _a...) ((void)0) -#endif - -/* Private gendisk->flags[] values. */ -#define GENHD_FL_XENO 2 /* Is unit a Xen block device? */ -#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */ - -/* - * We have one of these per vbd, whether ide, scsi or 'other'. - * They hang in an array off the gendisk structure. We may end up putting - * all kinds of interesting stuff here :-) - */ -typedef struct xl_disk { - int usage; -} xl_disk_t; - -extern int xenolinux_control_msg(int operration, char *buffer, int size); -extern int xenolinux_block_open(struct inode *inode, struct file *filep); -extern int xenolinux_block_release(struct inode *inode, struct file *filep); -extern int xenolinux_block_ioctl(struct inode *inode, struct file *filep, - unsigned command, unsigned long argument); -extern int xenolinux_block_check(kdev_t dev); -extern int xenolinux_block_revalidate(kdev_t dev); -extern void do_xlblk_request (request_queue_t *rq); - -extern void xlvbd_update_vbds(void); - -static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev) -{ - struct gendisk *gd = get_gendisk(xldev); - - if ( gd == NULL ) - return NULL; - - return (xl_disk_t *)gd->real_devices + - (MINOR(xldev) >> gd->minor_shift); -} - - -/* Virtual block-device subsystem. */ -extern int xlvbd_init(void); -extern void xlvbd_cleanup(void); - -#endif /* __XENO_DRIVERS_BLOCK_H__ */ diff --git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/block/vbd.c b/xenolinux-2.4.25-sparse/arch/xeno/drivers/block/vbd.c deleted file mode 100644 index e3473dab28..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/block/vbd.c +++ /dev/null @@ -1,561 +0,0 @@ -/****************************************************************************** - * vbd.c - * - * Xenolinux virtual block-device driver (xvd). - * - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge - */ - -#include "block.h" -#include - -/* - * For convenience we distinguish between ide, scsi and 'other' (i.e. - * potentially combinations of the two) in the naming scheme and in a few - * other places (like default readahead, etc). - */ -#define XLIDE_MAJOR_NAME "hd" -#define XLSCSI_MAJOR_NAME "sd" -#define XLVBD_MAJOR_NAME "xvd" - -#define XLIDE_DEVS_PER_MAJOR 2 -#define XLSCSI_DEVS_PER_MAJOR 16 -#define XLVBD_DEVS_PER_MAJOR 16 - -#define XLIDE_PARTN_SHIFT 6 /* amount to shift minor to get 'real' minor */ -#define XLIDE_MAX_PART (1 << XLIDE_PARTN_SHIFT) /* minors per ide vbd */ - -#define XLSCSI_PARTN_SHIFT 4 /* amount to shift minor to get 'real' minor */ -#define XLSCSI_MAX_PART (1 << XLSCSI_PARTN_SHIFT) /* minors per scsi vbd */ - -#define XLVBD_PARTN_SHIFT 4 /* amount to shift minor to get 'real' minor */ -#define XLVBD_MAX_PART (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */ - -/* The below are for the generic drivers/block/ll_rw_block.c code. */ -static int xlide_blksize_size[256]; -static int xlide_hardsect_size[256]; -static int xlide_max_sectors[256]; -static int xlscsi_blksize_size[256]; -static int xlscsi_hardsect_size[256]; -static int xlscsi_max_sectors[256]; -static int xlvbd_blksize_size[256]; -static int xlvbd_hardsect_size[256]; -static int xlvbd_max_sectors[256]; - -/* Information from Xen about our VBDs. */ -#define MAX_VBDS 64 -static int nr_vbds; -static xen_disk_t *vbd_info; - -static struct block_device_operations xlvbd_block_fops = -{ - open: xenolinux_block_open, - release: xenolinux_block_release, - ioctl: xenolinux_block_ioctl, - check_media_change: xenolinux_block_check, - revalidate: xenolinux_block_revalidate, -}; - -static int xlvbd_get_vbd_info(xen_disk_t *disk_info) -{ - int error; - block_io_op_t op; - - /* Probe for disk information. */ - memset(&op, 0, sizeof(op)); - op.cmd = BLOCK_IO_OP_VBD_PROBE; - op.u.probe_params.domain = 0; - op.u.probe_params.xdi.max = MAX_VBDS; - op.u.probe_params.xdi.disks = disk_info; - op.u.probe_params.xdi.count = 0; - - if ( (error = HYPERVISOR_block_io_op(&op)) != 0 ) - { - printk(KERN_ALERT "Could not probe disks (%d)\n", error); - return -1; - } - - return op.u.probe_params.xdi.count; -} - -/* - * xlvbd_init_device - initialise a VBD device - * @disk: a xen_disk_t describing the VBD - * - * Takes a xen_disk_t * that describes a VBD the domain has access to. - * Performs appropriate initialisation and registration of the device. - * - * Care needs to be taken when making re-entrant calls to ensure that - * corruption does not occur. Also, devices that are in use should not have - * their details updated. This is the caller's responsibility. - */ -static int xlvbd_init_device(xen_disk_t *xd) -{ - int device = xd->device; - int major = MAJOR(device); - int minor = MINOR(device); - int is_ide = IDE_DISK_MAJOR(major); /* is this an ide device? */ - int is_scsi= SCSI_BLK_MAJOR(major); /* is this a scsi device? */ - char *major_name; - struct gendisk *gd; - struct block_device *bd; - xl_disk_t *disk; - int i, rc = 0, max_part, partno; - unsigned long capacity; - - unsigned char buf[64]; - - if ( (bd = bdget(device)) == NULL ) - return -1; - - /* - * Update of partition info, and check of usage count, is protected - * by the per-block-device semaphore. - */ - down(&bd->bd_sem); - - if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) ) - { - printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device); - rc = -1; - goto out; - } - - if ( is_ide ) { - - major_name = XLIDE_MAJOR_NAME; - max_part = XLIDE_MAX_PART; - - } else if ( is_scsi ) { - - major_name = XLSCSI_MAJOR_NAME; - max_part = XLSCSI_MAX_PART; - - } else if (XD_VIRTUAL(xd->info)) { - - major_name = XLVBD_MAJOR_NAME; - max_part = XLVBD_MAX_PART; - - } else { - - /* SMH: hmm - probably a CCISS driver or sim; assume CCISS for now */ - printk(KERN_ALERT "Assuming device %02x:%02x is CCISS/SCSI\n", - major, minor); - is_scsi = 1; - major_name = "cciss"; - max_part = XLSCSI_MAX_PART; - - } - - partno = minor & (max_part - 1); - - if ( (gd = get_gendisk(device)) == NULL ) - { - rc = register_blkdev(major, major_name, &xlvbd_block_fops); - if ( rc < 0 ) - { - printk(KERN_ALERT "XL VBD: can't get major %d\n", major); - goto out; - } - - if ( is_ide ) - { - blksize_size[major] = xlide_blksize_size; - hardsect_size[major] = xlide_hardsect_size; - max_sectors[major] = xlide_max_sectors; - read_ahead[major] = 8; /* from drivers/ide/ide-probe.c */ - } - else if ( is_scsi ) - { - blksize_size[major] = xlscsi_blksize_size; - hardsect_size[major] = xlscsi_hardsect_size; - max_sectors[major] = xlscsi_max_sectors; - read_ahead[major] = 0; /* XXX 8; -- guessing */ - } - else - { - blksize_size[major] = xlvbd_blksize_size; - hardsect_size[major] = xlvbd_hardsect_size; - max_sectors[major] = xlvbd_max_sectors; - read_ahead[major] = 8; - } - - blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request); - - /* - * Turn off barking 'headactive' mode. We dequeue buffer heads as - * soon as we pass them down to Xen. - */ - blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0); - - /* Construct an appropriate gendisk structure. */ - gd = kmalloc(sizeof(struct gendisk), GFP_KERNEL); - gd->major = major; - gd->major_name = major_name; - - gd->max_p = max_part; - if ( is_ide ) - { - gd->minor_shift = XLIDE_PARTN_SHIFT; - gd->nr_real = XLIDE_DEVS_PER_MAJOR; - } - else if ( is_scsi ) - { - gd->minor_shift = XLSCSI_PARTN_SHIFT; - gd->nr_real = XLSCSI_DEVS_PER_MAJOR; - } - else - { - gd->minor_shift = XLVBD_PARTN_SHIFT; - gd->nr_real = XLVBD_DEVS_PER_MAJOR; - } - - /* - ** The sizes[] and part[] arrays hold the sizes and other - ** information about every partition with this 'major' (i.e. - ** every disk sharing the 8 bit prefix * max partns per disk) - */ - gd->sizes = kmalloc(max_part*gd->nr_real*sizeof(int), GFP_KERNEL); - gd->part = kmalloc(max_part*gd->nr_real*sizeof(struct hd_struct), - GFP_KERNEL); - memset(gd->sizes, 0, max_part * gd->nr_real * sizeof(int)); - memset(gd->part, 0, max_part * gd->nr_real - * sizeof(struct hd_struct)); - - - gd->real_devices = kmalloc(gd->nr_real * sizeof(xl_disk_t), - GFP_KERNEL); - memset(gd->real_devices, 0, gd->nr_real * sizeof(xl_disk_t)); - - gd->next = NULL; - gd->fops = &xlvbd_block_fops; - - gd->de_arr = kmalloc(gd->nr_real * sizeof(*gd->de_arr), - GFP_KERNEL); - gd->flags = kmalloc(gd->nr_real * sizeof(*gd->flags), GFP_KERNEL); - - memset(gd->de_arr, 0, gd->nr_real * sizeof(*gd->de_arr)); - memset(gd->flags, 0, gd->nr_real * sizeof(*gd->flags)); - - add_gendisk(gd); - - blk_size[major] = gd->sizes; - } - - if ( XD_READONLY(xd->info) ) - set_device_ro(device, 1); - - gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XENO; - - /* NB. Linux 2.4 only handles 32-bit sector offsets and capacities. */ - capacity = (unsigned long)xd->capacity; - - if ( partno != 0 ) - { - /* - * If this was previously set up as a real disc we will have set - * up partition-table information. Virtual partitions override - * 'real' partitions, and the two cannot coexist on a device. - */ - if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) && - (gd->sizes[minor & ~(max_part-1)] != 0) ) - { - /* - * Any non-zero sub-partition entries must be cleaned out before - * installing 'virtual' partition entries. The two types cannot - * coexist, and virtual partitions are favoured. - */ - kdev_t dev = device & ~(max_part-1); - for ( i = max_part - 1; i > 0; i-- ) - { - invalidate_device(dev+i, 1); - gd->part[MINOR(dev+i)].start_sect = 0; - gd->part[MINOR(dev+i)].nr_sects = 0; - gd->sizes[MINOR(dev+i)] = 0; - } - printk(KERN_ALERT - "Virtual partitions found for /dev/%s - ignoring any " - "real partition information we may have found.\n", - disk_name(gd, MINOR(device), buf)); - } - - /* Need to skankily setup 'partition' information */ - gd->part[minor].start_sect = 0; - gd->part[minor].nr_sects = capacity; - gd->sizes[minor] = capacity; - - gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS; - } - else - { - gd->part[minor].nr_sects = capacity; - gd->sizes[minor] = capacity>>(BLOCK_SIZE_BITS-9); - - /* Some final fix-ups depending on the device type */ - switch ( XD_TYPE(xd->info) ) - { - case XD_TYPE_CDROM: - case XD_TYPE_FLOPPY: - case XD_TYPE_TAPE: - gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE; - printk(KERN_ALERT - "Skipping partition check on %s /dev/%s\n", - XD_TYPE(xd->info)==XD_TYPE_CDROM ? "cdrom" : - (XD_TYPE(xd->info)==XD_TYPE_TAPE ? "tape" : - "floppy"), disk_name(gd, MINOR(device), buf)); - break; - - case XD_TYPE_DISK: - /* Only check partitions on real discs (not virtual!). */ - if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS ) - { - printk(KERN_ALERT - "Skipping partition check on virtual /dev/%s\n", - disk_name(gd, MINOR(device), buf)); - break; - } - register_disk(gd, device, gd->max_p, &xlvbd_block_fops, capacity); - break; - - default: - printk(KERN_ALERT "XenoLinux: unknown device type %d\n", - XD_TYPE(xd->info)); - break; - } - } - - out: - up(&bd->bd_sem); - bdput(bd); - return rc; -} - - -/* - * xlvbd_remove_device - remove a device node if possible - * @device: numeric device ID - * - * Updates the gendisk structure and invalidates devices. - * - * This is OK for now but in future, should perhaps consider where this should - * deallocate gendisks / unregister devices. - */ -static int xlvbd_remove_device(int device) -{ - int i, rc = 0, minor = MINOR(device); - struct gendisk *gd; - struct block_device *bd; - xl_disk_t *disk = NULL; - - if ( (bd = bdget(device)) == NULL ) - return -1; - - /* - * Update of partition info, and check of usage count, is protected - * by the per-block-device semaphore. - */ - down(&bd->bd_sem); - - if ( ((gd = get_gendisk(device)) == NULL) || - ((disk = xldev_to_xldisk(device)) == NULL) ) - BUG(); - - if ( disk->usage != 0 ) - { - printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device); - rc = -1; - goto out; - } - - if ( (minor & (gd->max_p-1)) != 0 ) - { - /* 1: The VBD is mapped to a partition rather than a whole unit. */ - invalidate_device(device, 1); - gd->part[minor].start_sect = 0; - gd->part[minor].nr_sects = 0; - gd->sizes[minor] = 0; - - /* Clear the consists-of-virtual-partitions flag if possible. */ - gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS; - for ( i = 1; i < gd->max_p; i++ ) - if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 ) - gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS; - - /* - * If all virtual partitions are now gone, and a 'whole unit' VBD is - * present, then we can try to grok the unit's real partition table. - */ - if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) && - (gd->sizes[minor & ~(gd->max_p-1)] != 0) && - !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) ) - { - register_disk(gd, - device&~(gd->max_p-1), - gd->max_p, - &xlvbd_block_fops, - gd->part[minor&~(gd->max_p-1)].nr_sects); - } - } - else - { - /* - * 2: The VBD is mapped to an entire 'unit'. Clear all partitions. - * NB. The partition entries are only cleared if there are no VBDs - * mapped to individual partitions on this unit. - */ - i = gd->max_p - 1; /* Default: clear subpartitions as well. */ - if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS ) - i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */ - while ( i >= 0 ) - { - invalidate_device(device+i, 1); - gd->part[minor+i].start_sect = 0; - gd->part[minor+i].nr_sects = 0; - gd->sizes[minor+i] = 0; - i--; - } - } - - out: - up(&bd->bd_sem); - bdput(bd); - return rc; -} - -/* - * xlvbd_update_vbds - reprobes the VBD status and performs updates driver - * state. The VBDs need to be updated in this way when the domain is - * initialised and also each time we receive an XLBLK_UPDATE event. - */ -void xlvbd_update_vbds(void) -{ - int i, j, k, old_nr, new_nr; - xen_disk_t *old_info, *new_info, *merged_info; - - old_info = vbd_info; - old_nr = nr_vbds; - - new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL); - if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 ) - { - kfree(new_info); - return; - } - - /* - * Final list maximum size is old list + new list. This occurs only when - * old list and new list do not overlap at all, and we cannot yet destroy - * VBDs in the old list because the usage counts are busy. - */ - merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL); - - /* @i tracks old list; @j tracks new list; @k tracks merged list. */ - i = j = k = 0; - - while ( (i < old_nr) && (j < new_nr) ) - { - if ( old_info[i].device < new_info[j].device ) - { - if ( xlvbd_remove_device(old_info[i].device) != 0 ) - memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t)); - i++; - } - else if ( old_info[i].device > new_info[j].device ) - { - if ( xlvbd_init_device(&new_info[j]) == 0 ) - memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t)); - j++; - } - else - { - if ( ((old_info[i].capacity == new_info[j].capacity) && - (old_info[i].info == new_info[j].info)) || - (xlvbd_remove_device(old_info[i].device) != 0) ) - memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t)); - else if ( xlvbd_init_device(&new_info[j]) == 0 ) - memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t)); - i++; j++; - } - } - - for ( ; i < old_nr; i++ ) - { - if ( xlvbd_remove_device(old_info[i].device) != 0 ) - memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t)); - } - - for ( ; j < new_nr; j++ ) - { - if ( xlvbd_init_device(&new_info[j]) == 0 ) - memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t)); - } - - vbd_info = merged_info; - nr_vbds = k; - - kfree(old_info); - kfree(new_info); -} - - -/* - * Set up all the linux device goop for the virtual block devices (vbd's) that - * xen tells us about. Note that although from xen's pov VBDs are addressed - * simply an opaque 16-bit device number, the domain creation tools - * conventionally allocate these numbers to correspond to those used by 'real' - * linux -- this is just for convenience as it means e.g. that the same - * /etc/fstab can be used when booting with or without xen. - */ -int __init xlvbd_init(void) -{ - int i; - - /* - * If compiled as a module, we don't support unloading yet. We therefore - * permanently increment the reference count to disallow it. - */ - SET_MODULE_OWNER(&xlvbd_block_fops); - MOD_INC_USE_COUNT; - - /* Initialize the global arrays. */ - for ( i = 0; i < 256; i++ ) - { - /* from the generic ide code (drivers/ide/ide-probe.c, etc) */ - xlide_blksize_size[i] = 1024; - xlide_hardsect_size[i] = 512; - xlide_max_sectors[i] = 128; /* 'hwif->rqsize' if we knew it */ - - /* from the generic scsi disk code (drivers/scsi/sd.c) */ - xlscsi_blksize_size[i] = 1024; /* XXX 512; */ - xlscsi_hardsect_size[i] = 512; - xlscsi_max_sectors[i] = 128*8; /* XXX 128; */ - - /* we don't really know what to set these too since it depends */ - xlvbd_blksize_size[i] = 512; - xlvbd_hardsect_size[i] = 512; - xlvbd_max_sectors[i] = 128; - } - - vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL); - nr_vbds = xlvbd_get_vbd_info(vbd_info); - - if ( nr_vbds < 0 ) - { - kfree(vbd_info); - vbd_info = NULL; - nr_vbds = 0; - } - else - { - for ( i = 0; i < nr_vbds; i++ ) - xlvbd_init_device(&vbd_info[i]); - } - - return 0; -} - - -#ifdef MODULE -module_init(xlvbd_init); -#endif diff --git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/console/Makefile b/xenolinux-2.4.25-sparse/arch/xeno/drivers/console/Makefile deleted file mode 100644 index aaa546a8f3..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/console/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-$(CONFIG_XEN_CONSOLE) := console.o -include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/console/console.c b/xenolinux-2.4.25-sparse/arch/xeno/drivers/console/console.c deleted file mode 100644 index bd9a546980..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/console/console.c +++ /dev/null @@ -1,508 +0,0 @@ -/****************************************************************************** - * console.c - * - * Virtual console driver. - * - * Copyright (c) 2002-2004, K A Fraser. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static spinlock_t xeno_console_lock = SPIN_LOCK_UNLOCKED; - -#define XENO_TTY_MINOR 123 - -/******************** Kernel console driver ********************************/ - -static void nonpriv_conwrite(const char *s, unsigned int count) -{ - control_if_t *ctrl_if; - evtchn_op_t evtchn_op; - int src, dst, p; - - ctrl_if = (control_if_t *)((char *)HYPERVISOR_shared_info + 2048); - - while ( count != 0 ) - { - /* Wait for the request ring to drain. */ - while ( ctrl_if->tx_resp_prod != ctrl_if->tx_req_prod ) - barrier(); - - p = MASK_CONTROL_IDX(ctrl_if->tx_req_prod); - - ctrl_if->tx_ring[p].type = CMSG_CONSOLE; - ctrl_if->tx_ring[p].subtype = CMSG_CONSOLE_DATA; - ctrl_if->tx_ring[p].id = 0xaa; - src = dst = 0; - while ( (src < count) && (dst < (sizeof(ctrl_if->tx_ring[p].msg)-1)) ) - { - if ( (ctrl_if->tx_ring[p].msg[dst++] = s[src++]) == '\n' ) - ctrl_if->tx_ring[p].msg[dst++] = '\r'; - } - ctrl_if->tx_ring[p].length = dst; - - ctrl_if->tx_req_prod++; - evtchn_op.cmd = EVTCHNOP_send; - evtchn_op.u.send.local_port = 0; - (void)HYPERVISOR_event_channel_op(&evtchn_op); - - s += src; - count -= src; - } -} - -static void priv_conwrite(const char *s, unsigned int count) -{ - int rc; - - while ( count > 0 ) - { - if ( (rc = HYPERVISOR_console_io(CONSOLEIO_write, count, s)) > 0 ) - { - count -= rc; - s += rc; - } - } -} - -static void xen_console_write(struct console *co, const char *s, - unsigned int count) -{ - unsigned long flags; - spin_lock_irqsave(&xeno_console_lock, flags); - if ( !(start_info.flags & SIF_INITDOMAIN) ) - nonpriv_conwrite(s, count); - else - priv_conwrite(s, count); - spin_unlock_irqrestore(&xeno_console_lock, flags); -} - -static kdev_t xen_console_device(struct console *c) -{ - /* - * This is the magic that binds our "struct console" to our - * "tty_struct", defined below. - */ - return MKDEV(TTY_MAJOR, XENO_TTY_MINOR); -} - -static struct console xen_console_info = { - name: "xencons", /* Used to be xen_console, but we're only - actually allowed 8 charcters including - the terminator... */ - write: xen_console_write, - device: xen_console_device, - flags: CON_PRINTBUFFER, - index: -1, -}; - -void xen_console_init(void) -{ - register_console(&xen_console_info); -} - - -/*** Useful function for console debugging -- goes straight to Xen ****/ -asmlinkage int xprintk(const char *fmt, ...) -{ - va_list args; - int printk_len; - static char printk_buf[1024]; - - /* Emit the output into the temporary buffer */ - va_start(args, fmt); - printk_len = vsnprintf(printk_buf, sizeof(printk_buf), fmt, args); - va_end(args); - - /* Send the processed output directly to Xen. */ - xen_console_write(NULL, printk_buf, printk_len); - - return 0; -} - - -/******************** User-space console driver (/dev/console) ************/ - -static struct tty_driver xeno_console_driver; -static int xeno_console_refcount; -static struct tty_struct *xeno_console_table[1]; -static struct termios *xeno_console_termios[1]; -static struct termios *xeno_console_termios_locked[1]; -static struct tty_struct *xeno_console_tty; - -#define WBUF_SIZE 1024 -#define WBUF_MASK(_i) ((_i)&(WBUF_SIZE-1)) -static char wbuf[WBUF_SIZE], x_char; -static unsigned int wc, wp; /* write_cons, write_prod */ - -static void __do_console_io(void) -{ - control_if_t *ctrl_if; - control_msg_t *msg; - evtchn_op_t evtchn_op; - CONTROL_RING_IDX c; - int i, l, work_done = 0; - static char rbuf[16]; - - if ( xeno_console_tty == NULL ) - return; - - /* Special-case I/O handling for domain 0. */ - if ( start_info.flags & SIF_INITDOMAIN ) - { - /* Receive work. */ - while ( (l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0 ) - for ( i = 0; i < l; i++ ) - tty_insert_flip_char(xeno_console_tty, rbuf[i], 0); - if ( xeno_console_tty->flip.count != 0 ) - tty_flip_buffer_push(xeno_console_tty); - - /* Transmit work. */ - while ( wc != wp ) - { - l = wp - wc; - if ( l > (WBUF_SIZE - WBUF_MASK(wc)) ) - l = WBUF_SIZE - WBUF_MASK(wc); - priv_conwrite(&wbuf[WBUF_MASK(wc)], l); - wc += l; - wake_up_interruptible(&xeno_console_tty->write_wait); - if ( (xeno_console_tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) && - (xeno_console_tty->ldisc.write_wakeup != NULL) ) - (xeno_console_tty->ldisc.write_wakeup)(xeno_console_tty); - } - - return; - } - - /* Acknowledge the notification. */ - evtchn_clear_port(0); - - ctrl_if = (control_if_t *)((char *)HYPERVISOR_shared_info + 2048); - - /* Receive work. */ - for ( c = ctrl_if->rx_resp_prod; c != ctrl_if->rx_req_prod; c++ ) - { - msg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(c)]; - if ( (msg->type == CMSG_CONSOLE) && - (msg->subtype == CMSG_CONSOLE_DATA) ) - { - for ( i = 0; i < msg->length; i++ ) - tty_insert_flip_char(xeno_console_tty, msg->msg[i], 0); - } - msg->length = 0; - } - if ( ctrl_if->rx_resp_prod != c ) - { - ctrl_if->rx_resp_prod = c; - work_done = 1; - tty_flip_buffer_push(xeno_console_tty); - } - - /* Transmit work. */ - for ( c = ctrl_if->tx_req_prod; - (c - ctrl_if->tx_resp_prod) != CONTROL_RING_SIZE; - c++ ) - { - if ( (wc == wp) && (x_char == 0) ) - break; - msg = &ctrl_if->tx_ring[MASK_CONTROL_IDX(c)]; - msg->type = CMSG_CONSOLE; - msg->subtype = CMSG_CONSOLE_DATA; - msg->id = 0xaa; - l = 0; - if ( x_char != 0 ) /* Handle XON/XOFF urgently. */ - { - msg->msg[l++] = x_char; - x_char = 0; - } - while ( (l < sizeof(msg->msg)) && (wc != wp) ) - msg->msg[l++] = wbuf[WBUF_MASK(wc++)]; - msg->length = l; - } - if ( ctrl_if->tx_req_prod != c ) - { - ctrl_if->tx_req_prod = c; - work_done = 1; - /* There might be something for waiters to do. */ - wake_up_interruptible(&xeno_console_tty->write_wait); - if ( (xeno_console_tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) && - (xeno_console_tty->ldisc.write_wakeup != NULL) ) - (xeno_console_tty->ldisc.write_wakeup)(xeno_console_tty); - } - - if ( work_done ) - { - /* Send a notification to the controller. */ - evtchn_op.cmd = EVTCHNOP_send; - evtchn_op.u.send.local_port = 0; - (void)HYPERVISOR_event_channel_op(&evtchn_op); - } -} - -/* This is the callback entry point for domains != 0. */ -static void control_event(unsigned int port) -{ - unsigned long flags; - spin_lock_irqsave(&xeno_console_lock, flags); - __do_console_io(); - spin_unlock_irqrestore(&xeno_console_lock, flags); -} - -/* This is the callback entry point for domain 0. */ -static void control_irq(int irq, void *dev_id, struct pt_regs *regs) -{ - unsigned long flags; - spin_lock_irqsave(&xeno_console_lock, flags); - __do_console_io(); - spin_unlock_irqrestore(&xeno_console_lock, flags); -} - -static int xeno_console_write_room(struct tty_struct *tty) -{ - return WBUF_SIZE - (wp - wc); -} - -static int xeno_console_chars_in_buffer(struct tty_struct *tty) -{ - return wp - wc; -} - -static void xeno_console_send_xchar(struct tty_struct *tty, char ch) -{ - unsigned long flags; - spin_lock_irqsave(&xeno_console_lock, flags); - x_char = ch; - __do_console_io(); - spin_unlock_irqrestore(&xeno_console_lock, flags); -} - -static void xeno_console_throttle(struct tty_struct *tty) -{ - if ( I_IXOFF(tty) ) - xeno_console_send_xchar(tty, STOP_CHAR(tty)); -} - -static void xeno_console_unthrottle(struct tty_struct *tty) -{ - if ( I_IXOFF(tty) ) - { - if ( x_char != 0 ) - x_char = 0; - else - xeno_console_send_xchar(tty, START_CHAR(tty)); - } -} - -static void xeno_console_flush_buffer(struct tty_struct *tty) -{ - unsigned long flags; - spin_lock_irqsave(&xeno_console_lock, flags); - wc = wp = 0; - spin_unlock_irqrestore(&xeno_console_lock, flags); -} - -static inline int __xeno_console_put_char(int ch) -{ - char _ch = (char)ch; - if ( (wp - wc) == WBUF_SIZE ) - return 0; - wbuf[WBUF_MASK(wp++)] = _ch; - return 1; -} - -static int xeno_console_write(struct tty_struct *tty, int from_user, - const u_char * buf, int count) -{ - int i; - unsigned long flags; - - if ( from_user && verify_area(VERIFY_READ, buf, count) ) - return -EINVAL; - - spin_lock_irqsave(&xeno_console_lock, flags); - - for ( i = 0; i < count; i++ ) - { - char ch; - if ( from_user ) - __get_user(ch, buf + i); - else - ch = buf[i]; - if ( !__xeno_console_put_char(ch) ) - break; - } - - if ( i != 0 ) - __do_console_io(); - - spin_unlock_irqrestore(&xeno_console_lock, flags); - - return i; -} - -static void xeno_console_put_char(struct tty_struct *tty, u_char ch) -{ - unsigned long flags; - spin_lock_irqsave(&xeno_console_lock, flags); - (void)__xeno_console_put_char(ch); - spin_unlock_irqrestore(&xeno_console_lock, flags); -} - -static void xeno_console_flush_chars(struct tty_struct *tty) -{ - unsigned long flags; - spin_lock_irqsave(&xeno_console_lock, flags); - __do_console_io(); - spin_unlock_irqrestore(&xeno_console_lock, flags); -} - -static void xeno_console_wait_until_sent(struct tty_struct *tty, int timeout) -{ - unsigned long orig_jiffies = jiffies; - - while ( tty->driver.chars_in_buffer(tty) ) - { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - if ( signal_pending(current) ) - break; - if ( (timeout != 0) && time_after(jiffies, orig_jiffies + timeout) ) - break; - } - - set_current_state(TASK_RUNNING); -} - -static int xeno_console_open(struct tty_struct *tty, struct file *filp) -{ - int line; - unsigned long flags; - - MOD_INC_USE_COUNT; - line = MINOR(tty->device) - tty->driver.minor_start; - if ( line != 0 ) - { - MOD_DEC_USE_COUNT; - return -ENODEV; - } - - spin_lock_irqsave(&xeno_console_lock, flags); - tty->driver_data = NULL; - if ( xeno_console_tty == NULL ) - xeno_console_tty = tty; - __do_console_io(); - spin_unlock_irqrestore(&xeno_console_lock, flags); - - return 0; -} - -static void xeno_console_close(struct tty_struct *tty, struct file *filp) -{ - unsigned long flags; - - if ( tty->count == 1 ) - { - tty->closing = 1; - tty_wait_until_sent(tty, 0); - if ( tty->driver.flush_buffer != NULL ) - tty->driver.flush_buffer(tty); - if ( tty->ldisc.flush_buffer != NULL ) - tty->ldisc.flush_buffer(tty); - tty->closing = 0; - spin_lock_irqsave(&xeno_console_lock, flags); - xeno_console_tty = NULL; - spin_unlock_irqrestore(&xeno_console_lock, flags); - } - - MOD_DEC_USE_COUNT; -} - -int __init xeno_con_init(void) -{ - memset(&xeno_console_driver, 0, sizeof(struct tty_driver)); - xeno_console_driver.magic = TTY_DRIVER_MAGIC; - xeno_console_driver.name = "xencons"; - xeno_console_driver.major = TTY_MAJOR; - xeno_console_driver.minor_start = XENO_TTY_MINOR; - xeno_console_driver.num = 1; - xeno_console_driver.type = TTY_DRIVER_TYPE_SERIAL; - xeno_console_driver.subtype = SERIAL_TYPE_NORMAL; - xeno_console_driver.init_termios = tty_std_termios; - xeno_console_driver.flags = - TTY_DRIVER_REAL_RAW | TTY_DRIVER_RESET_TERMIOS | TTY_DRIVER_NO_DEVFS; - xeno_console_driver.refcount = &xeno_console_refcount; - xeno_console_driver.table = xeno_console_table; - xeno_console_driver.termios = xeno_console_termios; - xeno_console_driver.termios_locked = xeno_console_termios_locked; - - xeno_console_driver.open = xeno_console_open; - xeno_console_driver.close = xeno_console_close; - xeno_console_driver.write = xeno_console_write; - xeno_console_driver.write_room = xeno_console_write_room; - xeno_console_driver.put_char = xeno_console_put_char; - xeno_console_driver.flush_chars = xeno_console_flush_chars; - xeno_console_driver.chars_in_buffer = xeno_console_chars_in_buffer; - xeno_console_driver.send_xchar = xeno_console_send_xchar; - xeno_console_driver.flush_buffer = xeno_console_flush_buffer; - xeno_console_driver.throttle = xeno_console_throttle; - xeno_console_driver.unthrottle = xeno_console_unthrottle; - xeno_console_driver.wait_until_sent = xeno_console_wait_until_sent; - - if ( tty_register_driver(&xeno_console_driver) ) - panic("Couldn't register Xeno console driver\n"); - - if ( !(start_info.flags & SIF_INITDOMAIN) ) - { - if ( evtchn_request_port(0, control_event) != 0 ) - BUG(); - control_event(0); /* kickstart the console */ - } - else - { - request_irq(HYPEREVENT_IRQ(_EVENT_CONSOLE), - control_irq, 0, "console", NULL); - control_irq(0, NULL, NULL); /* kickstart the console */ - } - - printk("Xeno console successfully installed\n"); - - return 0; -} - -void __exit xeno_con_fini(void) -{ - int ret; - - ret = tty_unregister_driver(&xeno_console_driver); - if ( ret != 0 ) - printk(KERN_ERR "Unable to unregister Xeno console driver: %d\n", ret); - - if ( !(start_info.flags & SIF_INITDOMAIN) ) - (void)evtchn_free_port(0); -} - -module_init(xeno_con_init); -module_exit(xeno_con_fini); - diff --git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/Makefile b/xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/Makefile deleted file mode 100644 index 3e2e17bd23..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-y := core.o vfr.o -include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/core.c b/xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/core.c deleted file mode 100644 index c7f1fd496b..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/core.c +++ /dev/null @@ -1,104 +0,0 @@ -/****************************************************************************** - * core.c - * - * Interface to privileged domain-0 commands. - * - * Copyright (c) 2002-2004, K A Fraser, B Dragovic - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -static struct proc_dir_entry *privcmd_intf; - -static int privcmd_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long data) -{ - int ret = 0; - - switch ( cmd ) - { - case IOCTL_PRIVCMD_HYPERCALL: - { - privcmd_hypercall_t hypercall; - - if ( copy_from_user(&hypercall, (void *)data, sizeof(hypercall)) ) - return -EFAULT; - - __asm__ __volatile__ ( - "pushl %%ebx; pushl %%ecx; pushl %%edx; pushl %%esi; pushl %%edi; " - "movl 4(%%eax),%%ebx ;" - "movl 8(%%eax),%%ecx ;" - "movl 12(%%eax),%%edx ;" - "movl 16(%%eax),%%esi ;" - "movl 20(%%eax),%%edi ;" - "movl (%%eax),%%eax ;" - TRAP_INSTR "; " - "popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx" - : "=a" (ret) : "0" (&hypercall) : "memory" ); - - } - break; - - default: - ret = -EINVAL; - break; - } - return ret; -} - - -static struct file_operations privcmd_file_ops = { - ioctl : privcmd_ioctl -}; - - -static int __init init_module(void) -{ - if ( !(start_info.flags & SIF_PRIVILEGED) ) - return 0; - - privcmd_intf = create_xeno_proc_entry("privcmd", 0400); - if ( privcmd_intf != NULL ) - { - privcmd_intf->owner = THIS_MODULE; - privcmd_intf->nlink = 1; - privcmd_intf->proc_fops = &privcmd_file_ops; - } - - return 0; -} - - -static void __exit cleanup_module(void) -{ - if ( privcmd_intf == NULL ) return; - remove_xeno_proc_entry("privcmd"); - privcmd_intf = NULL; -} - - -module_init(init_module); -module_exit(cleanup_module); diff --git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/vfr.c b/xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/vfr.c deleted file mode 100644 index 56b491314b..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/vfr.c +++ /dev/null @@ -1,343 +0,0 @@ -/****************************************************************************** - * vfr.c - * - * Interface to the virtual firewall/router. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static struct proc_dir_entry *proc_vfr; - -static unsigned char readbuf[1024]; - -/* Helpers, implemented at the bottom. */ -u32 getipaddr(const char *buff, unsigned int len); -u16 antous(const char *buff, int len); -u64 antoull(const char *buff, int len); -int anton(const char *buff, int len); - -static int vfr_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - strcpy(page, readbuf); - *readbuf = '\0'; - *eof = 1; - *start = page; - return strlen(page); -} - -/* The format for the vfr interface is as follows: - * - * COMMAND = [= [...]] - * - * where: - * - * COMMAND = { ACCEPT | COUNT } - * - * field=val pairs are as follows: - * - * field = { srcaddr | dstaddr } - * val is a dot seperated, numeric IP address. - * - * field = { srcport | dstport } - * val is a (16-bit) unsigned int - * - * field = { proto } - * val = { IP | TCP | UDP | ARP } - * - */ - -#define isspace(_x) ( ((_x)==' ') || ((_x)=='\t') || ((_x)=='\v') || \ - ((_x)=='\f') || ((_x)=='\r') || ((_x)=='\n') ) - -static int vfr_write_proc(struct file *file, const char *buffer, - u_long count, void *data) -{ - network_op_t op; - int ret, len; - int ts, te, tl; // token start, end, and length - int fs, fe, fl; // field. - - len = count; - ts = te = 0; - - memset(&op, 0, sizeof(network_op_t)); - - // get the command: - while ( count && isspace(buffer[ts]) ) { ts++; count--; } // skip spaces. - te = ts; - while ( count && !isspace(buffer[te]) ) { te++; count--; } // command end - if ( te <= ts ) goto bad; - tl = te - ts; - - if ( strncmp(&buffer[ts], "ADD", tl) == 0 ) - { - op.cmd = NETWORK_OP_ADDRULE; - } - else if ( strncmp(&buffer[ts], "DELETE", tl) == 0 ) - { - op.cmd = NETWORK_OP_DELETERULE; - } - else if ( strncmp(&buffer[ts], "PRINT", tl) == 0 ) - { - op.cmd = NETWORK_OP_GETRULELIST; - goto doneparsing; - } - - ts = te; - - // get the action - while ( count && (buffer[ts] == ' ') ) { ts++; count--; } // skip spaces. - te = ts; - while ( count && (buffer[te] != ' ') ) { te++; count--; } // command end - if ( te <= ts ) goto bad; - tl = te - ts; - - if ( strncmp(&buffer[ts], "ACCEPT", tl) == 0 ) - { - op.u.net_rule.action = NETWORK_ACTION_ACCEPT; - goto keyval; - } - if ( strncmp(&buffer[ts], "COUNT", tl) == 0 ) - { - op.u.net_rule.action = NETWORK_ACTION_COUNT; - goto keyval; - } - - // default case; - return (len); - - - // get the key=val pairs. - keyval: - while (count) - { - //get field - ts = te; while ( count && isspace(buffer[ts]) ) { ts++; count--; } - te = ts; - while ( count && !isspace(buffer[te]) && (buffer[te] != '=') ) - { te++; count--; } - if ( te <= ts ) - goto doneparsing; - tl = te - ts; - fs = ts; fe = te; fl = tl; // save the field markers. - // skip " = " (ignores extra equals.) - while ( count && (isspace(buffer[te]) || (buffer[te] == '=')) ) - { te++; count--; } - ts = te; - while ( count && !isspace(buffer[te]) ) { te++; count--; } - tl = te - ts; - - if ( (fl <= 0) || (tl <= 0) ) goto bad; - - /* NB. Prefix matches must go first! */ - if (strncmp(&buffer[fs], "src", fl) == 0) - { - op.u.net_rule.src_dom = VIF_SPECIAL; - op.u.net_rule.src_idx = VIF_ANY_INTERFACE; - } - else if (strncmp(&buffer[fs], "dst", fl) == 0) - { - op.u.net_rule.dst_dom = VIF_SPECIAL; - op.u.net_rule.dst_idx = VIF_PHYSICAL_INTERFACE; - } - else if (strncmp(&buffer[fs], "srcaddr", fl) == 0) - { - op.u.net_rule.src_addr = getipaddr(&buffer[ts], tl); - } - else if (strncmp(&buffer[fs], "dstaddr", fl) == 0) - { - op.u.net_rule.dst_addr = getipaddr(&buffer[ts], tl); - } - else if (strncmp(&buffer[fs], "srcaddrmask", fl) == 0) - { - op.u.net_rule.src_addr_mask = getipaddr(&buffer[ts], tl); - } - else if (strncmp(&buffer[fs], "dstaddrmask", fl) == 0) - { - op.u.net_rule.dst_addr_mask = getipaddr(&buffer[ts], tl); - } - else if (strncmp(&buffer[fs], "srcport", fl) == 0) - { - op.u.net_rule.src_port = antous(&buffer[ts], tl); - } - else if (strncmp(&buffer[fs], "dstport", fl) == 0) - { - op.u.net_rule.dst_port = antous(&buffer[ts], tl); - } - else if (strncmp(&buffer[fs], "srcportmask", fl) == 0) - { - op.u.net_rule.src_port_mask = antous(&buffer[ts], tl); - } - else if (strncmp(&buffer[fs], "dstportmask", fl) == 0) - { - op.u.net_rule.dst_port_mask = antous(&buffer[ts], tl); - } - else if (strncmp(&buffer[fs], "srcdom", fl) == 0) - { - op.u.net_rule.src_dom = antoull(&buffer[ts], tl); - } - else if (strncmp(&buffer[fs], "srcidx", fl) == 0) - { - op.u.net_rule.src_idx = anton(&buffer[ts], tl); - } - else if (strncmp(&buffer[fs], "dstdom", fl) == 0) - { - op.u.net_rule.dst_dom = antoull(&buffer[ts], tl); - } - else if (strncmp(&buffer[fs], "dstidx", fl) == 0) - { - op.u.net_rule.dst_idx = anton(&buffer[ts], tl); - } - else if ( (strncmp(&buffer[fs], "proto", fl) == 0)) - { - if (strncmp(&buffer[ts], "any", tl) == 0) - op.u.net_rule.proto = NETWORK_PROTO_ANY; - if (strncmp(&buffer[ts], "ip", tl) == 0) - op.u.net_rule.proto = NETWORK_PROTO_IP; - if (strncmp(&buffer[ts], "tcp", tl) == 0) - op.u.net_rule.proto = NETWORK_PROTO_TCP; - if (strncmp(&buffer[ts], "udp", tl) == 0) - op.u.net_rule.proto = NETWORK_PROTO_UDP; - if (strncmp(&buffer[ts], "arp", tl) == 0) - op.u.net_rule.proto = NETWORK_PROTO_ARP; - } - } - - doneparsing: - ret = HYPERVISOR_network_op(&op); - return(len); - - bad: - return(len); - - -} - -static int __init init_module(void) -{ - if ( !(start_info.flags & SIF_PRIVILEGED) ) - return 0; - - *readbuf = '\0'; - proc_vfr = create_xeno_proc_entry("vfr", 0600); - if ( proc_vfr != NULL ) - { - proc_vfr->owner = THIS_MODULE; - proc_vfr->nlink = 1; - proc_vfr->read_proc = vfr_read_proc; - proc_vfr->write_proc = vfr_write_proc; - printk("Successfully installed virtual firewall/router interface\n"); - } - return 0; -} - -static void __exit cleanup_module(void) -{ - if ( proc_vfr == NULL ) return; - remove_xeno_proc_entry("vfr"); - proc_vfr = NULL; -} - -module_init(init_module); -module_exit(cleanup_module); - -/* Helper functions start here: */ - -int anton(const char *buff, int len) -{ - int ret; - char c; - int sign = 1; - - ret = 0; - - if (len == 0) return 0; - if (*buff == '-') { sign = -1; buff++; len--; } - - while ( (len) && ((c = *buff) >= '0') && (c <= '9') ) - { - ret *= 10; - ret += c - '0'; - buff++; len--; - } - - ret *= sign; - return ret; -} - -u16 antous(const char *buff, int len) -{ - u16 ret; - char c; - - ret = 0; - - while ( (len) && ((c = *buff) >= '0') && (c <= '9') ) - { - ret *= 10; - ret += c - '0'; - buff++; len--; - } - - return ret; -} - -u64 antoull(const char *buff, int len) -{ - u64 ret; - char c; - - ret = 0; - - while ( (len) && ((c = *buff) >= '0') && (c <= '9') ) - { - ret *= 10; - ret += c - '0'; - buff++; len--; - } - - return ret; -} - -u32 getipaddr(const char *buff, unsigned int len) -{ - char c; - u32 ret, val; - - ret = 0; val = 0; - - while ( len ) - { - if (!((((c = *buff) >= '0') && ( c <= '9')) || ( c == '.' ) ) ) - { - return(0); // malformed. - } - - if ( c == '.' ) { - if (val > 255) return (0); //malformed. - ret = ret << 8; - ret += val; - val = 0; - len--; buff++; - continue; - } - val *= 10; - val += c - '0'; - buff++; len--; - } - ret = ret << 8; - ret += val; - - return (ret); -} - diff --git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/evtchn/Makefile b/xenolinux-2.4.25-sparse/arch/xeno/drivers/evtchn/Makefile deleted file mode 100644 index 61c983f625..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/evtchn/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-y := evtchn.o -include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/evtchn/evtchn.c b/xenolinux-2.4.25-sparse/arch/xeno/drivers/evtchn/evtchn.c deleted file mode 100644 index a7978ee8d2..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/evtchn/evtchn.c +++ /dev/null @@ -1,481 +0,0 @@ -/****************************************************************************** - * evtchn.c - * - * Xenolinux driver for receiving and demuxing event-channel signals. - * - * Copyright (c) 2004, K A Fraser - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* NB. This must be shared amongst drivers if more things go in /dev/xen */ -static devfs_handle_t xen_dev_dir; - -/* Only one process may open /dev/xen/evtchn at any time. */ -static unsigned long evtchn_dev_inuse; - -/* Notification ring, accessed via /dev/xen/evtchn. */ -#define RING_SIZE 2048 /* 2048 16-bit entries */ -#define RING_MASK(_i) ((_i)&(RING_SIZE-1)) -static u16 *ring; -static unsigned int ring_cons, ring_prod, ring_overflow; - -/* Processes wait on this queue via /dev/xen/evtchn when ring is empty. */ -static DECLARE_WAIT_QUEUE_HEAD(evtchn_wait); -static struct fasync_struct *evtchn_async_queue; - -static evtchn_receiver_t rx_fns[1024]; - -static u32 pend_outstanding[32]; -static u32 disc_outstanding[32]; - -static spinlock_t lock; - -int evtchn_request_port(unsigned int port, evtchn_receiver_t rx_fn) -{ - unsigned long flags; - int rc; - - spin_lock_irqsave(&lock, flags); - - if ( rx_fns[port] != NULL ) - { - printk(KERN_ALERT "Event channel port %d already in use.\n", port); - rc = -EINVAL; - } - else - { - rx_fns[port] = rx_fn; - rc = 0; - } - - spin_unlock_irqrestore(&lock, flags); - - return rc; -} - -int evtchn_free_port(unsigned int port) -{ - unsigned long flags; - int rc; - - spin_lock_irqsave(&lock, flags); - - if ( rx_fns[port] == NULL ) - { - printk(KERN_ALERT "Event channel port %d not in use.\n", port); - rc = -EINVAL; - } - else - { - rx_fns[port] = NULL; - rc = 0; - } - - spin_unlock_irqrestore(&lock, flags); - - return rc; -} - -/* - * NB. Clearing port can race a notification from remote end. Caller must - * therefore recheck notification status on return to avoid missing events. - */ -void evtchn_clear_port(unsigned int port) -{ - unsigned int p = port & PORTIDX_MASK; - unsigned long flags; - - spin_lock_irqsave(&lock, flags); - - if ( unlikely(port & PORT_DISCONNECT) ) - { - clear_bit(p, &disc_outstanding[0]); - clear_bit(p, &HYPERVISOR_shared_info->event_channel_disc[0]); - } - else - { - clear_bit(p, &pend_outstanding[0]); - clear_bit(p, &HYPERVISOR_shared_info->event_channel_pend[0]); - } - - spin_unlock_irqrestore(&lock, flags); -} - -static inline void process_bitmask(u32 *sel, - u32 *mask, - u32 *outstanding, - unsigned int port_subtype) -{ - unsigned long l1, l2; - unsigned int l1_idx, l2_idx, port; - - l1 = xchg(sel, 0); - while ( (l1_idx = ffs(l1)) != 0 ) - { - l1_idx--; - l1 &= ~(1 << l1_idx); - - l2 = mask[l1_idx] & ~outstanding[l1_idx]; - outstanding[l1_idx] |= l2; - while ( (l2_idx = ffs(l2)) != 0 ) - { - l2_idx--; - l2 &= ~(1 << l2_idx); - - port = (l1_idx * 32) + l2_idx; - if ( rx_fns[port] != NULL ) - { - (*rx_fns[port])(port | port_subtype); - } - else if ( ring != NULL ) - { - if ( (ring_prod - ring_cons) < RING_SIZE ) - { - ring[RING_MASK(ring_prod)] = (u16)(port | port_subtype); - if ( ring_cons == ring_prod++ ) - { - wake_up_interruptible(&evtchn_wait); - kill_fasync(&evtchn_async_queue, SIGIO, POLL_IN); - } - } - else - { - ring_overflow = 1; - } - } - } - } -} - -static void evtchn_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - shared_info_t *si = HYPERVISOR_shared_info; - unsigned long flags; - - spin_lock_irqsave(&lock, flags); - - process_bitmask(&si->event_channel_pend_sel, - &si->event_channel_pend[0], - &pend_outstanding[0], - PORT_NORMAL); - - process_bitmask(&si->event_channel_disc_sel, - &si->event_channel_disc[0], - &disc_outstanding[0], - PORT_DISCONNECT); - - spin_unlock_irqrestore(&lock, flags); -} - -static void __evtchn_reset_buffer_ring(void) -{ - u32 m; - unsigned int i, j; - - /* Initialise the ring with currently outstanding notifications. */ - ring_cons = ring_prod = ring_overflow = 0; - - for ( i = 0; i < 32; i++ ) - { - m = pend_outstanding[i]; - while ( (j = ffs(m)) != 0 ) - { - m &= ~(1 << --j); - if ( rx_fns[(i * 32) + j] == NULL ) - ring[ring_prod++] = (u16)(((i * 32) + j) | PORT_NORMAL); - } - - m = disc_outstanding[i]; - while ( (j = ffs(m)) != 0 ) - { - m &= ~(1 << --j); - if ( rx_fns[(i * 32) + j] == NULL ) - ring[ring_prod++] = (u16)(((i * 32) + j) | PORT_DISCONNECT); - } - } -} - -static ssize_t evtchn_read(struct file *file, char *buf, - size_t count, loff_t *ppos) -{ - int rc; - unsigned int c, p, bytes1 = 0, bytes2 = 0; - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue(&evtchn_wait, &wait); - - count &= ~1; /* even number of bytes */ - - if ( count == 0 ) - { - rc = 0; - goto out; - } - - if ( count > PAGE_SIZE ) - count = PAGE_SIZE; - - for ( ; ; ) - { - set_current_state(TASK_INTERRUPTIBLE); - - if ( (c = ring_cons) != (p = ring_prod) ) - break; - - if ( ring_overflow ) - { - rc = -EFBIG; - goto out; - } - - if ( file->f_flags & O_NONBLOCK ) - { - rc = -EAGAIN; - goto out; - } - - if ( signal_pending(current) ) - { - rc = -ERESTARTSYS; - goto out; - } - - schedule(); - } - - /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */ - if ( ((c ^ p) & RING_SIZE) != 0 ) - { - bytes1 = (RING_SIZE - RING_MASK(c)) * sizeof(u16); - bytes2 = RING_MASK(p) * sizeof(u16); - } - else - { - bytes1 = (p - c) * sizeof(u16); - bytes2 = 0; - } - - /* Truncate chunks according to caller's maximum byte count. */ - if ( bytes1 > count ) - { - bytes1 = count; - bytes2 = 0; - } - else if ( (bytes1 + bytes2) > count ) - { - bytes2 = count - bytes1; - } - - if ( copy_to_user(buf, &ring[RING_MASK(c)], bytes1) || - ((bytes2 != 0) && copy_to_user(&buf[bytes1], &ring[0], bytes2)) ) - { - rc = -EFAULT; - goto out; - } - - ring_cons += (bytes1 + bytes2) / sizeof(u16); - - rc = bytes1 + bytes2; - - out: - __set_current_state(TASK_RUNNING); - remove_wait_queue(&evtchn_wait, &wait); - return rc; -} - -static ssize_t evtchn_write(struct file *file, const char *buf, - size_t count, loff_t *ppos) -{ - int rc, i; - u16 *kbuf = (u16 *)get_free_page(GFP_KERNEL); - - if ( kbuf == NULL ) - return -ENOMEM; - - count &= ~1; /* even number of bytes */ - - if ( count == 0 ) - { - rc = 0; - goto out; - } - - if ( count > PAGE_SIZE ) - count = PAGE_SIZE; - - if ( copy_from_user(kbuf, buf, count) != 0 ) - { - rc = -EFAULT; - goto out; - } - - for ( i = 0; i < (count/2); i++ ) - evtchn_clear_port(kbuf[i]); - - rc = count; - - out: - free_page((unsigned long)kbuf); - return rc; -} - -static int evtchn_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) -{ - if ( cmd != EVTCHN_RESET ) - return -EINVAL; - - spin_lock_irq(&lock); - __evtchn_reset_buffer_ring(); - spin_unlock_irq(&lock); - - return 0; -} - -static unsigned int evtchn_poll(struct file *file, poll_table *wait) -{ - unsigned int mask = POLLOUT | POLLWRNORM; - poll_wait(file, &evtchn_wait, wait); - if ( ring_cons != ring_prod ) - mask |= POLLIN | POLLRDNORM; - if ( ring_overflow ) - mask = POLLERR; - return mask; -} - -static int evtchn_fasync(int fd, struct file *filp, int on) -{ - return fasync_helper(fd, filp, on, &evtchn_async_queue); -} - -static int evtchn_open(struct inode *inode, struct file *filp) -{ - u16 *_ring; - - if ( test_and_set_bit(0, &evtchn_dev_inuse) ) - return -EBUSY; - - /* Allocate outside locked region so that we can use GFP_KERNEL. */ - if ( (_ring = (u16 *)get_free_page(GFP_KERNEL)) == NULL ) - return -ENOMEM; - - spin_lock_irq(&lock); - ring = _ring; - __evtchn_reset_buffer_ring(); - spin_unlock_irq(&lock); - - MOD_INC_USE_COUNT; - - return 0; -} - -static int evtchn_release(struct inode *inode, struct file *filp) -{ - spin_lock_irq(&lock); - if ( ring != NULL ) - { - free_page((unsigned long)ring); - ring = NULL; - } - spin_unlock_irq(&lock); - - evtchn_dev_inuse = 0; - - MOD_DEC_USE_COUNT; - - return 0; -} - -static struct file_operations evtchn_fops = { - owner: THIS_MODULE, - read: evtchn_read, - write: evtchn_write, - ioctl: evtchn_ioctl, - poll: evtchn_poll, - fasync: evtchn_fasync, - open: evtchn_open, - release: evtchn_release -}; - -static struct miscdevice evtchn_miscdev = { - minor: EVTCHN_MINOR, - name: "evtchn", - fops: &evtchn_fops -}; - -static int __init init_module(void) -{ - devfs_handle_t symlink_handle; - int err, pos; - char link_dest[64]; - - /* (DEVFS) create '/dev/misc/evtchn'. */ - err = misc_register(&evtchn_miscdev); - if ( err != 0 ) - { - printk(KERN_ALERT "Could not register /dev/misc/evtchn\n"); - return err; - } - - /* (DEVFS) create directory '/dev/xen'. */ - xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL); - - /* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */ - pos = devfs_generate_path(evtchn_miscdev.devfs_handle, - &link_dest[3], - sizeof(link_dest) - 3); - if ( pos >= 0 ) - strncpy(&link_dest[pos], "../", 3); - - /* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */ - (void)devfs_mk_symlink(xen_dev_dir, - "evtchn", - DEVFS_FL_DEFAULT, - &link_dest[pos], - &symlink_handle, - NULL); - - /* (DEVFS) automatically destroy the symlink with its destination. */ - devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle); - - err = request_irq(HYPEREVENT_IRQ(_EVENT_EVTCHN), - evtchn_interrupt, 0, "evtchn", NULL); - if ( err != 0 ) - { - printk(KERN_ALERT "Could not allocate evtchn receive interrupt\n"); - return err; - } - - /* Kickstart servicing of notifications. */ - evtchn_interrupt(0, NULL, NULL); - - printk("Event-channel driver installed.\n"); - - return 0; -} - -static void cleanup_module(void) -{ - free_irq(HYPEREVENT_IRQ(_EVENT_EVTCHN), NULL); - misc_deregister(&evtchn_miscdev); -} - -module_init(init_module); -module_exit(cleanup_module); diff --git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/network/Makefile b/xenolinux-2.4.25-sparse/arch/xeno/drivers/network/Makefile deleted file mode 100644 index 2e4c1f4825..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/network/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-y := network.o -include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/network/network.c b/xenolinux-2.4.25-sparse/arch/xeno/drivers/network/network.c deleted file mode 100644 index c5d25442e2..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/network/network.c +++ /dev/null @@ -1,631 +0,0 @@ -/****************************************************************************** - * network.c - * - * Virtual network driver for XenoLinux. - * - * Copyright (c) 2002-2003, K A Fraser - */ - -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - -#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */ - -static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs); -static void network_tx_buf_gc(struct net_device *dev); -static void network_alloc_rx_buffers(struct net_device *dev); -static void cleanup_module(void); - -static struct list_head dev_list; - -struct net_private -{ - struct list_head list; - struct net_device *dev; - - struct net_device_stats stats; - NET_RING_IDX rx_resp_cons, tx_resp_cons; - unsigned int net_ring_fixmap_idx, tx_full; - net_ring_t *net_ring; - net_idx_t *net_idx; - spinlock_t tx_lock; - unsigned int idx; /* Domain-specific index of this VIF. */ - - unsigned int rx_bufs_to_notify; - -#define STATE_ACTIVE 0 -#define STATE_SUSPENDED 1 -#define STATE_CLOSED 2 - unsigned int state; - - /* - * {tx,rx}_skbs store outstanding skbuffs. The first entry in each - * array is an index into a chain of free entries. - */ - struct sk_buff *tx_skbs[XENNET_TX_RING_SIZE+1]; - struct sk_buff *rx_skbs[XENNET_RX_RING_SIZE+1]; -}; - -/* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */ -#define ADD_ID_TO_FREELIST(_list, _id) \ - (_list)[(_id)] = (_list)[0]; \ - (_list)[0] = (void *)(unsigned long)(_id); -#define GET_ID_FROM_FREELIST(_list) \ - ({ unsigned long _id = (unsigned long)(_list)[0]; \ - (_list)[0] = (_list)[_id]; \ - (unsigned short)_id; }) - - -static void _dbg_network_int(struct net_device *dev) -{ - struct net_private *np = dev->priv; - - if ( np->state == STATE_CLOSED ) - return; - - printk(KERN_ALERT "net: tx_full=%d, tx_resp_cons=0x%08x," - " tx_req_prod=0x%08x\nnet: tx_resp_prod=0x%08x," - " tx_event=0x%08x, state=%d\n", - np->tx_full, np->tx_resp_cons, - np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod, - np->net_idx->tx_event, - test_bit(__LINK_STATE_XOFF, &dev->state)); - printk(KERN_ALERT "net: rx_resp_cons=0x%08x," - " rx_req_prod=0x%08x\nnet: rx_resp_prod=0x%08x, rx_event=0x%08x\n", - np->rx_resp_cons, np->net_idx->rx_req_prod, - np->net_idx->rx_resp_prod, np->net_idx->rx_event); -} - - -static void dbg_network_int(int irq, void *unused, struct pt_regs *ptregs) -{ - struct list_head *ent; - struct net_private *np; - list_for_each ( ent, &dev_list ) - { - np = list_entry(ent, struct net_private, list); - _dbg_network_int(np->dev); - } -} - - -static int network_open(struct net_device *dev) -{ - struct net_private *np = dev->priv; - netop_t netop; - int i, ret; - - netop.cmd = NETOP_RESET_RINGS; - netop.vif = np->idx; - if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 ) - { - printk(KERN_ALERT "Possible net trouble: couldn't reset ring idxs\n"); - return ret; - } - - netop.cmd = NETOP_GET_VIF_INFO; - netop.vif = np->idx; - if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 ) - { - printk(KERN_ALERT "Couldn't get info for vif %d\n", np->idx); - return ret; - } - - memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN); - - set_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx, - netop.u.get_vif_info.ring_mfn << PAGE_SHIFT); - np->net_ring = (net_ring_t *)fix_to_virt( - FIX_NETRING0_BASE + np->net_ring_fixmap_idx); - np->net_idx = &HYPERVISOR_shared_info->net_idx[np->idx]; - - np->rx_bufs_to_notify = 0; - np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0; - memset(&np->stats, 0, sizeof(np->stats)); - spin_lock_init(&np->tx_lock); - memset(np->net_ring, 0, sizeof(*np->net_ring)); - memset(np->net_idx, 0, sizeof(*np->net_idx)); - - /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */ - for ( i = 0; i <= XENNET_TX_RING_SIZE; i++ ) - np->tx_skbs[i] = (void *)(i+1); - for ( i = 0; i <= XENNET_RX_RING_SIZE; i++ ) - np->rx_skbs[i] = (void *)(i+1); - - wmb(); - np->state = STATE_ACTIVE; - - network_alloc_rx_buffers(dev); - - netif_start_queue(dev); - - MOD_INC_USE_COUNT; - - return 0; -} - - -static void network_tx_buf_gc(struct net_device *dev) -{ - NET_RING_IDX i, prod; - unsigned short id; - struct net_private *np = dev->priv; - struct sk_buff *skb; - tx_entry_t *tx_ring = np->net_ring->tx_ring; - - do { - prod = np->net_idx->tx_resp_prod; - - for ( i = np->tx_resp_cons; i != prod; i++ ) - { - id = tx_ring[MASK_NET_TX_IDX(i)].resp.id; - skb = np->tx_skbs[id]; - ADD_ID_TO_FREELIST(np->tx_skbs, id); - dev_kfree_skb_any(skb); - } - - np->tx_resp_cons = prod; - - /* - * Set a new event, then check for race with update of tx_cons. Note - * that it is essential to schedule a callback, no matter how few - * buffers are pending. Even if there is space in the transmit ring, - * higher layers may be blocked because too much data is outstanding: - * in such cases notification from Xen is likely to be the only kick - * that we'll get. - */ - np->net_idx->tx_event = - prod + ((np->net_idx->tx_req_prod - prod) >> 1) + 1; - mb(); - } - while ( prod != np->net_idx->tx_resp_prod ); - - if ( np->tx_full && - ((np->net_idx->tx_req_prod - prod) < XENNET_TX_RING_SIZE) ) - { - np->tx_full = 0; - if ( np->state == STATE_ACTIVE ) - netif_wake_queue(dev); - } -} - - -static inline pte_t *get_ppte(void *addr) -{ - pgd_t *pgd; pmd_t *pmd; pte_t *pte; - pgd = pgd_offset_k( (unsigned long)addr); - pmd = pmd_offset(pgd, (unsigned long)addr); - pte = pte_offset(pmd, (unsigned long)addr); - return pte; -} - - -static void network_alloc_rx_buffers(struct net_device *dev) -{ - unsigned short id; - struct net_private *np = dev->priv; - struct sk_buff *skb; - netop_t netop; - NET_RING_IDX i = np->net_idx->rx_req_prod; - - if ( unlikely((i - np->rx_resp_cons) == XENNET_RX_RING_SIZE) || - unlikely(np->state != STATE_ACTIVE) ) - return; - - do { - skb = dev_alloc_skb(RX_BUF_SIZE); - if ( unlikely(skb == NULL) ) - break; - - skb->dev = dev; - - if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) ) - panic("alloc_skb needs to provide us page-aligned buffers."); - - id = GET_ID_FROM_FREELIST(np->rx_skbs); - np->rx_skbs[id] = skb; - - np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.id = id; - np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr = - virt_to_machine(get_ppte(skb->head)); - - np->rx_bufs_to_notify++; - } - while ( (++i - np->rx_resp_cons) != XENNET_RX_RING_SIZE ); - - /* - * We may have allocated buffers which have entries outstanding in the page - * update queue -- make sure we flush those first! - */ - flush_page_update_queue(); - - np->net_idx->rx_req_prod = i; - np->net_idx->rx_event = np->rx_resp_cons + 1; - - /* Batch Xen notifications. */ - if ( np->rx_bufs_to_notify > (XENNET_RX_RING_SIZE/4) ) - { - netop.cmd = NETOP_PUSH_BUFFERS; - netop.vif = np->idx; - (void)HYPERVISOR_net_io_op(&netop); - np->rx_bufs_to_notify = 0; - } -} - - -static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) -{ - unsigned short id; - struct net_private *np = (struct net_private *)dev->priv; - tx_req_entry_t *tx; - netop_t netop; - NET_RING_IDX i; - - if ( unlikely(np->tx_full) ) - { - printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name); - netif_stop_queue(dev); - return -ENOBUFS; - } - - if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= - PAGE_SIZE) ) - { - struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE); - if ( unlikely(new_skb == NULL) ) - return 1; - skb_put(new_skb, skb->len); - memcpy(new_skb->data, skb->data, skb->len); - dev_kfree_skb(skb); - skb = new_skb; - } - - spin_lock_irq(&np->tx_lock); - - i = np->net_idx->tx_req_prod; - - id = GET_ID_FROM_FREELIST(np->tx_skbs); - np->tx_skbs[id] = skb; - - tx = &np->net_ring->tx_ring[MASK_NET_TX_IDX(i)].req; - - tx->id = id; - tx->addr = phys_to_machine(virt_to_phys(skb->data)); - tx->size = skb->len; - - wmb(); - np->net_idx->tx_req_prod = i + 1; - - network_tx_buf_gc(dev); - - if ( (i - np->tx_resp_cons) == (XENNET_TX_RING_SIZE - 1) ) - { - np->tx_full = 1; - netif_stop_queue(dev); - } - - spin_unlock_irq(&np->tx_lock); - - np->stats.tx_bytes += skb->len; - np->stats.tx_packets++; - - /* Only notify Xen if there are no outstanding responses. */ - mb(); - if ( np->net_idx->tx_resp_prod == i ) - { - netop.cmd = NETOP_PUSH_BUFFERS; - netop.vif = np->idx; - (void)HYPERVISOR_net_io_op(&netop); - } - - return 0; -} - - -static inline void _network_interrupt(struct net_device *dev) -{ - struct net_private *np = dev->priv; - unsigned long flags; - struct sk_buff *skb; - rx_resp_entry_t *rx; - NET_RING_IDX i; - - if ( unlikely(np->state == STATE_CLOSED) ) - return; - - spin_lock_irqsave(&np->tx_lock, flags); - network_tx_buf_gc(dev); - spin_unlock_irqrestore(&np->tx_lock, flags); - - again: - for ( i = np->rx_resp_cons; i != np->net_idx->rx_resp_prod; i++ ) - { - rx = &np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].resp; - - skb = np->rx_skbs[rx->id]; - ADD_ID_TO_FREELIST(np->rx_skbs, rx->id); - - if ( unlikely(rx->status != RING_STATUS_OK) ) - { - /* Gate this error. We get a (valid) slew of them on suspend. */ - if ( np->state == STATE_ACTIVE ) - printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status); - dev_kfree_skb_any(skb); - continue; - } - - /* - * Set up shinfo -- from alloc_skb This was particularily nasty: the - * shared info is hidden at the back of the data area (presumably so it - * can be shared), but on page flip it gets very spunked. - */ - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->frag_list = NULL; - - phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] = - (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT; - - skb->data = skb->tail = skb->head + rx->offset; - skb_put(skb, rx->size); - skb->protocol = eth_type_trans(skb, dev); - - np->stats.rx_packets++; - - np->stats.rx_bytes += rx->size; - netif_rx(skb); - dev->last_rx = jiffies; - } - - np->rx_resp_cons = i; - - network_alloc_rx_buffers(dev); - - /* Deal with hypervisor racing our resetting of rx_event. */ - mb(); - if ( np->net_idx->rx_resp_prod != i ) - goto again; -} - - -static void network_interrupt(int irq, void *unused, struct pt_regs *ptregs) -{ - struct list_head *ent; - struct net_private *np; - list_for_each ( ent, &dev_list ) - { - np = list_entry(ent, struct net_private, list); - _network_interrupt(np->dev); - } -} - - -static int network_close(struct net_device *dev) -{ - struct net_private *np = dev->priv; - netop_t netop; - - np->state = STATE_SUSPENDED; - wmb(); - - netif_stop_queue(np->dev); - - netop.cmd = NETOP_FLUSH_BUFFERS; - netop.vif = np->idx; - (void)HYPERVISOR_net_io_op(&netop); - - while ( (np->rx_resp_cons != np->net_idx->rx_req_prod) || - (np->tx_resp_cons != np->net_idx->tx_req_prod) ) - { - barrier(); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1); - } - - wmb(); - np->state = STATE_CLOSED; - wmb(); - - /* Now no longer safe to take interrupts for this device. */ - clear_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx); - - MOD_DEC_USE_COUNT; - - return 0; -} - - -static struct net_device_stats *network_get_stats(struct net_device *dev) -{ - struct net_private *np = (struct net_private *)dev->priv; - return &np->stats; -} - - -/* - * This notifier is installed for domain 0 only. - * All other domains have VFR rules installed on their behalf by domain 0 - * when they are created. For bootstrap, Xen creates wildcard rules for - * domain 0 -- this notifier is used to detect when we find our proper - * IP address, so we can poke down proper rules and remove the wildcards. - */ -static int inetdev_notify(struct notifier_block *this, - unsigned long event, - void *ptr) -{ - struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; - struct net_device *dev = ifa->ifa_dev->dev; - struct list_head *ent; - struct net_private *np; - int idx = -1; - network_op_t op; - - list_for_each ( ent, &dev_list ) - { - np = list_entry(dev_list.next, struct net_private, list); - if ( np->dev == dev ) - idx = np->idx; - } - - if ( idx == -1 ) - goto out; - - memset(&op, 0, sizeof(op)); - op.u.net_rule.proto = NETWORK_PROTO_ANY; - op.u.net_rule.action = NETWORK_ACTION_ACCEPT; - - if ( event == NETDEV_UP ) - op.cmd = NETWORK_OP_ADDRULE; - else if ( event == NETDEV_DOWN ) - op.cmd = NETWORK_OP_DELETERULE; - else - goto out; - - op.u.net_rule.src_dom = 0; - op.u.net_rule.src_idx = idx; - op.u.net_rule.dst_dom = VIF_SPECIAL; - op.u.net_rule.dst_idx = VIF_PHYSICAL_INTERFACE; - op.u.net_rule.src_addr = ntohl(ifa->ifa_address); - op.u.net_rule.src_addr_mask = ~0UL; - op.u.net_rule.dst_addr = 0; - op.u.net_rule.dst_addr_mask = 0; - (void)HYPERVISOR_network_op(&op); - - op.u.net_rule.src_dom = VIF_SPECIAL; - op.u.net_rule.src_idx = VIF_ANY_INTERFACE; - op.u.net_rule.dst_dom = 0; - op.u.net_rule.dst_idx = idx; - op.u.net_rule.src_addr = 0; - op.u.net_rule.src_addr_mask = 0; - op.u.net_rule.dst_addr = ntohl(ifa->ifa_address); - op.u.net_rule.dst_addr_mask = ~0UL; - (void)HYPERVISOR_network_op(&op); - - out: - return NOTIFY_DONE; -} - -static struct notifier_block notifier_inetdev = { - .notifier_call = inetdev_notify, - .next = NULL, - .priority = 0 -}; - - -static int __init init_module(void) -{ - int i, fixmap_idx=-1, err; - struct net_device *dev; - struct net_private *np; - netop_t netop; - - INIT_LIST_HEAD(&dev_list); - - /* - * Domain 0 must poke its own network rules as it discovers its IP - * addresses. All other domains have a privileged "parent" to do this for - * them at start of day. - */ - if ( start_info.flags & SIF_INITDOMAIN ) - (void)register_inetaddr_notifier(¬ifier_inetdev); - - err = request_irq(HYPEREVENT_IRQ(_EVENT_NET), network_interrupt, - SA_SAMPLE_RANDOM, "network", NULL); - if ( err ) - { - printk(KERN_WARNING "Could not allocate network interrupt\n"); - goto fail; - } - - err = request_irq(HYPEREVENT_IRQ(_EVENT_DEBUG), dbg_network_int, - SA_SHIRQ, "net_dbg", &dbg_network_int); - if ( err ) - printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n"); - - for ( i = 0; i < MAX_DOMAIN_VIFS; i++ ) - { - /* If the VIF is invalid then the query hypercall will fail. */ - netop.cmd = NETOP_GET_VIF_INFO; - netop.vif = i; - if ( HYPERVISOR_net_io_op(&netop) != 0 ) - continue; - - /* We actually only support up to 4 vifs right now. */ - if ( ++fixmap_idx == 4 ) - break; - - dev = alloc_etherdev(sizeof(struct net_private)); - if ( dev == NULL ) - { - err = -ENOMEM; - goto fail; - } - - np = dev->priv; - np->state = STATE_CLOSED; - np->net_ring_fixmap_idx = fixmap_idx; - np->idx = i; - - SET_MODULE_OWNER(dev); - dev->open = network_open; - dev->hard_start_xmit = network_start_xmit; - dev->stop = network_close; - dev->get_stats = network_get_stats; - - memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN); - - if ( (err = register_netdev(dev)) != 0 ) - { - kfree(dev); - goto fail; - } - - np->dev = dev; - list_add(&np->list, &dev_list); - } - - return 0; - - fail: - cleanup_module(); - return err; -} - - -static void cleanup_module(void) -{ - struct net_private *np; - struct net_device *dev; - - while ( !list_empty(&dev_list) ) - { - np = list_entry(dev_list.next, struct net_private, list); - list_del(&np->list); - dev = np->dev; - unregister_netdev(dev); - kfree(dev); - } - - if ( start_info.flags & SIF_INITDOMAIN ) - (void)unregister_inetaddr_notifier(¬ifier_inetdev); -} - - -module_init(init_module); -module_exit(cleanup_module); diff --git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/vnetif/Makefile b/xenolinux-2.4.25-sparse/arch/xeno/drivers/vnetif/Makefile deleted file mode 100644 index 304c2e78ef..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/vnetif/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-y := vnetif.o -include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/vnetif/vnetif.c b/xenolinux-2.4.25-sparse/arch/xeno/drivers/vnetif/vnetif.c deleted file mode 100644 index 91f3c5c17e..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/vnetif/vnetif.c +++ /dev/null @@ -1,553 +0,0 @@ -/****************************************************************************** - * vnetif.c - * - * Virtual network driver for XenoLinux. - * - * Copyright (c) 2002-2004, K A Fraser - */ - -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - -#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */ - -static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs); -static void network_tx_buf_gc(struct net_device *dev); -static void network_alloc_rx_buffers(struct net_device *dev); -static void cleanup_module(void); - -static struct list_head dev_list; - -struct net_private -{ - struct list_head list; - struct net_device *dev; - - struct net_device_stats stats; - NET_RING_IDX rx_resp_cons, tx_resp_cons; - unsigned int net_ring_fixmap_idx, tx_full; - net_ring_t *net_ring; - net_idx_t *net_idx; - spinlock_t tx_lock; - unsigned int idx; /* Domain-specific index of this VIF. */ - - unsigned int rx_bufs_to_notify; - -#define STATE_ACTIVE 0 -#define STATE_SUSPENDED 1 -#define STATE_CLOSED 2 - unsigned int state; - - /* - * {tx,rx}_skbs store outstanding skbuffs. The first entry in each - * array is an index into a chain of free entries. - */ - struct sk_buff *tx_skbs[XENNET_TX_RING_SIZE+1]; - struct sk_buff *rx_skbs[XENNET_RX_RING_SIZE+1]; -}; - -/* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */ -#define ADD_ID_TO_FREELIST(_list, _id) \ - (_list)[(_id)] = (_list)[0]; \ - (_list)[0] = (void *)(unsigned long)(_id); -#define GET_ID_FROM_FREELIST(_list) \ - ({ unsigned long _id = (unsigned long)(_list)[0]; \ - (_list)[0] = (_list)[_id]; \ - (unsigned short)_id; }) - - -static void _dbg_network_int(struct net_device *dev) -{ - struct net_private *np = dev->priv; - - if ( np->state == STATE_CLOSED ) - return; - - printk(KERN_ALERT "net: tx_full=%d, tx_resp_cons=0x%08x," - " tx_req_prod=0x%08x\nnet: tx_resp_prod=0x%08x," - " tx_event=0x%08x, state=%d\n", - np->tx_full, np->tx_resp_cons, - np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod, - np->net_idx->tx_event, - test_bit(__LINK_STATE_XOFF, &dev->state)); - printk(KERN_ALERT "net: rx_resp_cons=0x%08x," - " rx_req_prod=0x%08x\nnet: rx_resp_prod=0x%08x, rx_event=0x%08x\n", - np->rx_resp_cons, np->net_idx->rx_req_prod, - np->net_idx->rx_resp_prod, np->net_idx->rx_event); -} - - -static void dbg_network_int(int irq, void *unused, struct pt_regs *ptregs) -{ - struct list_head *ent; - struct net_private *np; - list_for_each ( ent, &dev_list ) - { - np = list_entry(ent, struct net_private, list); - _dbg_network_int(np->dev); - } -} - - -static int network_open(struct net_device *dev) -{ - struct net_private *np = dev->priv; - netop_t netop; - int i, ret; - - netop.cmd = NETOP_RESET_RINGS; - netop.vif = np->idx; - if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 ) - { - printk(KERN_ALERT "Possible net trouble: couldn't reset ring idxs\n"); - return ret; - } - - netop.cmd = NETOP_GET_VIF_INFO; - netop.vif = np->idx; - if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 ) - { - printk(KERN_ALERT "Couldn't get info for vif %d\n", np->idx); - return ret; - } - - memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN); - - set_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx, - netop.u.get_vif_info.ring_mfn << PAGE_SHIFT); - np->net_ring = (net_ring_t *)fix_to_virt( - FIX_NETRING0_BASE + np->net_ring_fixmap_idx); - np->net_idx = &HYPERVISOR_shared_info->net_idx[np->idx]; - - np->rx_bufs_to_notify = 0; - np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0; - memset(&np->stats, 0, sizeof(np->stats)); - spin_lock_init(&np->tx_lock); - memset(np->net_ring, 0, sizeof(*np->net_ring)); - memset(np->net_idx, 0, sizeof(*np->net_idx)); - - /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */ - for ( i = 0; i <= XENNET_TX_RING_SIZE; i++ ) - np->tx_skbs[i] = (void *)(i+1); - for ( i = 0; i <= XENNET_RX_RING_SIZE; i++ ) - np->rx_skbs[i] = (void *)(i+1); - - wmb(); - np->state = STATE_ACTIVE; - - network_alloc_rx_buffers(dev); - - netif_start_queue(dev); - - MOD_INC_USE_COUNT; - - return 0; -} - - -static void network_tx_buf_gc(struct net_device *dev) -{ - NET_RING_IDX i, prod; - unsigned short id; - struct net_private *np = dev->priv; - struct sk_buff *skb; - tx_entry_t *tx_ring = np->net_ring->tx_ring; - - do { - prod = np->net_idx->tx_resp_prod; - - for ( i = np->tx_resp_cons; i != prod; i++ ) - { - id = tx_ring[MASK_NET_TX_IDX(i)].resp.id; - skb = np->tx_skbs[id]; - ADD_ID_TO_FREELIST(np->tx_skbs, id); - dev_kfree_skb_any(skb); - } - - np->tx_resp_cons = prod; - - /* - * Set a new event, then check for race with update of tx_cons. Note - * that it is essential to schedule a callback, no matter how few - * buffers are pending. Even if there is space in the transmit ring, - * higher layers may be blocked because too much data is outstanding: - * in such cases notification from Xen is likely to be the only kick - * that we'll get. - */ - np->net_idx->tx_event = - prod + ((np->net_idx->tx_req_prod - prod) >> 1) + 1; - mb(); - } - while ( prod != np->net_idx->tx_resp_prod ); - - if ( np->tx_full && - ((np->net_idx->tx_req_prod - prod) < XENNET_TX_RING_SIZE) ) - { - np->tx_full = 0; - if ( np->state == STATE_ACTIVE ) - netif_wake_queue(dev); - } -} - - -static inline pte_t *get_ppte(void *addr) -{ - pgd_t *pgd; pmd_t *pmd; pte_t *pte; - pgd = pgd_offset_k( (unsigned long)addr); - pmd = pmd_offset(pgd, (unsigned long)addr); - pte = pte_offset(pmd, (unsigned long)addr); - return pte; -} - - -static void network_alloc_rx_buffers(struct net_device *dev) -{ - unsigned short id; - struct net_private *np = dev->priv; - struct sk_buff *skb; - netop_t netop; - NET_RING_IDX i = np->net_idx->rx_req_prod; - - if ( unlikely((i - np->rx_resp_cons) == XENNET_RX_RING_SIZE) || - unlikely(np->state != STATE_ACTIVE) ) - return; - - do { - skb = dev_alloc_skb(RX_BUF_SIZE); - if ( unlikely(skb == NULL) ) - break; - - skb->dev = dev; - - if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) ) - panic("alloc_skb needs to provide us page-aligned buffers."); - - id = GET_ID_FROM_FREELIST(np->rx_skbs); - np->rx_skbs[id] = skb; - - np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.id = id; - np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr = - virt_to_machine(get_ppte(skb->head)); - - np->rx_bufs_to_notify++; - } - while ( (++i - np->rx_resp_cons) != XENNET_RX_RING_SIZE ); - - /* - * We may have allocated buffers which have entries outstanding in the page - * update queue -- make sure we flush those first! - */ - flush_page_update_queue(); - - np->net_idx->rx_req_prod = i; - np->net_idx->rx_event = np->rx_resp_cons + 1; - - /* Batch Xen notifications. */ - if ( np->rx_bufs_to_notify > (XENNET_RX_RING_SIZE/4) ) - { - netop.cmd = NETOP_PUSH_BUFFERS; - netop.vif = np->idx; - (void)HYPERVISOR_net_io_op(&netop); - np->rx_bufs_to_notify = 0; - } -} - - -static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) -{ - unsigned short id; - struct net_private *np = (struct net_private *)dev->priv; - tx_req_entry_t *tx; - netop_t netop; - NET_RING_IDX i; - - if ( unlikely(np->tx_full) ) - { - printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name); - netif_stop_queue(dev); - return -ENOBUFS; - } - - if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= - PAGE_SIZE) ) - { - struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE); - if ( unlikely(new_skb == NULL) ) - return 1; - skb_put(new_skb, skb->len); - memcpy(new_skb->data, skb->data, skb->len); - dev_kfree_skb(skb); - skb = new_skb; - } - - spin_lock_irq(&np->tx_lock); - - i = np->net_idx->tx_req_prod; - - id = GET_ID_FROM_FREELIST(np->tx_skbs); - np->tx_skbs[id] = skb; - - tx = &np->net_ring->tx_ring[MASK_NET_TX_IDX(i)].req; - - tx->id = id; - tx->addr = phys_to_machine(virt_to_phys(skb->data)); - tx->size = skb->len; - - wmb(); - np->net_idx->tx_req_prod = i + 1; - - network_tx_buf_gc(dev); - - if ( (i - np->tx_resp_cons) == (XENNET_TX_RING_SIZE - 1) ) - { - np->tx_full = 1; - netif_stop_queue(dev); - } - - spin_unlock_irq(&np->tx_lock); - - np->stats.tx_bytes += skb->len; - np->stats.tx_packets++; - - /* Only notify Xen if there are no outstanding responses. */ - mb(); - if ( np->net_idx->tx_resp_prod == i ) - { - netop.cmd = NETOP_PUSH_BUFFERS; - netop.vif = np->idx; - (void)HYPERVISOR_net_io_op(&netop); - } - - return 0; -} - - -static inline void _network_interrupt(struct net_device *dev) -{ - struct net_private *np = dev->priv; - unsigned long flags; - struct sk_buff *skb; - rx_resp_entry_t *rx; - NET_RING_IDX i; - - if ( unlikely(np->state == STATE_CLOSED) ) - return; - - spin_lock_irqsave(&np->tx_lock, flags); - network_tx_buf_gc(dev); - spin_unlock_irqrestore(&np->tx_lock, flags); - - again: - for ( i = np->rx_resp_cons; i != np->net_idx->rx_resp_prod; i++ ) - { - rx = &np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].resp; - - skb = np->rx_skbs[rx->id]; - ADD_ID_TO_FREELIST(np->rx_skbs, rx->id); - - if ( unlikely(rx->status != RING_STATUS_OK) ) - { - /* Gate this error. We get a (valid) slew of them on suspend. */ - if ( np->state == STATE_ACTIVE ) - printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status); - dev_kfree_skb_any(skb); - continue; - } - - /* - * Set up shinfo -- from alloc_skb This was particularily nasty: the - * shared info is hidden at the back of the data area (presumably so it - * can be shared), but on page flip it gets very spunked. - */ - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->frag_list = NULL; - - phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] = - (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT; - - skb->data = skb->tail = skb->head + rx->offset; - skb_put(skb, rx->size); - skb->protocol = eth_type_trans(skb, dev); - - np->stats.rx_packets++; - - np->stats.rx_bytes += rx->size; - netif_rx(skb); - dev->last_rx = jiffies; - } - - np->rx_resp_cons = i; - - network_alloc_rx_buffers(dev); - - /* Deal with hypervisor racing our resetting of rx_event. */ - mb(); - if ( np->net_idx->rx_resp_prod != i ) - goto again; -} - - -static void network_interrupt(int irq, void *unused, struct pt_regs *ptregs) -{ - struct list_head *ent; - struct net_private *np; - list_for_each ( ent, &dev_list ) - { - np = list_entry(ent, struct net_private, list); - _network_interrupt(np->dev); - } -} - - -static int network_close(struct net_device *dev) -{ - struct net_private *np = dev->priv; - netop_t netop; - - np->state = STATE_SUSPENDED; - wmb(); - - netif_stop_queue(np->dev); - - netop.cmd = NETOP_FLUSH_BUFFERS; - netop.vif = np->idx; - (void)HYPERVISOR_net_io_op(&netop); - - while ( (np->rx_resp_cons != np->net_idx->rx_req_prod) || - (np->tx_resp_cons != np->net_idx->tx_req_prod) ) - { - barrier(); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1); - } - - wmb(); - np->state = STATE_CLOSED; - wmb(); - - /* Now no longer safe to take interrupts for this device. */ - clear_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx); - - MOD_DEC_USE_COUNT; - - return 0; -} - - -static struct net_device_stats *network_get_stats(struct net_device *dev) -{ - struct net_private *np = (struct net_private *)dev->priv; - return &np->stats; -} - - -static int __init init_module(void) -{ -#if 0 - int i, fixmap_idx=-1, err; - struct net_device *dev; - struct net_private *np; - netop_t netop; - - INIT_LIST_HEAD(&dev_list); - - err = request_irq(HYPEREVENT_IRQ(_EVENT_NET), network_interrupt, - SA_SAMPLE_RANDOM, "network", NULL); - if ( err ) - { - printk(KERN_WARNING "Could not allocate network interrupt\n"); - goto fail; - } - - err = request_irq(HYPEREVENT_IRQ(_EVENT_DEBUG), dbg_network_int, - SA_SHIRQ, "net_dbg", &dbg_network_int); - if ( err ) - printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n"); - - for ( i = 0; i < MAX_DOMAIN_VIFS; i++ ) - { - /* If the VIF is invalid then the query hypercall will fail. */ - netop.cmd = NETOP_GET_VIF_INFO; - netop.vif = i; - if ( HYPERVISOR_net_io_op(&netop) != 0 ) - continue; - - /* We actually only support up to 4 vifs right now. */ - if ( ++fixmap_idx == 4 ) - break; - - dev = alloc_etherdev(sizeof(struct net_private)); - if ( dev == NULL ) - { - err = -ENOMEM; - goto fail; - } - - np = dev->priv; - np->state = STATE_CLOSED; - np->net_ring_fixmap_idx = fixmap_idx; - np->idx = i; - - SET_MODULE_OWNER(dev); - dev->open = network_open; - dev->hard_start_xmit = network_start_xmit; - dev->stop = network_close; - dev->get_stats = network_get_stats; - - memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN); - - if ( (err = register_netdev(dev)) != 0 ) - { - kfree(dev); - goto fail; - } - - np->dev = dev; - list_add(&np->list, &dev_list); - } - - return 0; - - fail: - cleanup_module(); - return err; -#endif - return 0; -} - - -static void cleanup_module(void) -{ - struct net_private *np; - struct net_device *dev; - - while ( !list_empty(&dev_list) ) - { - np = list_entry(dev_list.next, struct net_private, list); - list_del(&np->list); - dev = np->dev; - unregister_netdev(dev); - kfree(dev); - } -} - - -module_init(init_module); -module_exit(cleanup_module); diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/Makefile b/xenolinux-2.4.25-sparse/arch/xeno/kernel/Makefile deleted file mode 100644 index 10fc43e742..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/Makefile +++ /dev/null @@ -1,19 +0,0 @@ - -.S.o: - $(CC) $(AFLAGS) -traditional -c $< -o $*.o - -all: kernel.o head.o init_task.o - -O_TARGET := kernel.o - -export-objs := i386_ksyms.o - -obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ - ptrace.o ioport.o ldt.o setup.o time.o sys_i386.o \ - i386_ksyms.o i387.o hypervisor.o physirq.o pci-dma.o - -ifdef CONFIG_PCI -obj-y += pci-i386.o pci-pc.o pci-irq.o -endif - -include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/entry.S b/xenolinux-2.4.25-sparse/arch/xeno/kernel/entry.S deleted file mode 100644 index 9b1a77d4c1..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/entry.S +++ /dev/null @@ -1,781 +0,0 @@ -/* - * linux/arch/i386/entry.S - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -/* - * entry.S contains the system-call and fault low-level handling routines. - * This also contains the timer-interrupt handler, as well as all interrupts - * and faults that can result in a task-switch. - * - * NOTE: This code handles signal-recognition, which happens every time - * after a timer-interrupt and after each system call. - * - * I changed all the .align's to 4 (16 byte alignment), as that's faster - * on a 486. - * - * Stack layout in 'ret_from_system_call': - * ptrace needs to have all regs on the stack. - * if the order here is changed, it needs to be - * updated in fork.c:copy_process, signal.c:do_signal, - * ptrace.c and ptrace.h - * - * 0(%esp) - %ebx - * 4(%esp) - %ecx - * 8(%esp) - %edx - * C(%esp) - %esi - * 10(%esp) - %edi - * 14(%esp) - %ebp - * 18(%esp) - %eax - * 1C(%esp) - %ds - * 20(%esp) - %es - * 24(%esp) - orig_eax - * 28(%esp) - %eip - * 2C(%esp) - %cs - * 30(%esp) - %eflags - * 34(%esp) - %oldesp - * 38(%esp) - %oldss - * - * "current" is in register %ebx during any slow entries. - */ - -#include -#include -#include -#include -#include - -EBX = 0x00 -ECX = 0x04 -EDX = 0x08 -ESI = 0x0C -EDI = 0x10 -EBP = 0x14 -EAX = 0x18 -DS = 0x1C -ES = 0x20 -ORIG_EAX = 0x24 -EIP = 0x28 -CS = 0x2C -EFLAGS = 0x30 -OLDESP = 0x34 -OLDSS = 0x38 - -CF_MASK = 0x00000001 -TF_MASK = 0x00000100 -IF_MASK = 0x00000200 -DF_MASK = 0x00000400 -NT_MASK = 0x00004000 - -/* - * these are offsets into the task-struct. - */ -state = 0 -flags = 4 -sigpending = 8 -addr_limit = 12 -exec_domain = 16 -need_resched = 20 -tsk_ptrace = 24 -processor = 52 - -ENOSYS = 38 - - -#define SAVE_ALL \ - cld; \ - pushl %es; \ - pushl %ds; \ - pushl %eax; \ - pushl %ebp; \ - pushl %edi; \ - pushl %esi; \ - pushl %edx; \ - pushl %ecx; \ - pushl %ebx; \ - movl $(__KERNEL_DS),%edx; \ - movl %edx,%ds; \ - movl %edx,%es; - -#define RESTORE_ALL \ - popl %ebx; \ - popl %ecx; \ - popl %edx; \ - popl %esi; \ - popl %edi; \ - popl %ebp; \ - popl %eax; \ -1: popl %ds; \ -2: popl %es; \ - addl $4,%esp; \ -3: iret; \ -.section .fixup,"ax"; \ -4: movl $0,(%esp); \ - jmp 1b; \ -5: movl $0,(%esp); \ - jmp 2b; \ -6: pushl %ss; \ - popl %ds; \ - pushl %ss; \ - popl %es; \ - pushl $11; \ - call do_exit; \ -.previous; \ -.section __ex_table,"a";\ - .align 4; \ - .long 1b,4b; \ - .long 2b,5b; \ - .long 3b,6b; \ -.previous - -#define GET_CURRENT(reg) \ - movl $-8192, reg; \ - andl %esp, reg - -ENTRY(lcall7) - pushfl # We get a different stack layout with call - pushl %eax # gates, which has to be cleaned up later.. - SAVE_ALL - movl EIP(%esp),%eax # due to call gates, this is eflags, not eip.. - movl CS(%esp),%edx # this is eip.. - movl EFLAGS(%esp),%ecx # and this is cs.. - movl %eax,EFLAGS(%esp) # - andl $~(NT_MASK|TF_MASK|DF_MASK), %eax - pushl %eax - popfl - movl %edx,EIP(%esp) # Now we move them to their "normal" places - movl %ecx,CS(%esp) # - movl %esp,%ebx - pushl %ebx - andl $-8192,%ebx # GET_CURRENT - movl exec_domain(%ebx),%edx # Get the execution domain - movl 4(%edx),%edx # Get the lcall7 handler for the domain - pushl $0x7 - call *%edx - addl $4, %esp - popl %eax - jmp ret_from_sys_call - -ENTRY(lcall27) - pushfl # We get a different stack layout with call - pushl %eax # gates, which has to be cleaned up later.. - SAVE_ALL - movl EIP(%esp),%eax # due to call gates, this is eflags, not eip.. - movl CS(%esp),%edx # this is eip.. - movl EFLAGS(%esp),%ecx # and this is cs.. - movl %eax,EFLAGS(%esp) # - andl $~(NT_MASK|TF_MASK|DF_MASK), %eax - pushl %eax - popfl - movl %edx,EIP(%esp) # Now we move them to their "normal" places - movl %ecx,CS(%esp) # - movl %esp,%ebx - pushl %ebx - andl $-8192,%ebx # GET_CURRENT - movl exec_domain(%ebx),%edx # Get the execution domain - movl 4(%edx),%edx # Get the lcall7 handler for the domain - pushl $0x27 - call *%edx - addl $4, %esp - popl %eax - jmp ret_from_sys_call - -ENTRY(ret_from_fork) - pushl %ebx - call SYMBOL_NAME(schedule_tail) - addl $4, %esp - GET_CURRENT(%ebx) - testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS - jne tracesys_exit - jmp ret_from_sys_call - -/* - * Return to user mode is not as complex as all this looks, - * but we want the default path for a system call return to - * go as quickly as possible which is why some of this is - * less clear than it otherwise should be. - */ -ENTRY(system_call) - pushl %eax # save orig_eax - SAVE_ALL - GET_CURRENT(%ebx) - testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS - jne tracesys - cmpl $(NR_syscalls),%eax - jae badsys - call *SYMBOL_NAME(sys_call_table)(,%eax,4) - movl %eax,EAX(%esp) # save the return value -ENTRY(ret_from_sys_call) - movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi - btrl $EVENTS_MASTER_ENABLE_BIT,4(%esi) # make tests atomic -ret_syscall_tests: - cmpl $0,need_resched(%ebx) - jne reschedule - cmpl $0,sigpending(%ebx) - je safesti # ensure need_resched updates are seen -signal_return: - btsl $EVENTS_MASTER_ENABLE_BIT,4(%esi) # reenable event callbacks - movl %esp,%eax - xorl %edx,%edx - call SYMBOL_NAME(do_signal) - jmp ret_from_sys_call - - ALIGN -restore_all: - RESTORE_ALL - - ALIGN -tracesys: - movl $-ENOSYS,EAX(%esp) - call SYMBOL_NAME(syscall_trace) - movl ORIG_EAX(%esp),%eax - cmpl $(NR_syscalls),%eax - jae tracesys_exit - call *SYMBOL_NAME(sys_call_table)(,%eax,4) - movl %eax,EAX(%esp) # save the return value -tracesys_exit: - call SYMBOL_NAME(syscall_trace) - jmp ret_from_sys_call -badsys: - movl $-ENOSYS,EAX(%esp) - jmp ret_from_sys_call - - ALIGN -ENTRY(ret_from_intr) - GET_CURRENT(%ebx) -ret_from_exception: - movb CS(%esp),%al - testl $2,%eax - jne ret_from_sys_call - jmp restore_all - - ALIGN -reschedule: - btsl $EVENTS_MASTER_ENABLE_BIT,4(%esi) # reenable event callbacks - call SYMBOL_NAME(schedule) # test - jmp ret_from_sys_call - -ENTRY(divide_error) - pushl $0 # no error code - pushl $ SYMBOL_NAME(do_divide_error) - ALIGN -error_code: - pushl %ds - pushl %eax - xorl %eax,%eax - pushl %ebp - pushl %edi - pushl %esi - pushl %edx - decl %eax # eax = -1 - pushl %ecx - pushl %ebx - GET_CURRENT(%ebx) - cld - movl %es,%ecx - movl ORIG_EAX(%esp), %esi # get the error code - movl ES(%esp), %edi # get the function address - movl %eax, ORIG_EAX(%esp) - movl %ecx, ES(%esp) - movl %esp,%edx - pushl %esi # push the error code - pushl %edx # push the pt_regs pointer - movl $(__KERNEL_DS),%edx - movl %edx,%ds - movl %edx,%es - call *%edi - addl $8,%esp - jmp ret_from_exception - -# A note on the "critical region" in our callback handler. -# We want to avoid stacking callback handlers due to events occurring -# during handling of the last event. To do this, we keep events disabled -# until we've done all processing. HOWEVER, we must enable events before -# popping the stack frame (can't be done atomically) and so it would still -# be possible to get enough handler activations to overflow the stack. -# Although unlikely, bugs of that kind are hard to track down, so we'd -# like to avoid the possibility. -# So, on entry to the handler we detect whether we interrupted an -# existing activation in its critical region -- if so, we pop the current -# activation and restart the handler using the previous one. -ENTRY(hypervisor_callback) - pushl %eax - SAVE_ALL - GET_CURRENT(%ebx) - movl EIP(%esp),%eax - cmpl $scrit,%eax - jb 11f - cmpl $ecrit,%eax - jb critical_region_fixup -11: push %esp - call do_hypervisor_callback - add $4,%esp - movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi - movb CS(%esp),%cl - test $2,%cl # slow return to ring 2 or 3 - jne ret_syscall_tests -safesti:btsl $EVENTS_MASTER_ENABLE_BIT,4(%esi) # reenable event callbacks -scrit: /**** START OF CRITICAL REGION ****/ - cmpl $0,(%esi) - jne 14f # process more events if necessary... - RESTORE_ALL -14: btrl $EVENTS_MASTER_ENABLE_BIT,4(%esi) - jmp 11b -ecrit: /**** END OF CRITICAL REGION ****/ -# [How we do the fixup]. We want to merge the current stack frame with the -# just-interrupted frame. How we do this depends on where in the critical -# region the interrupted handler was executing, and so how many saved -# registers are in each frame. We do this quickly using the lookup table -# 'critical_fixup_table'. For each byte offset in the critical region, it -# provides the number of bytes which have already been popped from the -# interrupted stack frame. -critical_region_fixup: - addl $critical_fixup_table-scrit,%eax - movzbl (%eax),%eax # %eax contains num bytes popped - mov %esp,%esi - add %eax,%esi # %esi points at end of src region - mov %esp,%edi - add $0x34,%edi # %edi points at end of dst region - mov %eax,%ecx - shr $2,%ecx # convert words to bytes - je 16f # skip loop if nothing to copy -15: subl $4,%esi # pre-decrementing copy loop - subl $4,%edi - movl (%esi),%eax - movl %eax,(%edi) - loop 15b -16: movl %edi,%esp # final %edi is top of merged stack - jmp 11b - -critical_fixup_table: - .byte 0x00,0x00,0x00 # cmpl $0,(%esi) - .byte 0x00,0x00 # jne 14f - .byte 0x00 # pop %ebx - .byte 0x04 # pop %ecx - .byte 0x08 # pop %edx - .byte 0x0c # pop %esi - .byte 0x10 # pop %edi - .byte 0x14 # pop %ebp - .byte 0x18 # pop %eax - .byte 0x1c # pop %ds - .byte 0x20 # pop %es - .byte 0x24,0x24,0x24 # add $4,%esp - .byte 0x28 # iret - .byte 0x00,0x00,0x00,0x00,0x00 # btrl $31,4(%esi) - .byte 0x00,0x00 # jmp 11b - -# Hypervisor uses this for application faults while it executes. -ENTRY(failsafe_callback) - call SYMBOL_NAME(install_safe_pf_handler) -1: pop %ds -2: pop %es -3: pop %fs -4: pop %gs - call SYMBOL_NAME(install_normal_pf_handler) -5: iret -.section .fixup,"ax"; \ -6: movl $0,(%esp); \ - jmp 1b; \ -7: movl $0,(%esp); \ - jmp 2b; \ -8: movl $0,(%esp); \ - jmp 3b; \ -9: movl $0,(%esp); \ - jmp 4b; \ -10: pushl %ss; \ - popl %ds; \ - pushl %ss; \ - popl %es; \ - pushl $11; \ - call do_exit; \ -.previous; \ -.section __ex_table,"a";\ - .align 4; \ - .long 1b,6b; \ - .long 2b,7b; \ - .long 3b,8b; \ - .long 4b,9b; \ - .long 5b,10b; \ -.previous - -ENTRY(coprocessor_error) - pushl $0 - pushl $ SYMBOL_NAME(do_coprocessor_error) - jmp error_code - -ENTRY(simd_coprocessor_error) - pushl $0 - pushl $ SYMBOL_NAME(do_simd_coprocessor_error) - jmp error_code - -ENTRY(device_not_available) - pushl $-1 # mark this as an int - SAVE_ALL - GET_CURRENT(%ebx) - call SYMBOL_NAME(math_state_restore) - jmp ret_from_exception - -ENTRY(debug) - pushl $0 - pushl $ SYMBOL_NAME(do_debug) - jmp error_code - -ENTRY(int3) - pushl $0 - pushl $ SYMBOL_NAME(do_int3) - jmp error_code - -ENTRY(overflow) - pushl $0 - pushl $ SYMBOL_NAME(do_overflow) - jmp error_code - -ENTRY(bounds) - pushl $0 - pushl $ SYMBOL_NAME(do_bounds) - jmp error_code - -ENTRY(invalid_op) - pushl $0 - pushl $ SYMBOL_NAME(do_invalid_op) - jmp error_code - -ENTRY(coprocessor_segment_overrun) - pushl $0 - pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun) - jmp error_code - -ENTRY(double_fault) - pushl $ SYMBOL_NAME(do_double_fault) - jmp error_code - -ENTRY(invalid_TSS) - pushl $ SYMBOL_NAME(do_invalid_TSS) - jmp error_code - -ENTRY(segment_not_present) - pushl $ SYMBOL_NAME(do_segment_not_present) - jmp error_code - -ENTRY(stack_segment) - pushl $ SYMBOL_NAME(do_stack_segment) - jmp error_code - -ENTRY(general_protection) - pushl $ SYMBOL_NAME(do_general_protection) - jmp error_code - -ENTRY(alignment_check) - pushl $ SYMBOL_NAME(do_alignment_check) - jmp error_code - -# This handler is special, because it gets an extra value on its stack, -# which is the linear faulting address. -#define PAGE_FAULT_STUB(_name1, _name2) \ -ENTRY(_name1) \ - pushl %ds ; \ - pushl %eax ; \ - xorl %eax,%eax ; \ - pushl %ebp ; \ - pushl %edi ; \ - pushl %esi ; \ - pushl %edx ; \ - decl %eax /* eax = -1 */ ; \ - pushl %ecx ; \ - pushl %ebx ; \ - GET_CURRENT(%ebx) ; \ - cld ; \ - movl %es,%ecx ; \ - movl ORIG_EAX(%esp), %esi /* get the error code */ ; \ - movl ES(%esp), %edi /* get the faulting address */ ; \ - movl %eax, ORIG_EAX(%esp) ; \ - movl %ecx, ES(%esp) ; \ - movl %esp,%edx ; \ - pushl %edi /* push the faulting address */ ; \ - pushl %esi /* push the error code */ ; \ - pushl %edx /* push the pt_regs pointer */ ; \ - movl $(__KERNEL_DS),%edx ; \ - movl %edx,%ds ; \ - movl %edx,%es ; \ - call SYMBOL_NAME(_name2) ; \ - addl $12,%esp ; \ - jmp ret_from_exception ; -PAGE_FAULT_STUB(page_fault, do_page_fault) -PAGE_FAULT_STUB(safe_page_fault, do_safe_page_fault) - -ENTRY(machine_check) - pushl $0 - pushl $ SYMBOL_NAME(do_machine_check) - jmp error_code - -ENTRY(spurious_interrupt_bug) - pushl $0 - pushl $ SYMBOL_NAME(do_spurious_interrupt_bug) - jmp error_code - -.data -ENTRY(sys_call_table) - .long SYMBOL_NAME(sys_ni_syscall) /* 0 - old "setup()" system call*/ - .long SYMBOL_NAME(sys_exit) - .long SYMBOL_NAME(sys_fork) - .long SYMBOL_NAME(sys_read) - .long SYMBOL_NAME(sys_write) - .long SYMBOL_NAME(sys_open) /* 5 */ - .long SYMBOL_NAME(sys_close) - .long SYMBOL_NAME(sys_waitpid) - .long SYMBOL_NAME(sys_creat) - .long SYMBOL_NAME(sys_link) - .long SYMBOL_NAME(sys_unlink) /* 10 */ - .long SYMBOL_NAME(sys_execve) - .long SYMBOL_NAME(sys_chdir) - .long SYMBOL_NAME(sys_time) - .long SYMBOL_NAME(sys_mknod) - .long SYMBOL_NAME(sys_chmod) /* 15 */ - .long SYMBOL_NAME(sys_lchown16) - .long SYMBOL_NAME(sys_ni_syscall) /* old break syscall holder */ - .long SYMBOL_NAME(sys_stat) - .long SYMBOL_NAME(sys_lseek) - .long SYMBOL_NAME(sys_getpid) /* 20 */ - .long SYMBOL_NAME(sys_mount) - .long SYMBOL_NAME(sys_oldumount) - .long SYMBOL_NAME(sys_setuid16) - .long SYMBOL_NAME(sys_getuid16) - .long SYMBOL_NAME(sys_stime) /* 25 */ - .long SYMBOL_NAME(sys_ptrace) - .long SYMBOL_NAME(sys_alarm) - .long SYMBOL_NAME(sys_fstat) - .long SYMBOL_NAME(sys_pause) - .long SYMBOL_NAME(sys_utime) /* 30 */ - .long SYMBOL_NAME(sys_ni_syscall) /* old stty syscall holder */ - .long SYMBOL_NAME(sys_ni_syscall) /* old gtty syscall holder */ - .long SYMBOL_NAME(sys_access) - .long SYMBOL_NAME(sys_nice) - .long SYMBOL_NAME(sys_ni_syscall) /* 35 */ /* old ftime syscall holder */ - .long SYMBOL_NAME(sys_sync) - .long SYMBOL_NAME(sys_kill) - .long SYMBOL_NAME(sys_rename) - .long SYMBOL_NAME(sys_mkdir) - .long SYMBOL_NAME(sys_rmdir) /* 40 */ - .long SYMBOL_NAME(sys_dup) - .long SYMBOL_NAME(sys_pipe) - .long SYMBOL_NAME(sys_times) - .long SYMBOL_NAME(sys_ni_syscall) /* old prof syscall holder */ - .long SYMBOL_NAME(sys_brk) /* 45 */ - .long SYMBOL_NAME(sys_setgid16) - .long SYMBOL_NAME(sys_getgid16) - .long SYMBOL_NAME(sys_signal) - .long SYMBOL_NAME(sys_geteuid16) - .long SYMBOL_NAME(sys_getegid16) /* 50 */ - .long SYMBOL_NAME(sys_acct) - .long SYMBOL_NAME(sys_umount) /* recycled never used phys() */ - .long SYMBOL_NAME(sys_ni_syscall) /* old lock syscall holder */ - .long SYMBOL_NAME(sys_ioctl) - .long SYMBOL_NAME(sys_fcntl) /* 55 */ - .long SYMBOL_NAME(sys_ni_syscall) /* old mpx syscall holder */ - .long SYMBOL_NAME(sys_setpgid) - .long SYMBOL_NAME(sys_ni_syscall) /* old ulimit syscall holder */ - .long SYMBOL_NAME(sys_olduname) - .long SYMBOL_NAME(sys_umask) /* 60 */ - .long SYMBOL_NAME(sys_chroot) - .long SYMBOL_NAME(sys_ustat) - .long SYMBOL_NAME(sys_dup2) - .long SYMBOL_NAME(sys_getppid) - .long SYMBOL_NAME(sys_getpgrp) /* 65 */ - .long SYMBOL_NAME(sys_setsid) - .long SYMBOL_NAME(sys_sigaction) - .long SYMBOL_NAME(sys_sgetmask) - .long SYMBOL_NAME(sys_ssetmask) - .long SYMBOL_NAME(sys_setreuid16) /* 70 */ - .long SYMBOL_NAME(sys_setregid16) - .long SYMBOL_NAME(sys_sigsuspend) - .long SYMBOL_NAME(sys_sigpending) - .long SYMBOL_NAME(sys_sethostname) - .long SYMBOL_NAME(sys_setrlimit) /* 75 */ - .long SYMBOL_NAME(sys_old_getrlimit) - .long SYMBOL_NAME(sys_getrusage) - .long SYMBOL_NAME(sys_gettimeofday) - .long SYMBOL_NAME(sys_settimeofday) - .long SYMBOL_NAME(sys_getgroups16) /* 80 */ - .long SYMBOL_NAME(sys_setgroups16) - .long SYMBOL_NAME(old_select) - .long SYMBOL_NAME(sys_symlink) - .long SYMBOL_NAME(sys_lstat) - .long SYMBOL_NAME(sys_readlink) /* 85 */ - .long SYMBOL_NAME(sys_uselib) - .long SYMBOL_NAME(sys_swapon) - .long SYMBOL_NAME(sys_reboot) - .long SYMBOL_NAME(old_readdir) - .long SYMBOL_NAME(old_mmap) /* 90 */ - .long SYMBOL_NAME(sys_munmap) - .long SYMBOL_NAME(sys_truncate) - .long SYMBOL_NAME(sys_ftruncate) - .long SYMBOL_NAME(sys_fchmod) - .long SYMBOL_NAME(sys_fchown16) /* 95 */ - .long SYMBOL_NAME(sys_getpriority) - .long SYMBOL_NAME(sys_setpriority) - .long SYMBOL_NAME(sys_ni_syscall) /* old profil syscall holder */ - .long SYMBOL_NAME(sys_statfs) - .long SYMBOL_NAME(sys_fstatfs) /* 100 */ - .long SYMBOL_NAME(sys_ioperm) - .long SYMBOL_NAME(sys_socketcall) - .long SYMBOL_NAME(sys_syslog) - .long SYMBOL_NAME(sys_setitimer) - .long SYMBOL_NAME(sys_getitimer) /* 105 */ - .long SYMBOL_NAME(sys_newstat) - .long SYMBOL_NAME(sys_newlstat) - .long SYMBOL_NAME(sys_newfstat) - .long SYMBOL_NAME(sys_uname) - .long SYMBOL_NAME(sys_iopl) /* 110 */ - .long SYMBOL_NAME(sys_vhangup) - .long SYMBOL_NAME(sys_ni_syscall) /* old "idle" system call */ - .long SYMBOL_NAME(sys_ni_syscall) /* was VM86 */ - .long SYMBOL_NAME(sys_wait4) - .long SYMBOL_NAME(sys_swapoff) /* 115 */ - .long SYMBOL_NAME(sys_sysinfo) - .long SYMBOL_NAME(sys_ipc) - .long SYMBOL_NAME(sys_fsync) - .long SYMBOL_NAME(sys_sigreturn) - .long SYMBOL_NAME(sys_clone) /* 120 */ - .long SYMBOL_NAME(sys_setdomainname) - .long SYMBOL_NAME(sys_newuname) - .long SYMBOL_NAME(sys_modify_ldt) - .long SYMBOL_NAME(sys_adjtimex) - .long SYMBOL_NAME(sys_mprotect) /* 125 */ - .long SYMBOL_NAME(sys_sigprocmask) - .long SYMBOL_NAME(sys_create_module) - .long SYMBOL_NAME(sys_init_module) - .long SYMBOL_NAME(sys_delete_module) - .long SYMBOL_NAME(sys_get_kernel_syms) /* 130 */ - .long SYMBOL_NAME(sys_quotactl) - .long SYMBOL_NAME(sys_getpgid) - .long SYMBOL_NAME(sys_fchdir) - .long SYMBOL_NAME(sys_bdflush) - .long SYMBOL_NAME(sys_sysfs) /* 135 */ - .long SYMBOL_NAME(sys_personality) - .long SYMBOL_NAME(sys_ni_syscall) /* for afs_syscall */ - .long SYMBOL_NAME(sys_setfsuid16) - .long SYMBOL_NAME(sys_setfsgid16) - .long SYMBOL_NAME(sys_llseek) /* 140 */ - .long SYMBOL_NAME(sys_getdents) - .long SYMBOL_NAME(sys_select) - .long SYMBOL_NAME(sys_flock) - .long SYMBOL_NAME(sys_msync) - .long SYMBOL_NAME(sys_readv) /* 145 */ - .long SYMBOL_NAME(sys_writev) - .long SYMBOL_NAME(sys_getsid) - .long SYMBOL_NAME(sys_fdatasync) - .long SYMBOL_NAME(sys_sysctl) - .long SYMBOL_NAME(sys_mlock) /* 150 */ - .long SYMBOL_NAME(sys_munlock) - .long SYMBOL_NAME(sys_mlockall) - .long SYMBOL_NAME(sys_munlockall) - .long SYMBOL_NAME(sys_sched_setparam) - .long SYMBOL_NAME(sys_sched_getparam) /* 155 */ - .long SYMBOL_NAME(sys_sched_setscheduler) - .long SYMBOL_NAME(sys_sched_getscheduler) - .long SYMBOL_NAME(sys_sched_yield) - .long SYMBOL_NAME(sys_sched_get_priority_max) - .long SYMBOL_NAME(sys_sched_get_priority_min) /* 160 */ - .long SYMBOL_NAME(sys_sched_rr_get_interval) - .long SYMBOL_NAME(sys_nanosleep) - .long SYMBOL_NAME(sys_mremap) - .long SYMBOL_NAME(sys_setresuid16) - .long SYMBOL_NAME(sys_getresuid16) /* 165 */ - .long SYMBOL_NAME(sys_ni_syscall) /* was VM86 */ - .long SYMBOL_NAME(sys_query_module) - .long SYMBOL_NAME(sys_poll) - .long SYMBOL_NAME(sys_nfsservctl) - .long SYMBOL_NAME(sys_setresgid16) /* 170 */ - .long SYMBOL_NAME(sys_getresgid16) - .long SYMBOL_NAME(sys_prctl) - .long SYMBOL_NAME(sys_rt_sigreturn) - .long SYMBOL_NAME(sys_rt_sigaction) - .long SYMBOL_NAME(sys_rt_sigprocmask) /* 175 */ - .long SYMBOL_NAME(sys_rt_sigpending) - .long SYMBOL_NAME(sys_rt_sigtimedwait) - .long SYMBOL_NAME(sys_rt_sigqueueinfo) - .long SYMBOL_NAME(sys_rt_sigsuspend) - .long SYMBOL_NAME(sys_pread) /* 180 */ - .long SYMBOL_NAME(sys_pwrite) - .long SYMBOL_NAME(sys_chown16) - .long SYMBOL_NAME(sys_getcwd) - .long SYMBOL_NAME(sys_capget) - .long SYMBOL_NAME(sys_capset) /* 185 */ - .long SYMBOL_NAME(sys_sigaltstack) - .long SYMBOL_NAME(sys_sendfile) - .long SYMBOL_NAME(sys_ni_syscall) /* streams1 */ - .long SYMBOL_NAME(sys_ni_syscall) /* streams2 */ - .long SYMBOL_NAME(sys_vfork) /* 190 */ - .long SYMBOL_NAME(sys_getrlimit) - .long SYMBOL_NAME(sys_mmap2) - .long SYMBOL_NAME(sys_truncate64) - .long SYMBOL_NAME(sys_ftruncate64) - .long SYMBOL_NAME(sys_stat64) /* 195 */ - .long SYMBOL_NAME(sys_lstat64) - .long SYMBOL_NAME(sys_fstat64) - .long SYMBOL_NAME(sys_lchown) - .long SYMBOL_NAME(sys_getuid) - .long SYMBOL_NAME(sys_getgid) /* 200 */ - .long SYMBOL_NAME(sys_geteuid) - .long SYMBOL_NAME(sys_getegid) - .long SYMBOL_NAME(sys_setreuid) - .long SYMBOL_NAME(sys_setregid) - .long SYMBOL_NAME(sys_getgroups) /* 205 */ - .long SYMBOL_NAME(sys_setgroups) - .long SYMBOL_NAME(sys_fchown) - .long SYMBOL_NAME(sys_setresuid) - .long SYMBOL_NAME(sys_getresuid) - .long SYMBOL_NAME(sys_setresgid) /* 210 */ - .long SYMBOL_NAME(sys_getresgid) - .long SYMBOL_NAME(sys_chown) - .long SYMBOL_NAME(sys_setuid) - .long SYMBOL_NAME(sys_setgid) - .long SYMBOL_NAME(sys_setfsuid) /* 215 */ - .long SYMBOL_NAME(sys_setfsgid) - .long SYMBOL_NAME(sys_pivot_root) - .long SYMBOL_NAME(sys_mincore) - .long SYMBOL_NAME(sys_madvise) - .long SYMBOL_NAME(sys_getdents64) /* 220 */ - .long SYMBOL_NAME(sys_fcntl64) - .long SYMBOL_NAME(sys_ni_syscall) /* reserved for TUX */ - .long SYMBOL_NAME(sys_ni_syscall) /* Reserved for Security */ - .long SYMBOL_NAME(sys_gettid) - .long SYMBOL_NAME(sys_readahead) /* 225 */ - .long SYMBOL_NAME(sys_setxattr) - .long SYMBOL_NAME(sys_lsetxattr) - .long SYMBOL_NAME(sys_fsetxattr) - .long SYMBOL_NAME(sys_getxattr) - .long SYMBOL_NAME(sys_lgetxattr) /* 230 */ - .long SYMBOL_NAME(sys_fgetxattr) - .long SYMBOL_NAME(sys_listxattr) - .long SYMBOL_NAME(sys_llistxattr) - .long SYMBOL_NAME(sys_flistxattr) - .long SYMBOL_NAME(sys_removexattr) /* 235 */ - .long SYMBOL_NAME(sys_lremovexattr) - .long SYMBOL_NAME(sys_fremovexattr) - .long SYMBOL_NAME(sys_tkill) - .long SYMBOL_NAME(sys_sendfile64) - .long SYMBOL_NAME(sys_ni_syscall) /* 240 reserved for futex */ - .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sched_setaffinity */ - .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sched_getaffinity */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_set_thread_area */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_get_thread_area */ - .long SYMBOL_NAME(sys_ni_syscall) /* 245 sys_io_setup */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_destroy */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_getevents */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_submit */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_io_cancel */ - .long SYMBOL_NAME(sys_ni_syscall) /* 250 sys_alloc_hugepages */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_free_hugepages */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_exit_group */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_lookup_dcookie */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_create */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_ctl 255 */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_wait */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_remap_file_pages */ - .long SYMBOL_NAME(sys_ni_syscall) /* sys_set_tid_address */ - - .rept NR_syscalls-(.-sys_call_table)/4 - .long SYMBOL_NAME(sys_ni_syscall) - .endr diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/head.S b/xenolinux-2.4.25-sparse/arch/xeno/kernel/head.S deleted file mode 100644 index 361815a58b..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/head.S +++ /dev/null @@ -1,66 +0,0 @@ - -.text -#include -#include -#include -#include -#include -#include -#include - -/* Offsets in start_info structure */ -#define MOD_START 16 -#define MOD_LEN 20 - -startup_32: - cld - - lss stack_start,%esp - - /* Copy initrd somewhere safe before it's clobbered by BSS. */ - mov MOD_LEN(%esi),%ecx - shr $2,%ecx - jz 2f /* bail from copy loop if no initrd */ - mov $SYMBOL_NAME(_end),%edi - add MOD_LEN(%esi),%edi - mov MOD_START(%esi),%eax - add MOD_LEN(%esi),%eax -1: sub $4,%eax - sub $4,%edi - mov (%eax),%ebx - mov %ebx,(%edi) - loop 1b - mov %edi,MOD_START(%esi) - - /* Clear BSS first so that there are no surprises... */ -2: xorl %eax,%eax - movl $SYMBOL_NAME(__bss_start),%edi - movl $SYMBOL_NAME(_end),%ecx - subl %edi,%ecx - rep stosb - - /* Copy the necessary stuff from start_info structure. */ - mov $SYMBOL_NAME(start_info_union),%edi - mov $128,%ecx - rep movsl - - jmp SYMBOL_NAME(start_kernel) - -ENTRY(stack_start) - .long SYMBOL_NAME(init_task_union)+8192, __KERNEL_DS - -.org 0x1000 -ENTRY(empty_zero_page) - -.org 0x2000 -ENTRY(default_ldt) - -.org 0x3000 -ENTRY(cpu0_pte_quicklist) - -.org 0x3400 -ENTRY(cpu0_pgd_quicklist) - -.org 0x3800 -ENTRY(stext) -ENTRY(_stext) diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/hypervisor.c b/xenolinux-2.4.25-sparse/arch/xeno/kernel/hypervisor.c deleted file mode 100644 index 7c6aca05c5..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/hypervisor.c +++ /dev/null @@ -1,170 +0,0 @@ -/****************************************************************************** - * hypervisor.c - * - * Communication to/from hypervisor. - * - * Copyright (c) 2002, K A Fraser - */ - -#include -#include -#include -#include -#include -#include -#include - -multicall_entry_t multicall_list[8]; -int nr_multicall_ents = 0; - -static unsigned long event_mask = 0; - -asmlinkage unsigned int do_physirq(int irq, struct pt_regs *regs) -{ - int cpu = smp_processor_id(); - unsigned long irqs; - shared_info_t *shared = HYPERVISOR_shared_info; - - /* do this manually */ - kstat.irqs[cpu][irq]++; - ack_hypervisor_event(irq); - - barrier(); - irqs = xchg(&shared->physirq_pend, 0); - - __asm__ __volatile__ ( - " push %1 ;" - " sub $4,%%esp ;" - " jmp 3f ;" - "1: btrl %%eax,%0 ;" /* clear bit */ - " mov %%eax,(%%esp) ;" - " call do_IRQ ;" /* do_IRQ(event) */ - "3: bsfl %0,%%eax ;" /* %eax == bit # */ - " jnz 1b ;" - " add $8,%%esp ;" - /* we use %ebx because it is callee-saved */ - : : "b" (irqs), "r" (regs) - /* clobbered by callback function calls */ - : "eax", "ecx", "edx", "memory" ); - - /* do this manually */ - end_hypervisor_event(irq); - - return 0; -} - -void do_hypervisor_callback(struct pt_regs *regs) -{ - unsigned long events, flags; - shared_info_t *shared = HYPERVISOR_shared_info; - - do { - /* Specialised local_irq_save(). */ - flags = test_and_clear_bit(EVENTS_MASTER_ENABLE_BIT, - &shared->events_mask); - barrier(); - - events = xchg(&shared->events, 0); - events &= event_mask; - - if ( (events & EVENT_PHYSIRQ) != 0 ) - { - do_physirq(_EVENT_PHYSIRQ, regs); - events &= ~EVENT_PHYSIRQ; - } - - __asm__ __volatile__ ( - " push %1 ;" - " sub $4,%%esp ;" - " jmp 2f ;" - "1: btrl %%eax,%0 ;" /* clear bit */ - " add %2,%%eax ;" - " mov %%eax,(%%esp) ;" - " call do_IRQ ;" /* do_IRQ(event) */ - "2: bsfl %0,%%eax ;" /* %eax == bit # */ - " jnz 1b ;" - " add $8,%%esp ;" - /* we use %ebx because it is callee-saved */ - : : "b" (events), "r" (regs), "i" (HYPEREVENT_IRQ_BASE) - /* clobbered by callback function calls */ - : "eax", "ecx", "edx", "memory" ); - - /* Specialised local_irq_restore(). */ - if ( flags ) set_bit(EVENTS_MASTER_ENABLE_BIT, &shared->events_mask); - barrier(); - } - while ( shared->events ); -} - -/* - * Define interface to generic handling in irq.c - */ - -static void shutdown_hypervisor_event(unsigned int irq) -{ - clear_bit(HYPEREVENT_FROM_IRQ(irq), &event_mask); - clear_bit(HYPEREVENT_FROM_IRQ(irq), &HYPERVISOR_shared_info->events_mask); -} - -static void enable_hypervisor_event(unsigned int irq) -{ - set_bit(HYPEREVENT_FROM_IRQ(irq), &event_mask); - set_bit(HYPEREVENT_FROM_IRQ(irq), &HYPERVISOR_shared_info->events_mask); - if ( test_bit(EVENTS_MASTER_ENABLE_BIT, - &HYPERVISOR_shared_info->events_mask) ) - do_hypervisor_callback(NULL); -} - -static void disable_hypervisor_event(unsigned int irq) -{ - clear_bit(HYPEREVENT_FROM_IRQ(irq), &event_mask); - clear_bit(HYPEREVENT_FROM_IRQ(irq), &HYPERVISOR_shared_info->events_mask); -} - -static void ack_hypervisor_event(unsigned int irq) -{ - int ev = HYPEREVENT_FROM_IRQ(irq); - if ( !(event_mask & (1<events_mask); -} - -static unsigned int startup_hypervisor_event(unsigned int irq) -{ - enable_hypervisor_event(irq); - return 0; -} - -static void end_hypervisor_event(unsigned int irq) -{ -} - -static struct hw_interrupt_type hypervisor_irq_type = { - "Hypervisor-event", - startup_hypervisor_event, - shutdown_hypervisor_event, - enable_hypervisor_event, - disable_hypervisor_event, - ack_hypervisor_event, - end_hypervisor_event, - NULL -}; - -void __init init_IRQ(void) -{ - int i; - - for ( i = 0; i < NR_HYPEREVENT_IRQS; i++ ) - { - irq_desc[i + HYPEREVENT_IRQ_BASE].status = IRQ_DISABLED; - irq_desc[i + HYPEREVENT_IRQ_BASE].action = 0; - irq_desc[i + HYPEREVENT_IRQ_BASE].depth = 1; - irq_desc[i + HYPEREVENT_IRQ_BASE].handler = &hypervisor_irq_type; - } - - /* Also initialise the physical IRQ handlers. */ - physirq_init(); -} diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/i386_ksyms.c b/xenolinux-2.4.25-sparse/arch/xeno/kernel/i386_ksyms.c deleted file mode 100644 index 6744999039..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/i386_ksyms.c +++ /dev/null @@ -1,175 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -extern void dump_thread(struct pt_regs *, struct user *); -extern spinlock_t rtc_lock; - -#if defined(CONFIG_APMXXX) || defined(CONFIG_APM_MODULEXXX) -extern void machine_real_restart(unsigned char *, int); -EXPORT_SYMBOL(machine_real_restart); -extern void default_idle(void); -EXPORT_SYMBOL(default_idle); -#endif - -#ifdef CONFIG_SMP -extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); -extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); -#endif - -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) -extern struct drive_info_struct drive_info; -EXPORT_SYMBOL(drive_info); -#endif - -// XXX extern unsigned long get_cmos_time(void); - -/* platform dependent support */ -EXPORT_SYMBOL(boot_cpu_data); -EXPORT_SYMBOL(dump_thread); -EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL(dump_extended_fpu); -EXPORT_SYMBOL(__ioremap); -EXPORT_SYMBOL(iounmap); -EXPORT_SYMBOL(enable_irq); -EXPORT_SYMBOL(disable_irq); -EXPORT_SYMBOL(disable_irq_nosync); -EXPORT_SYMBOL(probe_irq_mask); -EXPORT_SYMBOL(kernel_thread); -EXPORT_SYMBOL(pm_idle); -EXPORT_SYMBOL(pm_power_off); -EXPORT_SYMBOL(apm_info); -//EXPORT_SYMBOL(gdt); -EXPORT_SYMBOL(empty_zero_page); -EXPORT_SYMBOL(phys_to_machine_mapping); - - -#ifdef CONFIG_DEBUG_IOVIRT -EXPORT_SYMBOL(__io_virt_debug); -#endif - -EXPORT_SYMBOL_NOVERS(__down_failed); -EXPORT_SYMBOL_NOVERS(__down_failed_interruptible); -EXPORT_SYMBOL_NOVERS(__down_failed_trylock); -EXPORT_SYMBOL_NOVERS(__up_wakeup); -/* Networking helper routines. */ -EXPORT_SYMBOL(csum_partial_copy_generic); -/* Delay loops */ -EXPORT_SYMBOL(__ndelay); -EXPORT_SYMBOL(__udelay); -EXPORT_SYMBOL(__delay); -EXPORT_SYMBOL(__const_udelay); - -EXPORT_SYMBOL_NOVERS(__get_user_1); -EXPORT_SYMBOL_NOVERS(__get_user_2); -EXPORT_SYMBOL_NOVERS(__get_user_4); - -EXPORT_SYMBOL(strtok); -EXPORT_SYMBOL(strpbrk); -EXPORT_SYMBOL(strstr); - -EXPORT_SYMBOL(strncpy_from_user); -EXPORT_SYMBOL(__strncpy_from_user); -EXPORT_SYMBOL(clear_user); -EXPORT_SYMBOL(__clear_user); -EXPORT_SYMBOL(__generic_copy_from_user); -EXPORT_SYMBOL(__generic_copy_to_user); -EXPORT_SYMBOL(strnlen_user); - - -EXPORT_SYMBOL(pci_alloc_consistent); -EXPORT_SYMBOL(pci_free_consistent); - -#ifdef CONFIG_PCI -EXPORT_SYMBOL(pcibios_penalize_isa_irq); -EXPORT_SYMBOL(pci_mem_start); -#endif - - -#ifdef CONFIG_X86_USE_3DNOW -EXPORT_SYMBOL(_mmx_memcpy); -EXPORT_SYMBOL(mmx_clear_page); -EXPORT_SYMBOL(mmx_copy_page); -#endif - -#ifdef CONFIG_SMP -EXPORT_SYMBOL(cpu_data); -EXPORT_SYMBOL(kernel_flag_cacheline); -EXPORT_SYMBOL(smp_num_cpus); -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL_NOVERS(__write_lock_failed); -EXPORT_SYMBOL_NOVERS(__read_lock_failed); - -/* Global SMP irq stuff */ -EXPORT_SYMBOL(synchronize_irq); -EXPORT_SYMBOL(global_irq_holder); -EXPORT_SYMBOL(__global_cli); -EXPORT_SYMBOL(__global_sti); -EXPORT_SYMBOL(__global_save_flags); -EXPORT_SYMBOL(__global_restore_flags); -EXPORT_SYMBOL(smp_call_function); - -/* TLB flushing */ -EXPORT_SYMBOL(flush_tlb_page); -#endif - -#ifdef CONFIG_X86_IO_APIC -EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); -#endif - -#ifdef CONFIG_VT -EXPORT_SYMBOL(screen_info); -#endif - -EXPORT_SYMBOL(get_wchan); - -EXPORT_SYMBOL(rtc_lock); - -#undef memcpy -#undef memset -extern void * memset(void *,int,__kernel_size_t); -extern void * memcpy(void *,const void *,__kernel_size_t); -EXPORT_SYMBOL_NOVERS(memcpy); -EXPORT_SYMBOL_NOVERS(memset); - -#ifdef CONFIG_HAVE_DEC_LOCK -EXPORT_SYMBOL(atomic_dec_and_lock); -#endif - -#ifdef CONFIG_MULTIQUAD -EXPORT_SYMBOL(xquad_portio); -#endif - -#include -EXPORT_SYMBOL(create_xeno_proc_entry); -EXPORT_SYMBOL(remove_xeno_proc_entry); - -EXPORT_SYMBOL(do_hypervisor_callback); -EXPORT_SYMBOL(HYPERVISOR_shared_info); diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/ioport.c b/xenolinux-2.4.25-sparse/arch/xeno/kernel/ioport.c deleted file mode 100644 index fc0164045f..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/ioport.c +++ /dev/null @@ -1,48 +0,0 @@ -#include -#include -#include -#include -#include -#include - - -asmlinkage int sys_iopl(unsigned int new_io_pl) -{ - unsigned int old_io_pl = current->thread.io_pl; - dom0_op_t op; - - if ( !(start_info.flags & SIF_PRIVILEGED) ) - return -EPERM; - - if ( new_io_pl > 3 ) - return -EINVAL; - - /* Need "raw I/O" privileges for direct port access. */ - if ( (new_io_pl > old_io_pl) && !capable(CAP_SYS_RAWIO) ) - return -EPERM; - - /* Maintain OS privileges even if user attempts to relinquish them. */ - if ( (new_io_pl == 0) && (start_info.flags & SIF_PRIVILEGED) ) - new_io_pl = 1; - - /* Change our version of the privilege levels. */ - current->thread.io_pl = new_io_pl; - - /* Force the change at ring 0. */ - op.cmd = DOM0_IOPL; - op.u.iopl.domain = DOMID_SELF; - op.u.iopl.iopl = new_io_pl; - HYPERVISOR_dom0_op(&op); - - return 0; -} - - -asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on) -{ - printk(KERN_INFO "ioperm not fully supported - %s\n", - turn_on ? "set iopl to 3" : "ignore resource release"); - return turn_on ? sys_iopl(3) : 0; -} - - diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/irq.c b/xenolinux-2.4.25-sparse/arch/xeno/kernel/irq.c deleted file mode 100644 index c88e976125..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/irq.c +++ /dev/null @@ -1,1137 +0,0 @@ -/* - * linux/arch/i386/kernel/irq.c - * - * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar - * - * This file contains the code used by various IRQ handling routines: - * asking for different IRQ's should be done through these routines - * instead of just grabbing them. Thus setups with different IRQ numbers - * shouldn't result in any weird surprises, and installing new handlers - * should be easier. - */ - -/* - * (mostly architecture independent, will move to kernel/irq.c in 2.5.) - * - * IRQs are in fact implemented a bit like signal handlers for the kernel. - * Naturally it's not a 1:1 relation, but there are similarities. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - - -/* - * Linux has a controller-independent x86 interrupt architecture. - * every controller has a 'controller-template', that is used - * by the main code to do the right thing. Each driver-visible - * interrupt source is transparently wired to the apropriate - * controller. Thus drivers need not be aware of the - * interrupt-controller. - * - * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC, - * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC. - * (IO-APICs assumed to be messaging to Pentium local-APICs) - * - * the code is designed to be easily extended with new/different - * interrupt controllers, without having to do assembly magic. - */ - -/* - * Controller mappings for all interrupt sources: - */ -irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = - { [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}}; - -static void register_irq_proc (unsigned int irq); - -/* - * Special irq handlers. - */ - -void no_action(int cpl, void *dev_id, struct pt_regs *regs) { } - -/* - * Generic no controller code - */ - -static void enable_none(unsigned int irq) { } -static unsigned int startup_none(unsigned int irq) { return 0; } -static void disable_none(unsigned int irq) { } -static void ack_none(unsigned int irq) -{ - printk("unexpected IRQ trap at vector %02x\n", irq); -} - -/* startup is the same as "enable", shutdown is same as "disable" */ -#define shutdown_none disable_none -#define end_none enable_none - -struct hw_interrupt_type no_irq_type = { - "none", - startup_none, - shutdown_none, - enable_none, - disable_none, - ack_none, - end_none -}; - -atomic_t irq_err_count; -#ifdef CONFIG_X86_IO_APIC -#ifdef APIC_MISMATCH_DEBUG -atomic_t irq_mis_count; -#endif -#endif - -/* - * Generic, controller-independent functions: - */ - -int show_interrupts(struct seq_file *p, void *v) -{ - int i, j; - struct irqaction * action; - - seq_printf(p, " "); - for (j=0; jtypename); - seq_printf(p, " %s", action->name); - - for (action=action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); - seq_putc(p,'\n'); - } - seq_printf(p, "NMI: "); - for (j = 0; j < smp_num_cpus; j++) - seq_printf(p, "%10u ", - nmi_count(cpu_logical_map(j))); - seq_printf(p, "\n"); -#if CONFIG_X86_LOCAL_APIC - seq_printf(p, "LOC: "); - for (j = 0; j < smp_num_cpus; j++) - seq_printf(p, "%10u ", - apic_timer_irqs[cpu_logical_map(j)]); - seq_printf(p, "\n"); -#endif - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); -#ifdef CONFIG_X86_IO_APIC -#ifdef APIC_MISMATCH_DEBUG - seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); -#endif -#endif - - return 0; -} - - -/* - * Global interrupt locks for SMP. Allow interrupts to come in on any - * CPU, yet make cli/sti act globally to protect critical regions.. - */ - -#ifdef CONFIG_SMP -unsigned char global_irq_holder = NO_PROC_ID; -unsigned volatile long global_irq_lock; /* pendantic: long for set_bit --RR */ - -extern void show_stack(unsigned long* esp); - -static void show(char * str) -{ - int i; - int cpu = smp_processor_id(); - - printk("\n%s, CPU %d:\n", str, cpu); - printk("irq: %d [",irqs_running()); - for(i=0;i < smp_num_cpus;i++) - printk(" %d",local_irq_count(i)); - printk(" ]\nbh: %d [",spin_is_locked(&global_bh_lock) ? 1 : 0); - for(i=0;i < smp_num_cpus;i++) - printk(" %d",local_bh_count(i)); - - printk(" ]\nStack dumps:"); - for(i = 0; i < smp_num_cpus; i++) { - unsigned long esp; - if (i == cpu) - continue; - printk("\nCPU %d:",i); - esp = init_tss[i].esp0; - if (!esp) { - /* tss->esp0 is set to NULL in cpu_init(), - * it's initialized when the cpu returns to user - * space. -- manfreds - */ - printk(" "); - continue; - } - esp &= ~(THREAD_SIZE-1); - esp += sizeof(struct task_struct); - show_stack((void*)esp); - } - printk("\nCPU %d:",cpu); - show_stack(NULL); - printk("\n"); -} - -#define MAXCOUNT 100000000 - -/* - * I had a lockup scenario where a tight loop doing - * spin_unlock()/spin_lock() on CPU#1 was racing with - * spin_lock() on CPU#0. CPU#0 should have noticed spin_unlock(), but - * apparently the spin_unlock() information did not make it - * through to CPU#0 ... nasty, is this by design, do we have to limit - * 'memory update oscillation frequency' artificially like here? - * - * Such 'high frequency update' races can be avoided by careful design, but - * some of our major constructs like spinlocks use similar techniques, - * it would be nice to clarify this issue. Set this define to 0 if you - * want to check whether your system freezes. I suspect the delay done - * by SYNC_OTHER_CORES() is in correlation with 'snooping latency', but - * i thought that such things are guaranteed by design, since we use - * the 'LOCK' prefix. - */ -#define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 0 - -#if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND -# define SYNC_OTHER_CORES(x) udelay(x+1) -#else -/* - * We have to allow irqs to arrive between __sti and __cli - */ -# define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop") -#endif - -static inline void wait_on_irq(int cpu) -{ - int count = MAXCOUNT; - - for (;;) { - - /* - * Wait until all interrupts are gone. Wait - * for bottom half handlers unless we're - * already executing in one.. - */ - if (!irqs_running()) - if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock)) - break; - - /* Duh, we have to loop. Release the lock to avoid deadlocks */ - clear_bit(0,&global_irq_lock); - - for (;;) { - if (!--count) { - show("wait_on_irq"); - count = ~0; - } - __sti(); - SYNC_OTHER_CORES(cpu); - __cli(); - if (irqs_running()) - continue; - if (global_irq_lock) - continue; - if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock)) - continue; - if (!test_and_set_bit(0,&global_irq_lock)) - break; - } - } -} - -/* - * This is called when we want to synchronize with - * interrupts. We may for example tell a device to - * stop sending interrupts: but to make sure there - * are no interrupts that are executing on another - * CPU we need to call this function. - */ -void synchronize_irq(void) -{ - if (irqs_running()) { - /* Stupid approach */ - cli(); - sti(); - } -} - -static inline void get_irqlock(int cpu) -{ - if (test_and_set_bit(0,&global_irq_lock)) { - /* do we already hold the lock? */ - if ((unsigned char) cpu == global_irq_holder) - return; - /* Uhhuh.. Somebody else got it. Wait.. */ - do { - do { - rep_nop(); - } while (test_bit(0,&global_irq_lock)); - } while (test_and_set_bit(0,&global_irq_lock)); - } - /* - * We also to make sure that nobody else is running - * in an interrupt context. - */ - wait_on_irq(cpu); - - /* - * Ok, finally.. - */ - global_irq_holder = cpu; -} - -void __global_cli(void) -{ - panic("__global_cli"); -} - -void __global_sti(void) -{ - panic("__global_sti"); -} - -/* - * SMP flags value to restore to: - * 0 - global cli - * 1 - global sti - * 2 - local cli - * 3 - local sti - */ -unsigned long __global_save_flags(void) -{ - panic("__global_save_flags"); -} - -void __global_restore_flags(unsigned long flags) -{ - panic("__global_restore_flags"); -} - -#endif - -/* - * This should really return information about whether - * we should do bottom half handling etc. Right now we - * end up _always_ checking the bottom half, which is a - * waste of time and is not what some drivers would - * prefer. - */ -int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action) -{ - int status; - int cpu = smp_processor_id(); - - irq_enter(cpu, irq); - - status = 1; /* Force the "do bottom halves" bit */ - - if (!(action->flags & SA_INTERRUPT)) - __sti(); - - do { - status |= action->flags; - action->handler(irq, action->dev_id, regs); - action = action->next; - } while (action); - if (status & SA_SAMPLE_RANDOM) - add_interrupt_randomness(irq); - __cli(); - - irq_exit(cpu, irq); - - return status; -} - -/* - * Generic enable/disable code: this just calls - * down into the PIC-specific version for the actual - * hardware disable after having gotten the irq - * controller lock. - */ - -/** - * disable_irq_nosync - disable an irq without waiting - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Disables and Enables are - * nested. - * Unlike disable_irq(), this function does not ensure existing - * instances of the IRQ handler have completed before returning. - * - * This function may be called from IRQ context. - */ - -inline void disable_irq_nosync(unsigned int irq) -{ - irq_desc_t *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - if (!desc->depth++) { - desc->status |= IRQ_DISABLED; - desc->handler->disable(irq); - } - spin_unlock_irqrestore(&desc->lock, flags); -} - -/** - * disable_irq - disable an irq and wait for completion - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Enables and Disables are - * nested. - * This function waits for any pending IRQ handlers for this interrupt - * to complete before returning. If you use this function while - * holding a resource the IRQ handler may need you will deadlock. - * - * This function may be called - with care - from IRQ context. - */ - -void disable_irq(unsigned int irq) -{ - disable_irq_nosync(irq); - - if (!local_irq_count(smp_processor_id())) { - do { - barrier(); - cpu_relax(); - } while (irq_desc[irq].status & IRQ_INPROGRESS); - } -} - -/** - * enable_irq - enable handling of an irq - * @irq: Interrupt to enable - * - * Undoes the effect of one call to disable_irq(). If this - * matches the last disable, processing of interrupts on this - * IRQ line is re-enabled. - * - * This function may be called from IRQ context. - */ - -void enable_irq(unsigned int irq) -{ - irq_desc_t *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - switch (desc->depth) { - case 1: { - unsigned int status = desc->status & ~IRQ_DISABLED; - desc->status = status; - if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { - desc->status = status | IRQ_REPLAY; - hw_resend_irq(desc->handler,irq); - } - desc->handler->enable(irq); - /* fall-through */ - } - default: - desc->depth--; - break; - case 0: - printk("enable_irq(%u) unbalanced from %p\n", irq, - __builtin_return_address(0)); - } - spin_unlock_irqrestore(&desc->lock, flags); -} - -/* - * do_IRQ handles all normal device IRQ's (the special - * SMP cross-CPU interrupts have their own specific - * handlers). - */ -asmlinkage unsigned int do_IRQ(int irq, struct pt_regs *regs) -{ - /* - * We ack quickly, we don't want the irq controller - * thinking we're snobs just because some other CPU has - * disabled global interrupts (we have already done the - * INT_ACK cycles, it's too late to try to pretend to the - * controller that we aren't taking the interrupt). - * - * 0 return value means that this irq is already being - * handled by some other CPU. (or is disabled) - */ - int cpu = smp_processor_id(); - irq_desc_t *desc = irq_desc + irq; - struct irqaction * action; - unsigned int status; -#ifdef CONFIG_DEBUG_STACKOVERFLOW - long esp; - - /* Debugging check for stack overflow: is there less than 1KB free? */ - __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191)); - if (unlikely(esp < (sizeof(struct task_struct) + 1024))) { - extern void show_stack(unsigned long *); - - printk("do_IRQ: stack overflow: %ld\n", - esp - sizeof(struct task_struct)); - __asm__ __volatile__("movl %%esp,%0" : "=r" (esp)); - show_stack((void *)esp); - } -#endif - - kstat.irqs[cpu][irq]++; - spin_lock(&desc->lock); - desc->handler->ack(irq); - /* - REPLAY is when Linux resends an IRQ that was dropped earlier - WAITING is used by probe to mark irqs that are being tested - */ - status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); - status |= IRQ_PENDING; /* we _want_ to handle it */ - - /* - * If the IRQ is disabled for whatever reason, we cannot - * use the action we have. - */ - action = NULL; - if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) { - action = desc->action; - status &= ~IRQ_PENDING; /* we commit to handling */ - status |= IRQ_INPROGRESS; /* we are handling it */ - } - desc->status = status; - - /* - * If there is no IRQ handler or it was disabled, exit early. - Since we set PENDING, if another processor is handling - a different instance of this same irq, the other processor - will take care of it. - */ - if (!action) - goto out; - - /* - * Edge triggered interrupts need to remember - * pending events. - * This applies to any hw interrupts that allow a second - * instance of the same irq to arrive while we are in do_IRQ - * or in the handler. But the code here only handles the _second_ - * instance of the irq, not the third or fourth. So it is mostly - * useful for irq hardware that does not mask cleanly in an - * SMP environment. - */ - for (;;) { - spin_unlock(&desc->lock); - handle_IRQ_event(irq, regs, action); - spin_lock(&desc->lock); - - if (!(desc->status & IRQ_PENDING)) - break; - desc->status &= ~IRQ_PENDING; - } - desc->status &= ~IRQ_INPROGRESS; -out: - /* - * The ->end() handler has to deal with interrupts which got - * disabled while the handler was running. - */ - desc->handler->end(irq); - spin_unlock(&desc->lock); - - if (softirq_pending(cpu)) - do_softirq(); - return 1; -} - -/** - * request_irq - allocate an interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device - * @dev_id: A cookie passed back to the handler function - * - * This call allocates interrupt resources and enables the - * interrupt line and IRQ handling. From the point this - * call is made your handler function may be invoked. Since - * your handler function must clear any interrupt the board - * raises, you must take care both to initialise your hardware - * and to set up the interrupt handler in the right order. - * - * Dev_id must be globally unique. Normally the address of the - * device data structure is used as the cookie. Since the handler - * receives this value it makes sense to use it. - * - * If your interrupt is shared you must pass a non NULL dev_id - * as this is required when freeing the interrupt. - * - * Flags: - * - * SA_SHIRQ Interrupt is shared - * - * SA_INTERRUPT Disable local interrupts while processing - * - * SA_SAMPLE_RANDOM The interrupt can be used for entropy - * - */ - -int request_irq(unsigned int irq, - void (*handler)(int, void *, struct pt_regs *), - unsigned long irqflags, - const char * devname, - void *dev_id) -{ - int retval; - struct irqaction * action; - -#if 1 - /* - * Sanity-check: shared interrupts should REALLY pass in - * a real dev-ID, otherwise we'll have trouble later trying - * to figure out which interrupt is which (messes up the - * interrupt freeing logic etc). - */ - if (irqflags & SA_SHIRQ) { - if (!dev_id) - printk("Bad boy: %s (at 0x%x) called us without a dev_id!\n", devname, (&irq)[-1]); - } -#endif - - if (irq >= NR_IRQS) - return -EINVAL; - if (!handler) - return -EINVAL; - - action = (struct irqaction *) - kmalloc(sizeof(struct irqaction), GFP_KERNEL); - if (!action) - return -ENOMEM; - - action->handler = handler; - action->flags = irqflags; - action->mask = 0; - action->name = devname; - action->next = NULL; - action->dev_id = dev_id; - - retval = setup_irq(irq, action); - if (retval) - kfree(action); - return retval; -} - -/** - * free_irq - free an interrupt - * @irq: Interrupt line to free - * @dev_id: Device identity to free - * - * Remove an interrupt handler. The handler is removed and if the - * interrupt line is no longer in use by any driver it is disabled. - * On a shared IRQ the caller must ensure the interrupt is disabled - * on the card it drives before calling this function. The function - * does not return until any executing interrupts for this IRQ - * have completed. - * - * This function may be called from interrupt context. - * - * Bugs: Attempting to free an irq in a handler for the same irq hangs - * the machine. - */ - -void free_irq(unsigned int irq, void *dev_id) -{ - irq_desc_t *desc; - struct irqaction **p; - unsigned long flags; - - if (irq >= NR_IRQS) - return; - - desc = irq_desc + irq; - spin_lock_irqsave(&desc->lock,flags); - p = &desc->action; - for (;;) { - struct irqaction * action = *p; - if (action) { - struct irqaction **pp = p; - p = &action->next; - if (action->dev_id != dev_id) - continue; - - /* Found it - now remove it from the list of entries */ - *pp = action->next; - if (!desc->action) { - desc->status |= IRQ_DISABLED; - desc->handler->shutdown(irq); - } - spin_unlock_irqrestore(&desc->lock,flags); - -#ifdef CONFIG_SMP - /* Wait to make sure it's not being used on another CPU */ - while (desc->status & IRQ_INPROGRESS) { - barrier(); - cpu_relax(); - } -#endif - kfree(action); - return; - } - printk("Trying to free free IRQ%d\n",irq); - spin_unlock_irqrestore(&desc->lock,flags); - return; - } -} - -/* - * IRQ autodetection code.. - * - * This depends on the fact that any interrupt that - * comes in on to an unassigned handler will get stuck - * with "IRQ_WAITING" cleared and the interrupt - * disabled. - */ - -static DECLARE_MUTEX(probe_sem); - -/** - * probe_irq_on - begin an interrupt autodetect - * - * Commence probing for an interrupt. The interrupts are scanned - * and a mask of potential interrupt lines is returned. - * - */ - -unsigned long probe_irq_on(void) -{ - unsigned int i; - irq_desc_t *desc; - unsigned long val; - unsigned long delay; - - down(&probe_sem); - /* - * something may have generated an irq long ago and we want to - * flush such a longstanding irq before considering it as spurious. - */ - for (i = NR_IRQS-1; i > 0; i--) { - desc = irq_desc + i; - - spin_lock_irq(&desc->lock); - if (!irq_desc[i].action) - irq_desc[i].handler->startup(i); - spin_unlock_irq(&desc->lock); - } - - /* Wait for longstanding interrupts to trigger. */ - for (delay = jiffies + HZ/50; time_after(delay, jiffies); ) - /* about 20ms delay */ synchronize_irq(); - - /* - * enable any unassigned irqs - * (we must startup again here because if a longstanding irq - * happened in the previous stage, it may have masked itself) - */ - for (i = NR_IRQS-1; i > 0; i--) { - desc = irq_desc + i; - - spin_lock_irq(&desc->lock); - if (!desc->action) { - desc->status |= IRQ_AUTODETECT | IRQ_WAITING; - if (desc->handler->startup(i)) - desc->status |= IRQ_PENDING; - } - spin_unlock_irq(&desc->lock); - } - - /* - * Wait for spurious interrupts to trigger - */ - for (delay = jiffies + HZ/10; time_after(delay, jiffies); ) - /* about 100ms delay */ synchronize_irq(); - - /* - * Now filter out any obviously spurious interrupts - */ - val = 0; - for (i = 0; i < NR_IRQS; i++) { - irq_desc_t *desc = irq_desc + i; - unsigned int status; - - spin_lock_irq(&desc->lock); - status = desc->status; - - if (status & IRQ_AUTODETECT) { - /* It triggered already - consider it spurious. */ - if (!(status & IRQ_WAITING)) { - desc->status = status & ~IRQ_AUTODETECT; - desc->handler->shutdown(i); - } else - if (i < 32) - val |= 1 << i; - } - spin_unlock_irq(&desc->lock); - } - - return val; -} - -/* - * Return a mask of triggered interrupts (this - * can handle only legacy ISA interrupts). - */ - -/** - * probe_irq_mask - scan a bitmap of interrupt lines - * @val: mask of interrupts to consider - * - * Scan the ISA bus interrupt lines and return a bitmap of - * active interrupts. The interrupt probe logic state is then - * returned to its previous value. - * - * Note: we need to scan all the irq's even though we will - * only return ISA irq numbers - just so that we reset them - * all to a known state. - */ -unsigned int probe_irq_mask(unsigned long val) -{ - int i; - unsigned int mask; - - mask = 0; - for (i = 0; i < NR_IRQS; i++) { - irq_desc_t *desc = irq_desc + i; - unsigned int status; - - spin_lock_irq(&desc->lock); - status = desc->status; - - if (status & IRQ_AUTODETECT) { - if (i < 16 && !(status & IRQ_WAITING)) - mask |= 1 << i; - - desc->status = status & ~IRQ_AUTODETECT; - desc->handler->shutdown(i); - } - spin_unlock_irq(&desc->lock); - } - up(&probe_sem); - - return mask & val; -} - -/* - * Return the one interrupt that triggered (this can - * handle any interrupt source). - */ - -/** - * probe_irq_off - end an interrupt autodetect - * @val: mask of potential interrupts (unused) - * - * Scans the unused interrupt lines and returns the line which - * appears to have triggered the interrupt. If no interrupt was - * found then zero is returned. If more than one interrupt is - * found then minus the first candidate is returned to indicate - * their is doubt. - * - * The interrupt probe logic state is returned to its previous - * value. - * - * BUGS: When used in a module (which arguably shouldnt happen) - * nothing prevents two IRQ probe callers from overlapping. The - * results of this are non-optimal. - */ - -int probe_irq_off(unsigned long val) -{ - int i, irq_found, nr_irqs; - - nr_irqs = 0; - irq_found = 0; - for (i = 0; i < NR_IRQS; i++) { - irq_desc_t *desc = irq_desc + i; - unsigned int status; - - spin_lock_irq(&desc->lock); - status = desc->status; - - if (status & IRQ_AUTODETECT) { - if (!(status & IRQ_WAITING)) { - if (!nr_irqs) - irq_found = i; - nr_irqs++; - } - desc->status = status & ~IRQ_AUTODETECT; - desc->handler->shutdown(i); - } - spin_unlock_irq(&desc->lock); - } - up(&probe_sem); - - if (nr_irqs > 1) - irq_found = -irq_found; - return irq_found; -} - -/* this was setup_x86_irq but it seems pretty generic */ -int setup_irq(unsigned int irq, struct irqaction * new) -{ - int shared = 0; - unsigned long flags; - struct irqaction *old, **p; - irq_desc_t *desc = irq_desc + irq; - - /* - * Some drivers like serial.c use request_irq() heavily, - * so we have to be careful not to interfere with a - * running system. - */ - if (new->flags & SA_SAMPLE_RANDOM) { - /* - * This function might sleep, we want to call it first, - * outside of the atomic block. - * Yes, this might clear the entropy pool if the wrong - * driver is attempted to be loaded, without actually - * installing a new handler, but is this really a problem, - * only the sysadmin is able to do this. - */ - rand_initialize_irq(irq); - } - - /* - * The following block of code has to be executed atomically - */ - spin_lock_irqsave(&desc->lock,flags); - p = &desc->action; - if ((old = *p) != NULL) { - /* Can't share interrupts unless both agree to */ - if (!(old->flags & new->flags & SA_SHIRQ)) { - spin_unlock_irqrestore(&desc->lock,flags); - return -EBUSY; - } - - /* add new interrupt at end of irq queue */ - do { - p = &old->next; - old = *p; - } while (old); - shared = 1; - } - - *p = new; - - if (!shared) { - desc->depth = 0; - desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS); - desc->handler->startup(irq); - } - spin_unlock_irqrestore(&desc->lock,flags); - - register_irq_proc(irq); - return 0; -} - -static struct proc_dir_entry * root_irq_dir; -static struct proc_dir_entry * irq_dir [NR_IRQS]; - -#define HEX_DIGITS 8 - -static unsigned int parse_hex_value (const char *buffer, - unsigned long count, unsigned long *ret) -{ - unsigned char hexnum [HEX_DIGITS]; - unsigned long value; - int i; - - if (!count) - return -EINVAL; - if (count > HEX_DIGITS) - count = HEX_DIGITS; - if (copy_from_user(hexnum, buffer, count)) - return -EFAULT; - - /* - * Parse the first 8 characters as a hex string, any non-hex char - * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same. - */ - value = 0; - - for (i = 0; i < count; i++) { - unsigned int c = hexnum[i]; - - switch (c) { - case '0' ... '9': c -= '0'; break; - case 'a' ... 'f': c -= 'a'-10; break; - case 'A' ... 'F': c -= 'A'-10; break; - default: - goto out; - } - value = (value << 4) | c; - } -out: - *ret = value; - return 0; -} - -#if CONFIG_SMP - -static struct proc_dir_entry * smp_affinity_entry [NR_IRQS]; - -static unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL }; -static int irq_affinity_read_proc (char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - if (count < HEX_DIGITS+1) - return -EINVAL; - return sprintf (page, "%08lx\n", irq_affinity[(long)data]); -} - -static int irq_affinity_write_proc (struct file *file, const char *buffer, - unsigned long count, void *data) -{ - int irq = (long) data, full_count = count, err; - unsigned long new_value; - - if (!irq_desc[irq].handler->set_affinity) - return -EIO; - - err = parse_hex_value(buffer, count, &new_value); - - /* - * Do not allow disabling IRQs completely - it's a too easy - * way to make the system unusable accidentally :-) At least - * one online CPU still has to be targeted. - */ - if (!(new_value & cpu_online_map)) - return -EINVAL; - - irq_affinity[irq] = new_value; - irq_desc[irq].handler->set_affinity(irq, new_value); - - return full_count; -} - -#endif - -static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - unsigned long *mask = (unsigned long *) data; - if (count < HEX_DIGITS+1) - return -EINVAL; - return sprintf (page, "%08lx\n", *mask); -} - -static int prof_cpu_mask_write_proc (struct file *file, const char *buffer, - unsigned long count, void *data) -{ - unsigned long *mask = (unsigned long *) data, full_count = count, err; - unsigned long new_value; - - err = parse_hex_value(buffer, count, &new_value); - if (err) - return err; - - *mask = new_value; - return full_count; -} - -#define MAX_NAMELEN 10 - -static void register_irq_proc (unsigned int irq) -{ - char name [MAX_NAMELEN]; - - if (!root_irq_dir || (irq_desc[irq].handler == &no_irq_type) || - irq_dir[irq]) - return; - - memset(name, 0, MAX_NAMELEN); - sprintf(name, "%d", irq); - - /* create /proc/irq/1234 */ - irq_dir[irq] = proc_mkdir(name, root_irq_dir); - -#if CONFIG_SMP - { - struct proc_dir_entry *entry; - - /* create /proc/irq/1234/smp_affinity */ - entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]); - - if (entry) { - entry->nlink = 1; - entry->data = (void *)(long)irq; - entry->read_proc = irq_affinity_read_proc; - entry->write_proc = irq_affinity_write_proc; - } - - smp_affinity_entry[irq] = entry; - } -#endif -} - -unsigned long prof_cpu_mask = -1; - -void init_irq_proc (void) -{ - struct proc_dir_entry *entry; - int i; - - /* create /proc/irq */ - root_irq_dir = proc_mkdir("irq", 0); - - /* create /proc/irq/prof_cpu_mask */ - entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir); - - if (!entry) - return; - - entry->nlink = 1; - entry->data = (void *)&prof_cpu_mask; - entry->read_proc = prof_cpu_mask_read_proc; - entry->write_proc = prof_cpu_mask_write_proc; - - /* - * Create entries for all existing IRQs. - */ - for (i = 0; i < NR_IRQS; i++) - register_irq_proc(i); -} - diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/ldt.c b/xenolinux-2.4.25-sparse/arch/xeno/kernel/ldt.c deleted file mode 100644 index 6a2bd7a0d9..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/ldt.c +++ /dev/null @@ -1,287 +0,0 @@ -/* - * linux/kernel/ldt.c - * - * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds - * Copyright (C) 1999 Ingo Molnar - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ -static void flush_ldt(void *mm) -{ - if (current->active_mm) - load_LDT(¤t->active_mm->context); -} -#endif - -static int alloc_ldt(mm_context_t *pc, int mincount, int reload) -{ - void *oldldt; - void *newldt; - int oldsize; - - if (mincount <= pc->size) - return 0; - oldsize = pc->size; - mincount = (mincount+511)&(~511); - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount*LDT_ENTRY_SIZE); - else - newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); - - if (!newldt) - return -ENOMEM; - - if (oldsize) - memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); - - oldldt = pc->ldt; - memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); - wmb(); - pc->ldt = newldt; - pc->size = mincount; - if (reload) { - make_pages_readonly( - pc->ldt, - (pc->size*LDT_ENTRY_SIZE)/PAGE_SIZE); - load_LDT(pc); - flush_page_update_queue(); -#ifdef CONFIG_SMP - if (current->mm->cpu_vm_mask != (1< PAGE_SIZE) - vfree(oldldt); - else - kfree(oldldt); - } - return 0; -} - -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) -{ - int err = alloc_ldt(new, old->size, 0); - if (err < 0) { - printk(KERN_WARNING "ldt allocation failed\n"); - new->size = 0; - return err; - } - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); - make_pages_readonly(new->ldt, (new->size*LDT_ENTRY_SIZE)/PAGE_SIZE); - return 0; -} - -/* - * we do not have to muck with descriptors here, that is - * done in switch_mm() as needed. - */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - struct mm_struct * old_mm; - int retval = 0; - - init_MUTEX(&mm->context.sem); - mm->context.size = 0; - old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - down(&old_mm->context.sem); - retval = copy_ldt(&mm->context, &old_mm->context); - up(&old_mm->context.sem); - } - return retval; -} - -/* - * No need to lock the MM as we are the last user - * Do not touch the ldt register, we are already - * in the next thread. - */ -void destroy_context(struct mm_struct *mm) -{ - if (mm->context.size) { - make_pages_writeable( - mm->context.ldt, - (mm->context.size*LDT_ENTRY_SIZE)/PAGE_SIZE); - flush_page_update_queue(); - if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - kfree(mm->context.ldt); - mm->context.size = 0; - } -} - -static int read_ldt(void * ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - struct mm_struct * mm = current->mm; - - if (!mm->context.size) - return 0; - if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) - bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; - - down(&mm->context.sem); - size = mm->context.size*LDT_ENTRY_SIZE; - if (size > bytecount) - size = bytecount; - - err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; - up(&mm->context.sem); - if (err < 0) - return err; - if (size != bytecount) { - /* zero-fill the rest */ - clear_user(ptr+size, bytecount-size); - } - return bytecount; -} - - -static int read_default_ldt(void * ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - void *address; - - err = 0; - address = &default_ldt[0]; - size = 5*sizeof(struct desc_struct); - if (size > bytecount) - size = bytecount; - - err = size; - if (copy_to_user(ptr, address, size)) - err = -EFAULT; - - return err; -} - -static int write_ldt(void * ptr, unsigned long bytecount, int oldmode) -{ - struct mm_struct * mm = current->mm; - __u32 entry_1, entry_2, *lp; - unsigned long phys_lp, max_limit; - int error; - struct modify_ldt_ldt_s ldt_info; - - error = -EINVAL; - if (bytecount != sizeof(ldt_info)) - goto out; - error = -EFAULT; - if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) - goto out; - - error = -EINVAL; - if (ldt_info.entry_number >= LDT_ENTRIES) - goto out; - if (ldt_info.contents == 3) { - if (oldmode) - goto out; - if (ldt_info.seg_not_present == 0) - goto out; - } - - /* - * This makes our tests for overlap with Xen space easier. There's no good - * reason to have a user segment starting this high anyway. - */ - if (ldt_info.base_addr >= PAGE_OFFSET) - goto out; - - down(&mm->context.sem); - if (ldt_info.entry_number >= mm->context.size) { - error = alloc_ldt(¤t->mm->context, ldt_info.entry_number+1, 1); - if (error < 0) - goto out_unlock; - } - - - lp = (__u32 *)((ldt_info.entry_number<<3) + (char *)mm->context.ldt); - phys_lp = arbitrary_virt_to_phys(lp); - - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || - (ldt_info.contents == 0 && - ldt_info.read_exec_only == 1 && - ldt_info.seg_32bit == 0 && - ldt_info.limit_in_pages == 0 && - ldt_info.seg_not_present == 1 && - ldt_info.useable == 0 )) { - entry_1 = 0; - entry_2 = 0; - goto install; - } - } - - max_limit = HYPERVISOR_VIRT_START - ldt_info.base_addr; - if ( ldt_info.limit_in_pages ) - max_limit >>= PAGE_SHIFT; - max_limit--; - if ( (ldt_info.limit & 0xfffff) > (max_limit & 0xfffff) ) - ldt_info.limit = max_limit; - - entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) | - (ldt_info.limit & 0x0ffff); - entry_2 = (ldt_info.base_addr & 0xff000000) | - ((ldt_info.base_addr & 0x00ff0000) >> 16) | - (ldt_info.limit & 0xf0000) | - ((ldt_info.read_exec_only ^ 1) << 9) | - (ldt_info.contents << 10) | - ((ldt_info.seg_not_present ^ 1) << 15) | - (ldt_info.seg_32bit << 22) | - (ldt_info.limit_in_pages << 23) | - 0x7000; - if (!oldmode) - entry_2 |= (ldt_info.useable << 20); - - /* Install the new entry ... */ - install: - error = HYPERVISOR_update_descriptor(phys_lp, entry_1, entry_2); - - out_unlock: - up(&mm->context.sem); - out: - return error; -} - -asmlinkage int sys_modify_ldt(int func, void *ptr, unsigned long bytecount) -{ - int ret = -ENOSYS; - - switch (func) { - case 0: - ret = read_ldt(ptr, bytecount); - break; - case 1: - ret = write_ldt(ptr, bytecount, 1); - break; - case 2: - ret = read_default_ldt(ptr, bytecount); - break; - case 0x11: - ret = write_ldt(ptr, bytecount, 0); - break; - } - return ret; -} diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/pci-dma.c b/xenolinux-2.4.25-sparse/arch/xeno/kernel/pci-dma.c deleted file mode 100644 index dd8842719e..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/pci-dma.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Dynamic DMA mapping support. - * - * On i386 there is no hardware dynamic DMA address translation, - * so consistent alloc/free are merely page allocation/freeing. - * The rest of the dynamic DMA mapping interface is implemented - * in asm/pci.h. - */ - -#include -#include -#include -#include -#include - -void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, - dma_addr_t *dma_handle) -{ - void *ret; - int gfp = GFP_ATOMIC; - - if (hwdev == NULL || ((u32)hwdev->dma_mask < 0xffffffff)) - gfp |= GFP_DMA; - ret = (void *)__get_free_pages(gfp, get_order(size)); - - if (ret != NULL) { - memset(ret, 0, size); - *dma_handle = virt_to_bus(ret); - } - return ret; -} - -void pci_free_consistent(struct pci_dev *hwdev, size_t size, - void *vaddr, dma_addr_t dma_handle) -{ - free_pages((unsigned long)vaddr, get_order(size)); -} diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/pci-i386.c b/xenolinux-2.4.25-sparse/arch/xeno/kernel/pci-i386.c deleted file mode 100644 index 96dcdde6b3..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/pci-i386.c +++ /dev/null @@ -1,410 +0,0 @@ -/* - * Low-Level PCI Access for i386 machines - * - * Copyright 1993, 1994 Drew Eckhardt - * Visionary Computing - * (Unix and Linux consulting and custom programming) - * Drew@Colorado.EDU - * +1 (303) 786-7975 - * - * Drew's work was sponsored by: - * iX Multiuser Multitasking Magazine - * Hannover, Germany - * hm@ix.de - * - * Copyright 1997--2000 Martin Mares - * - * For more information, please consult the following manuals (look at - * http://www.pcisig.com/ for how to get them): - * - * PCI BIOS Specification - * PCI Local Bus Specification - * PCI to PCI Bridge Specification - * PCI System Design Guide - * - * - * CHANGELOG : - * Jun 17, 1994 : Modified to accommodate the broken pre-PCI BIOS SPECIFICATION - * Revision 2.0 present on 's ASUS mainboard. - * - * Jan 5, 1995 : Modified to probe PCI hardware at boot time by Frederic - * Potter, potter@cao-vlsi.ibp.fr - * - * Jan 10, 1995 : Modified to store the information about configured pci - * devices into a list, which can be accessed via /proc/pci by - * Curtis Varner, cvarner@cs.ucr.edu - * - * Jan 12, 1995 : CPU-PCI bridge optimization support by Frederic Potter. - * Alpha version. Intel & UMC chipset support only. - * - * Apr 16, 1995 : Source merge with the DEC Alpha PCI support. Most of the code - * moved to drivers/pci/pci.c. - * - * Dec 7, 1996 : Added support for direct configuration access of boards - * with Intel compatible access schemes (tsbogend@alpha.franken.de) - * - * Feb 3, 1997 : Set internal functions to static, save/restore flags - * avoid dead locks reading broken PCI BIOS, werner@suse.de - * - * Apr 26, 1997 : Fixed case when there is BIOS32, but not PCI BIOS - * (mj@atrey.karlin.mff.cuni.cz) - * - * May 7, 1997 : Added some missing cli()'s. [mj] - * - * Jun 20, 1997 : Corrected problems in "conf1" type accesses. - * (paubert@iram.es) - * - * Aug 2, 1997 : Split to PCI BIOS handling and direct PCI access parts - * and cleaned it up... Martin Mares - * - * Feb 6, 1998 : No longer using BIOS to find devices and device classes. [mj] - * - * May 1, 1998 : Support for peer host bridges. [mj] - * - * Jun 19, 1998 : Changed to use spinlocks, so that PCI configuration space - * can be accessed from interrupts even on SMP systems. [mj] - * - * August 1998 : Better support for peer host bridges and more paranoid - * checks for direct hardware access. Ugh, this file starts to look as - * a large gallery of common hardware bug workarounds (watch the comments) - * -- the PCI specs themselves are sane, but most implementors should be - * hit hard with \hammer scaled \magstep5. [mj] - * - * Jan 23, 1999 : More improvements to peer host bridge logic. i450NX fixup. [mj] - * - * Feb 8, 1999 : Added UM8886BF I/O address fixup. [mj] - * - * August 1999 : New resource management and configuration access stuff. [mj] - * - * Sep 19, 1999 : Use PCI IRQ routing tables for detection of peer host bridges. - * Based on ideas by Chris Frantz and David Hinds. [mj] - * - * Sep 28, 1999 : Handle unreported/unassigned IRQs. Thanks to Shuu Yamaguchi - * for a lot of patience during testing. [mj] - * - * Oct 8, 1999 : Split to pci-i386.c, pci-pc.c and pci-visws.c. [mj] - */ - -#include -#include -#include -#include -#include -#include - -#include "pci-i386.h" - -void -pcibios_update_resource(struct pci_dev *dev, struct resource *root, - struct resource *res, int resource) -{ - u32 new, check; - int reg; - - new = res->start | (res->flags & PCI_REGION_FLAG_MASK); - if (resource < 6) { - reg = PCI_BASE_ADDRESS_0 + 4*resource; - } else if (resource == PCI_ROM_RESOURCE) { - res->flags |= PCI_ROM_ADDRESS_ENABLE; - new |= PCI_ROM_ADDRESS_ENABLE; - reg = dev->rom_base_reg; - } else { - /* Somebody might have asked allocation of a non-standard resource */ - return; - } - - pci_write_config_dword(dev, reg, new); - pci_read_config_dword(dev, reg, &check); - if ((new ^ check) & ((new & PCI_BASE_ADDRESS_SPACE_IO) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK)) { - printk(KERN_ERR "PCI: Error while updating region " - "%s/%d (%08x != %08x)\n", dev->slot_name, resource, - new, check); - } -} - -/* - * We need to avoid collisions with `mirrored' VGA ports - * and other strange ISA hardware, so we always want the - * addresses to be allocated in the 0x000-0x0ff region - * modulo 0x400. - * - * Why? Because some silly external IO cards only decode - * the low 10 bits of the IO address. The 0x00-0xff region - * is reserved for motherboard devices that decode all 16 - * bits, so it's ok to allocate at, say, 0x2800-0x28ff, - * but we want to try to avoid allocating at 0x2900-0x2bff - * which might have be mirrored at 0x0100-0x03ff.. - */ -void -pcibios_align_resource(void *data, struct resource *res, - unsigned long size, unsigned long align) -{ - if (res->flags & IORESOURCE_IO) { - unsigned long start = res->start; - - if (start & 0x300) { - start = (start + 0x3ff) & ~0x3ff; - res->start = start; - } - } -} - - -/* - * Handle resources of PCI devices. If the world were perfect, we could - * just allocate all the resource regions and do nothing more. It isn't. - * On the other hand, we cannot just re-allocate all devices, as it would - * require us to know lots of host bridge internals. So we attempt to - * keep as much of the original configuration as possible, but tweak it - * when it's found to be wrong. - * - * Known BIOS problems we have to work around: - * - I/O or memory regions not configured - * - regions configured, but not enabled in the command register - * - bogus I/O addresses above 64K used - * - expansion ROMs left enabled (this may sound harmless, but given - * the fact the PCI specs explicitly allow address decoders to be - * shared between expansion ROMs and other resource regions, it's - * at least dangerous) - * - * Our solution: - * (1) Allocate resources for all buses behind PCI-to-PCI bridges. - * This gives us fixed barriers on where we can allocate. - * (2) Allocate resources for all enabled devices. If there is - * a collision, just mark the resource as unallocated. Also - * disable expansion ROMs during this step. - * (3) Try to allocate resources for disabled devices. If the - * resources were assigned correctly, everything goes well, - * if they weren't, they won't disturb allocation of other - * resources. - * (4) Assign new addresses to resources which were either - * not configured at all or misconfigured. If explicitly - * requested by the user, configure expansion ROM address - * as well. - */ - -static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) -{ - struct list_head *ln; - struct pci_bus *bus; - struct pci_dev *dev; - int idx; - struct resource *r, *pr; - - /* Depth-First Search on bus tree */ - for (ln=bus_list->next; ln != bus_list; ln=ln->next) { - bus = pci_bus_b(ln); - if ((dev = bus->self)) { - printk("alloc bus res: %s\n", dev->slot_name); - for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) { - r = &dev->resource[idx]; - if (!r->start) - { - printk(" res1: 0x%08lx-0x%08lx f=%lx\n", - r->start, r->end, r->flags); - - continue; - } - pr = pci_find_parent_resource(dev, r); - if (!pr || request_resource(pr, r) < 0) - printk(KERN_ERR "PCI: Cannot allocate resource region %d " - "of bridge %s (%p)\n", idx, dev->slot_name, pr); - printk(" res2: %08lx-%08lx f=%lx\n", - r->start, r->end, r->flags); - } - } - pcibios_allocate_bus_resources(&bus->children); - } -} - -static void __init pcibios_allocate_resources(int pass) -{ - struct pci_dev *dev; - int idx, disabled; - u16 command; - struct resource *r, *pr; - - pci_for_each_dev(dev) { - pci_read_config_word(dev, PCI_COMMAND, &command); - for(idx = 0; idx < 6; idx++) { - r = &dev->resource[idx]; - if (r->parent) /* Already allocated */ - continue; - if (!r->start) /* Address not assigned at all */ - continue; - if (r->flags & IORESOURCE_IO) - disabled = !(command & PCI_COMMAND_IO); - else - disabled = !(command & PCI_COMMAND_MEMORY); - if (pass == disabled) { - printk("PCI: Resource %08lx-%08lx (f=%lx, d=%d, p=%d) (%s)\n", - r->start, r->end, r->flags, disabled, pass, dev->slot_name); - pr = pci_find_parent_resource(dev, r); - if (!pr || request_resource(pr, r) < 0) { - printk(KERN_ERR "PCI: Cannot allocate resource region %d" - " of device %s (%p)\n", idx, dev->slot_name, pr); - /* We'll assign a new address later */ - r->end -= r->start; - r->start = 0; - } - } - } - if (!pass) { - r = &dev->resource[PCI_ROM_RESOURCE]; - if (r->flags & PCI_ROM_ADDRESS_ENABLE) { - /* Turn the ROM off, leave the resource region, but keep it unregistered. */ - u32 reg; - printk("PCI: Switching off ROM of %s\n", dev->slot_name); - r->flags &= ~PCI_ROM_ADDRESS_ENABLE; - pci_read_config_dword(dev, dev->rom_base_reg, ®); - pci_write_config_dword(dev, dev->rom_base_reg, reg & ~PCI_ROM_ADDRESS_ENABLE); - } - } - } -} - -static void __init pcibios_assign_resources(void) -{ - struct pci_dev *dev; - int idx; - struct resource *r; - - pci_for_each_dev(dev) { - int class = dev->class >> 8; - - /* Don't touch classless devices and host bridges */ - if (!class || class == PCI_CLASS_BRIDGE_HOST) - continue; - - for(idx=0; idx<6; idx++) { - r = &dev->resource[idx]; - - /* - * Don't touch IDE controllers and I/O ports of video cards! - */ - if ((class == PCI_CLASS_STORAGE_IDE && idx < 4) || - (class == PCI_CLASS_DISPLAY_VGA && (r->flags & IORESOURCE_IO))) - continue; - - /* - * We shall assign a new address to this resource, either because - * the BIOS forgot to do so or because we have decided the old - * address was unusable for some reason. - */ - if (!r->start && r->end) - pci_assign_resource(dev, idx); - } - - if (pci_probe & PCI_ASSIGN_ROMS) { - r = &dev->resource[PCI_ROM_RESOURCE]; - r->end -= r->start; - r->start = 0; - if (r->end) - pci_assign_resource(dev, PCI_ROM_RESOURCE); - } - } -} - -void __init pcibios_set_cacheline_size(void) -{ - struct cpuinfo_x86 *c = &boot_cpu_data; - - pci_cache_line_size = 32 >> 2; - if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD) - pci_cache_line_size = 64 >> 2; /* K7 & K8 */ - else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL) - pci_cache_line_size = 128 >> 2; /* P4 */ -} - -void __init pcibios_resource_survey(void) -{ - DBG("PCI: Allocating resources\n"); - pcibios_allocate_bus_resources(&pci_root_buses); - pcibios_allocate_resources(0); - pcibios_allocate_resources(1); - pcibios_assign_resources(); -} - -int pcibios_enable_resources(struct pci_dev *dev, int mask) -{ - u16 cmd, old_cmd; - int idx; - struct resource *r; - - pci_read_config_word(dev, PCI_COMMAND, &cmd); - old_cmd = cmd; - for(idx=0; idx<6; idx++) { - /* Only set up the requested stuff */ - if (!(mask & (1<resource[idx]; - if (!r->start && r->end) { - printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", dev->slot_name); - return -EINVAL; - } - if (r->flags & IORESOURCE_IO) - cmd |= PCI_COMMAND_IO; - if (r->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - if (dev->resource[PCI_ROM_RESOURCE].start) - cmd |= PCI_COMMAND_MEMORY; - if (cmd != old_cmd) { - printk("PCI: Enabling device %s (%04x -> %04x)\n", dev->slot_name, old_cmd, cmd); - pci_write_config_word(dev, PCI_COMMAND, cmd); - } - return 0; -} - -/* - * If we set up a device for bus mastering, we need to check the latency - * timer as certain crappy BIOSes forget to set it properly. - */ -unsigned int pcibios_max_latency = 255; - -void pcibios_set_master(struct pci_dev *dev) -{ - u8 lat; - pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); - if (lat < 16) - lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency; - else if (lat > pcibios_max_latency) - lat = pcibios_max_latency; - else - return; - printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n", dev->slot_name, lat); - pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); -} - -int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, - enum pci_mmap_state mmap_state, int write_combine) -{ - unsigned long prot; - - /* I/O space cannot be accessed via normal processor loads and - * stores on this platform. - */ - if (mmap_state == pci_mmap_io) - return -EINVAL; - - /* Leave vm_pgoff as-is, the PCI space address is the physical - * address on this platform. - */ - vma->vm_flags |= (VM_SHM | VM_LOCKED | VM_IO); - - prot = pgprot_val(vma->vm_page_prot); - if (boot_cpu_data.x86 > 3) - prot |= _PAGE_PCD | _PAGE_PWT; - vma->vm_page_prot = __pgprot(prot); - - /* Write-combine setting is ignored, it is changed via the mtrr - * interfaces on this platform. - */ - if (remap_page_range(vma->vm_start, vma->vm_pgoff << PAGE_SHIFT, - vma->vm_end - vma->vm_start, - vma->vm_page_prot)) - return -EAGAIN; - - return 0; -} diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/pci-i386.h b/xenolinux-2.4.25-sparse/arch/xeno/kernel/pci-i386.h deleted file mode 100644 index fe70b10166..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/pci-i386.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Low-Level PCI Access for i386 machines. - * - * (c) 1999 Martin Mares - */ - -#undef DEBUG - -#ifdef DEBUG -#define DBG(x...) printk(x) -#else -#define DBG(x...) -#endif - -#define PCI_PROBE_BIOS 0x0001 -#define PCI_PROBE_CONF1 0x0002 -#define PCI_PROBE_CONF2 0x0004 -#define PCI_NO_SORT 0x0100 -#define PCI_BIOS_SORT 0x0200 -#define PCI_NO_CHECKS 0x0400 -#define PCI_ASSIGN_ROMS 0x1000 -#define PCI_BIOS_IRQ_SCAN 0x2000 -#define PCI_ASSIGN_ALL_BUSSES 0x4000 - -extern unsigned int pci_probe; - -/* pci-i386.c */ - -extern unsigned int pcibios_max_latency; -extern u8 pci_cache_line_size; - -void pcibios_resource_survey(void); -void pcibios_set_cacheline_size(void); -int pcibios_enable_resources(struct pci_dev *, int); - -/* pci-pc.c */ - -extern int pcibios_last_bus; -extern struct pci_bus *pci_root_bus; -extern struct pci_ops *pci_root_ops; - -/* pci-irq.c */ - -struct irq_info { - u8 bus, devfn; /* Bus, device and function */ - struct { - u8 link; /* IRQ line ID, chipset dependent, 0=not routed */ - u16 bitmap; /* Available IRQs */ - } __attribute__((packed)) irq[4]; - u8 slot; /* Slot number, 0=onboard */ - u8 rfu; -} __attribute__((packed)); - -struct irq_routing_table { - u32 signature; /* PIRQ_SIGNATURE should be here */ - u16 version; /* PIRQ_VERSION */ - u16 size; /* Table size in bytes */ - u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ - u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */ - u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */ - u32 miniport_data; /* Crap */ - u8 rfu[11]; - u8 checksum; /* Modulo 256 checksum must give zero */ - struct irq_info slots[0]; -} __attribute__((packed)); - -extern unsigned int pcibios_irq_mask; - -void pcibios_irq_init(void); -void pcibios_fixup_irqs(void); -void pcibios_enable_irq(struct pci_dev *dev); diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/pci-irq.c b/xenolinux-2.4.25-sparse/arch/xeno/kernel/pci-irq.c deleted file mode 100644 index f530244f6a..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/pci-irq.c +++ /dev/null @@ -1,90 +0,0 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- - **************************************************************************** - * (C) 2004 - Rolf Neugebauer - Intel Research Cambridge - **************************************************************************** - * - * File: phys_dev.c - * Author: Rolf Neugebauer (rolf.neugebauer@intel.com) - * Date: Mar 2004 - * - * Description: XenoLinux wrappers for PCI interrupt handling. - * very simple since someone else is doing all the hard bits - */ - - -/* - * Low-Level PCI Support for PC -- Routing of Interrupts - * - * (c) 1999--2000 Martin Mares - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "pci-i386.h" - -#include - -unsigned int pcibios_irq_mask = 0xfff8; - -void eisa_set_level_irq(unsigned int irq) -{ - /* dummy */ -} - -void __init pcibios_irq_init(void) -{ - printk("PCI: IRQ init\n"); -} - -void __init pcibios_fixup_irqs(void) -{ - struct pci_dev *dev; - physdev_op_t op; - int ret; - - - printk("PCI: IRQ fixup\n"); - pci_for_each_dev(dev) { - - op.cmd = PHYSDEVOP_FIND_IRQ; - op.u.find_irq.seg = 0; - op.u.find_irq.bus = dev->bus->number; - op.u.find_irq.dev = PCI_SLOT(dev->devfn); - op.u.find_irq.func = PCI_FUNC(dev->devfn); - - if ( (ret = HYPERVISOR_physdev_op(&op)) != 0 ) - { - printk(KERN_ALERT "pci find irq error\n"); - return; - } - - dev->irq = op.u.find_irq.irq; - printk(KERN_INFO "PCI IRQ: [%02x:%02x:%02x] -> %d\n", - dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), dev->irq); - } - return; -} - -void pcibios_penalize_isa_irq(int irq) -{ - /* dummy */ -} - -void pcibios_enable_irq(struct pci_dev *dev) -{ - u8 pin; - - pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); - - if (pin && !dev->irq) { - printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of " - "device %s.\n", 'A' + pin - 1, dev->slot_name); - } -} diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/pci-pc.c b/xenolinux-2.4.25-sparse/arch/xeno/kernel/pci-pc.c deleted file mode 100644 index afe6e4d494..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/pci-pc.c +++ /dev/null @@ -1,364 +0,0 @@ -/* - * Low-Level PCI Support for PC - * - * (c) 1999--2000 Martin Mares - * - * adjusted to use Xen's interface by Rolf Neugebauer - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - -#include "pci-i386.h" - -int pcibios_last_bus = -1; -struct pci_bus *pci_root_bus = NULL; -struct pci_ops *pci_root_ops = NULL; - -int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value) = NULL; -int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value) = NULL; - -static int pci_using_acpi_prt = 0; - -/* - * This interrupt-safe spinlock protects all accesses to PCI - * configuration space. - */ -static spinlock_t pci_config_lock = SPIN_LOCK_UNLOCKED; - -unsigned int pci_probe = PCI_PROBE_BIOS; - -/* - * Functions for accessing PCI configuration space with type 1 accesses - */ - -static int pci_confx_read (int seg, int bus, int dev, int fn, int reg, - int len, u32 *value) -{ - int ret; - physdev_op_t op; - - if (bus > 255 || dev > 31 || fn > 7 || reg > 255) - return -EINVAL; - - op.cmd = PHYSDEVOP_CFGREG_READ; - op.u.cfg_read.seg = seg; - op.u.cfg_read.bus = bus; - op.u.cfg_read.dev = dev; - op.u.cfg_read.func = fn; - op.u.cfg_read.reg = reg; - op.u.cfg_read.len = len; - - if ( (ret = HYPERVISOR_physdev_op(&op)) != 0 ) - { - //printk(KERN_ALERT "pci config read error\n"); - return ret; - } - - *value = op.u.cfg_read.value; - - return 0; -} - -static int pci_confx_write (int seg, int bus, int dev, int fn, int reg, - int len, u32 value) -{ - int ret; - physdev_op_t op; - - if ((bus > 255 || dev > 31 || fn > 7 || reg > 255)) - return -EINVAL; - - op.cmd = PHYSDEVOP_CFGREG_WRITE; - op.u.cfg_write.seg = seg; - op.u.cfg_write.bus = bus; - op.u.cfg_write.dev = dev; - op.u.cfg_write.func = fn; - op.u.cfg_write.reg = reg; - op.u.cfg_write.len = len; - op.u.cfg_write.value = value; - - if ( (ret = HYPERVISOR_physdev_op(&op)) != 0 ) - { - //printk(KERN_ALERT "pci config write error\n"); - return ret; - } - return 0; -} - - -static int pci_confx_read_config_byte(struct pci_dev *dev, int where, u8 *value) -{ - int result; - u32 data; - - result = pci_confx_read(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 1, &data); - - *value = (u8)data; - - return result; -} - -static int pci_confx_read_config_word(struct pci_dev *dev, int where, u16 *value) -{ - int result; - u32 data; - - result = pci_confx_read(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 2, &data); - - *value = (u16)data; - - return result; -} - -static int pci_confx_read_config_dword(struct pci_dev *dev, int where, u32 *value) -{ - return pci_confx_read(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 4, value); -} - -static int pci_confx_write_config_byte(struct pci_dev *dev, int where, u8 value) -{ - return pci_confx_write(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 1, value); -} - -static int pci_confx_write_config_word(struct pci_dev *dev, int where, u16 value) -{ - return pci_confx_write(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 2, value); -} - -static int pci_confx_write_config_dword(struct pci_dev *dev, int where, u32 value) -{ - return pci_confx_write(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 4, value); -} - -static struct pci_ops pci_direct_confx = { - pci_confx_read_config_byte, - pci_confx_read_config_word, - pci_confx_read_config_dword, - pci_confx_write_config_byte, - pci_confx_write_config_word, - pci_confx_write_config_dword -}; - - - -static struct pci_ops * __devinit pci_check_xen(void) -{ - unsigned long flags; - - __save_flags(flags); __cli(); - - printk(KERN_INFO "PCI: Using Xen interface\n"); - - __restore_flags(flags); - - return &pci_direct_confx; -} - -struct pci_fixup pcibios_fixups[] = { {0}}; - - -struct irq_routing_table * __devinit pcibios_get_irq_routing_table(void) -{ - return NULL; -} - -int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) -{ - return 0; -} - -/* - * Several buggy motherboards address only 16 devices and mirror - * them to next 16 IDs. We try to detect this `feature' on all - * primary buses (those containing host bridges as they are - * expected to be unique) and remove the ghost devices. - */ - -static void __devinit pcibios_fixup_ghosts(struct pci_bus *b) -{ - struct list_head *ln, *mn; - struct pci_dev *d, *e; - int mirror = PCI_DEVFN(16,0); - int seen_host_bridge = 0; - int i; - - DBG("PCI: Scanning for ghost devices on bus %d\n", b->number); - for (ln=b->devices.next; ln != &b->devices; ln=ln->next) { - d = pci_dev_b(ln); - if ((d->class >> 8) == PCI_CLASS_BRIDGE_HOST) - seen_host_bridge++; - for (mn=ln->next; mn != &b->devices; mn=mn->next) { - e = pci_dev_b(mn); - if (e->devfn != d->devfn + mirror || - e->vendor != d->vendor || - e->device != d->device || - e->class != d->class) - continue; - for(i=0; iresource[i].start != d->resource[i].start || - e->resource[i].end != d->resource[i].end || - e->resource[i].flags != d->resource[i].flags) - continue; - break; - } - if (mn == &b->devices) - return; - } - if (!seen_host_bridge) - return; - printk(KERN_WARNING "PCI: Ignoring ghost devices on bus %02x\n", b->number); - - ln = &b->devices; - while (ln->next != &b->devices) { - d = pci_dev_b(ln->next); - if (d->devfn >= mirror) { - list_del(&d->global_list); - list_del(&d->bus_list); - kfree(d); - } else - ln = ln->next; - } -} - -/* - * Discover remaining PCI buses in case there are peer host bridges. - * We use the number of last PCI bus provided by the PCI BIOS. - */ -static void __devinit pcibios_fixup_peer_bridges(void) -{ - int n; - struct pci_bus bus; - struct pci_dev dev; - u16 l; - - if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff) - return; - DBG("PCI: Peer bridge fixup\n"); - for (n=0; n <= pcibios_last_bus; n++) { - if (pci_bus_exists(&pci_root_buses, n)) - continue; - bus.number = n; - bus.ops = pci_root_ops; - dev.bus = &bus; - for(dev.devfn=0; dev.devfn<256; dev.devfn += 8) - if (!pci_read_config_word(&dev, PCI_VENDOR_ID, &l) && - l != 0x0000 && l != 0xffff) { - DBG("Found device at %02x:%02x [%04x]\n", n, dev.devfn, l); - printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n); - pci_scan_bus(n, pci_root_ops, NULL); - break; - } - } -} - - -/* - * Called after each bus is probed, but before its children - * are examined. - */ - -void __devinit pcibios_fixup_bus(struct pci_bus *b) -{ - pcibios_fixup_ghosts(b); - pci_read_bridge_bases(b); - return; -} - -struct pci_bus * __devinit pcibios_scan_root(int busnum) -{ - struct list_head *list; - struct pci_bus *bus; - - list_for_each(list, &pci_root_buses) { - bus = pci_bus_b(list); - if (bus->number == busnum) { - /* Already scanned */ - return bus; - } - } - - printk("PCI: Probing PCI hardware (bus %02x)\n", busnum); - - return pci_scan_bus(busnum, pci_root_ops, NULL); -} - -void __devinit pcibios_config_init(void) -{ - /* - * Try all known PCI access methods. Note that we support using - * both PCI BIOS and direct access, with a preference for direct. - */ - - pci_root_ops = pci_check_xen(); - pci_config_read = pci_confx_read; - pci_config_write = pci_confx_write; - - return; -} - -void __init pcibios_init(void) -{ - if (!pci_root_ops) - pcibios_config_init(); - if (!pci_root_ops) { - printk(KERN_WARNING "PCI: System does not support PCI\n"); - return; - } - - pcibios_set_cacheline_size(); - - printk(KERN_INFO "PCI: Probing PCI hardware\n"); - - if (!pci_using_acpi_prt) { - pci_root_bus = pcibios_scan_root(0); - pcibios_irq_init(); - pcibios_fixup_peer_bridges(); - pcibios_fixup_irqs(); - } - - pcibios_resource_survey(); -} - -char * __devinit pcibios_setup(char *str) -{ - if (!strcmp(str, "off")) { - pci_probe = 0; - return NULL; - } - return NULL; -} - -unsigned int pcibios_assign_all_busses(void) -{ - return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0; -} - -int pcibios_enable_device(struct pci_dev *dev, int mask) -{ - int err; - - if ((err = pcibios_enable_resources(dev, mask)) < 0) - return err; - - pcibios_enable_irq(dev); - - return 0; -} diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/physirq.c b/xenolinux-2.4.25-sparse/arch/xeno/kernel/physirq.c deleted file mode 100644 index 1f7a8e4fee..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/physirq.c +++ /dev/null @@ -1,172 +0,0 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- - **************************************************************************** - * (C) 2004 - Rolf Neugebauer - Intel Research Cambridge - **************************************************************************** - * - * File: physirq.c - * Author: Rolf Neugebauer (rolf.neugebauer@intel.com) - * Date: Mar 2004 - * - * Description: guests may receive virtual interrupts directly - * corresponding to physical interrupts. these virtual - * interrupts require special handling provided - * by the virq irq type. - */ - - -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - -static void physirq_interrupt(int irq, void *unused, struct pt_regs *ptregs); - -static int setup_event_handler = 0; - -static unsigned int startup_physirq_event(unsigned int irq) -{ - physdev_op_t op; - int err; - - printk("startup_physirq_event %d\n", irq); - - /* - * install a interrupt handler for physirq event when called first time - * we actually are never executing the handler as _EVENT_PHYSIRQ is - * handled specially in hypervisor.c But we need to enable the event etc. - */ - if ( !setup_event_handler ) - { - printk("startup_physirq_event %d: setup event handler\n", irq); - /* set up a event handler to demux virtualised physical interrupts */ - err = request_irq(HYPEREVENT_IRQ(_EVENT_PHYSIRQ), physirq_interrupt, - SA_SAMPLE_RANDOM, "physirq", NULL); - if ( err ) - { - printk(KERN_WARNING "Could not allocate physirq interrupt\n"); - return err; - } - setup_event_handler = 1; - } - - /* - * request the irq from hypervisor - */ - op.cmd = PHYSDEVOP_REQUEST_IRQ; - op.u.request_irq.irq = irq; - if ( (err = HYPERVISOR_physdev_op(&op)) != 0 ) - { - printk(KERN_ALERT "could not get IRQ %d from Xen\n", irq); - return err; - } - return 0; -} -/* - * This is a dummy interrupt handler. - * It should never be called. events for physical interrupts are handled - * differently in hypervisor.c - */ -static void physirq_interrupt(int irq, void *unused, struct pt_regs *ptregs) -{ - printk("XXX This should never be called!"); -} - - -/* - * IRQ is not needed anymore. - */ -static void shutdown_physirq_event(unsigned int irq) -{ - physdev_op_t op; - int err; - - printk("shutdown_phys_irq called."); - - /* - * tell hypervisor - */ - op.cmd = PHYSDEVOP_FREE_IRQ; - op.u.free_irq.irq = irq; - if ( (err = HYPERVISOR_physdev_op(&op)) != 0 ) - { - printk(KERN_ALERT "could not free IRQ %d\n", irq); - return; - } - return; -} - - -static void enable_physirq_event(unsigned int irq) -{ - /* XXX just enable all phys interrupts for now */ - enable_irq(HYPEREVENT_IRQ(_EVENT_PHYSIRQ)); -} - -static void disable_physirq_event(unsigned int irq) -{ - /* XXX just disable all phys interrupts for now */ - disable_irq(HYPEREVENT_IRQ(_EVENT_PHYSIRQ)); -} - -static void ack_physirq_event(unsigned int irq) -{ - /* clear bit */ - if ( irq <= 0 || irq >= 32 ) - { - printk("wrong irq %d\n", irq); - } - - clear_bit(irq, &HYPERVISOR_shared_info->physirq_pend); -} - -static void end_physirq_event(unsigned int irq) -{ - int err; - physdev_op_t op; - - /* call hypervisor */ - op.cmd = PHYSDEVOP_FINISHED_IRQ; - op.u.finished_irq.irq = irq; - if ( (err = HYPERVISOR_physdev_op(&op)) != 0 ) - { - printk(KERN_ALERT "could not finish IRQ %d\n", irq); - return; - } - return; -} - -static struct hw_interrupt_type physirq_irq_type = { - "physical-irq", - startup_physirq_event, - shutdown_physirq_event, - enable_physirq_event, - disable_physirq_event, - ack_physirq_event, - end_physirq_event, - NULL -}; - - - -void __init physirq_init(void) -{ - int i; - - printk("Initialise irq handlers [%d-%d] for physical interrupts.\n", - PHYS_IRQ_BASE, PHYS_IRQ_BASE+NR_PHYS_IRQS-1); - - for ( i = 0; i < NR_PHYS_IRQS; i++ ) - { - irq_desc[i + PHYS_IRQ_BASE].status = IRQ_DISABLED; - irq_desc[i + PHYS_IRQ_BASE].action = 0; - irq_desc[i + PHYS_IRQ_BASE].depth = 1; - irq_desc[i + PHYS_IRQ_BASE].handler = &physirq_irq_type; - } -} diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/process.c b/xenolinux-2.4.25-sparse/arch/xeno/kernel/process.c deleted file mode 100644 index 640179661b..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/process.c +++ /dev/null @@ -1,474 +0,0 @@ -/* - * linux/arch/i386/kernel/process.c - * - * Copyright (C) 1995 Linus Torvalds - * - * Pentium III FXSR, SSE support - * Gareth Hughes , May 2000 - */ - -/* - * This file handles the architecture-dependent parts of process handling.. - */ - -#define __KERNEL_SYSCALLS__ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); - -int hlt_counter; - -/* - * Powermanagement idle function, if any.. - */ -void (*pm_idle)(void); - -/* - * Power off function, if any - */ -void (*pm_power_off)(void); - -void disable_hlt(void) -{ - hlt_counter++; -} - -void enable_hlt(void) -{ - hlt_counter--; -} - -/* - * The idle thread. There's no useful work to be - * done, so just try to conserve power and have a - * low exit latency (ie sit in a loop waiting for - * somebody to say that they'd like to reschedule) - */ -void cpu_idle (void) -{ - extern int set_timeout_timer(void); - - /* Endless idle loop with no priority at all. */ - init_idle(); - current->nice = 20; - current->counter = -100; - - for ( ; ; ) - { - while ( !current->need_resched ) - { - __cli(); - if ( current->need_resched ) - { - /* The race-free check for events failed. */ - __sti(); - break; - } - else if ( set_timeout_timer() == 0 ) - { - /* NB. Blocking reenable events in a race-free manner. */ - HYPERVISOR_block(); - } - else - { - /* No race here: yielding will get us the CPU again anyway. */ - __sti(); - HYPERVISOR_yield(); - } - } - schedule(); - check_pgt_cache(); - } -} - -void machine_restart(char * __unused) -{ - HYPERVISOR_exit(); -} - -void machine_halt(void) -{ - HYPERVISOR_exit(); -} - -void machine_power_off(void) -{ - HYPERVISOR_exit(); -} - -extern void show_trace(unsigned long* esp); - -void show_regs(struct pt_regs * regs) -{ - printk("\n"); - printk("Pid: %d, comm: %20s\n", current->pid, current->comm); - printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs,regs->eip, smp_processor_id()); - if (regs->xcs & 2) - printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); - printk(" EFLAGS: %08lx %s\n",regs->eflags, print_tainted()); - printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", - regs->eax,regs->ebx,regs->ecx,regs->edx); - printk("ESI: %08lx EDI: %08lx EBP: %08lx", - regs->esi, regs->edi, regs->ebp); - printk(" DS: %04x ES: %04x\n", - 0xffff & regs->xds,0xffff & regs->xes); - - show_trace(®s->esp); -} - - -/* - * Create a kernel thread - */ -int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) -{ - long retval, d0; - - __asm__ __volatile__( - "movl %%esp,%%esi\n\t" - "int $0x80\n\t" /* Linux/i386 system call */ - "cmpl %%esp,%%esi\n\t" /* child or parent? */ - "je 1f\n\t" /* parent - jump */ - /* Load the argument into eax, and push it. That way, it does - * not matter whether the called function is compiled with - * -mregparm or not. */ - "movl %4,%%eax\n\t" - "pushl %%eax\n\t" - "call *%5\n\t" /* call fn */ - "movl %3,%0\n\t" /* exit */ - "int $0x80\n" - "1:\t" - :"=&a" (retval), "=&S" (d0) - :"0" (__NR_clone), "i" (__NR_exit), - "r" (arg), "r" (fn), - "b" (flags | CLONE_VM) - : "memory"); - - return retval; -} - -/* - * Free current thread data structures etc.. - */ -void exit_thread(void) -{ - /* nothing to do ... */ -} - -void flush_thread(void) -{ - struct task_struct *tsk = current; - - memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); - - /* - * Forget coprocessor state.. - */ - clear_fpu(tsk); - tsk->used_math = 0; -} - -void release_thread(struct task_struct *dead_task) -{ - if (dead_task->mm) { - // temporary debugging check - if (dead_task->mm->context.size) { - printk("WARNING: dead process %8s still has LDT? <%p/%p>\n", - dead_task->comm, - dead_task->mm->context.ldt, - dead_task->mm->context.size); - BUG(); - } - } - //release_x86_irqs(dead_task); -} - - -/* - * Save a segment. - */ -#define savesegment(seg,value) \ - asm volatile("movl %%" #seg ",%0":"=m" (*(int *)&(value))) - -int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, - unsigned long unused, - struct task_struct * p, struct pt_regs * regs) -{ - struct pt_regs * childregs; - unsigned long eflags; - - childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1; - struct_cpy(childregs, regs); - childregs->eax = 0; - childregs->esp = esp; - - p->thread.esp = (unsigned long) childregs; - p->thread.esp0 = (unsigned long) (childregs+1); - - p->thread.eip = (unsigned long) ret_from_fork; - - savesegment(fs,p->thread.fs); - savesegment(gs,p->thread.gs); - - unlazy_fpu(current); - struct_cpy(&p->thread.i387, ¤t->thread.i387); - - - __asm__ __volatile__ ( "pushfl; popl %0" : "=r" (eflags) : ); - p->thread.io_pl = (eflags >> 12) & 3; - - return 0; -} - -/* - * fill in the user structure for a core dump.. - */ -void dump_thread(struct pt_regs * regs, struct user * dump) -{ - int i; - -/* changed the size calculations - should hopefully work better. lbt */ - dump->magic = CMAGIC; - dump->start_code = 0; - dump->start_stack = regs->esp & ~(PAGE_SIZE - 1); - dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; - dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; - dump->u_dsize -= dump->u_tsize; - dump->u_ssize = 0; - for (i = 0; i < 8; i++) - dump->u_debugreg[i] = current->thread.debugreg[i]; - - if (dump->start_stack < TASK_SIZE) - dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; - - dump->regs.ebx = regs->ebx; - dump->regs.ecx = regs->ecx; - dump->regs.edx = regs->edx; - dump->regs.esi = regs->esi; - dump->regs.edi = regs->edi; - dump->regs.ebp = regs->ebp; - dump->regs.eax = regs->eax; - dump->regs.ds = regs->xds; - dump->regs.es = regs->xes; - savesegment(fs,dump->regs.fs); - savesegment(gs,dump->regs.gs); - dump->regs.orig_eax = regs->orig_eax; - dump->regs.eip = regs->eip; - dump->regs.cs = regs->xcs; - dump->regs.eflags = regs->eflags; - dump->regs.esp = regs->esp; - dump->regs.ss = regs->xss; - - dump->u_fpvalid = dump_fpu (regs, &dump->i387); -} - -/* - * switch_to(x,yn) should switch tasks from x to y. - * - * We fsave/fwait so that an exception goes off at the right time - * (as a call from the fsave or fwait in effect) rather than to - * the wrong process. Lazy FP saving no longer makes any sense - * with modern CPU's, and this simplifies a lot of things (SMP - * and UP become the same). - * - * NOTE! We used to use the x86 hardware context switching. The - * reason for not using it any more becomes apparent when you - * try to recover gracefully from saved state that is no longer - * valid (stale segment register values in particular). With the - * hardware task-switch, there is no way to fix up bad state in - * a reasonable manner. - * - * The fact that Intel documents the hardware task-switching to - * be slow is a fairly red herring - this code is not noticeably - * faster. However, there _is_ some room for improvement here, - * so the performance issues may eventually be a valid point. - * More important, however, is the fact that this allows us much - * more flexibility. - */ -void __switch_to(struct task_struct *prev_p, struct task_struct *next_p) -{ - struct thread_struct *next = &next_p->thread; - - __cli(); - - /* - * We clobber FS and GS here so that we avoid a GPF when restoring previous - * task's FS/GS values in Xen when the LDT is switched. If we don't do this - * then we can end up erroneously re-flushing the page-update queue when - * we 'execute_multicall_list'. - */ - __asm__ __volatile__ ( - "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs" : : : "eax" ); - - MULTICALL_flush_page_update_queue(); - - /* - * This is basically 'unlazy_fpu', except that we queue a multicall to - * indicate FPU task switch, rather than synchronously trapping to Xen. - */ - if ( prev_p->flags & PF_USEDFPU ) - { - if ( cpu_has_fxsr ) - asm volatile( "fxsave %0 ; fnclex" - : "=m" (prev_p->thread.i387.fxsave) ); - else - asm volatile( "fnsave %0 ; fwait" - : "=m" (prev_p->thread.i387.fsave) ); - prev_p->flags &= ~PF_USEDFPU; - queue_multicall0(__HYPERVISOR_fpu_taskswitch); - } - - queue_multicall2(__HYPERVISOR_stack_switch, __KERNEL_DS, next->esp0); - if ( start_info.flags & SIF_PRIVILEGED ) - { - dom0_op_t op; - op.cmd = DOM0_IOPL; - op.u.iopl.domain = DOMID_SELF; - op.u.iopl.iopl = next->io_pl; - queue_multicall1(__HYPERVISOR_dom0_op, (unsigned long)&op); - } - - /* EXECUTE ALL TASK SWITCH XEN SYSCALLS AT THIS POINT. */ - execute_multicall_list(); - __sti(); - - /* - * Restore %fs and %gs. - */ - loadsegment(fs, next->fs); - loadsegment(gs, next->gs); - - /* - * Now maybe reload the debug registers - */ - if ( next->debugreg[7] != 0 ) - { - HYPERVISOR_set_debugreg(0, next->debugreg[0]); - HYPERVISOR_set_debugreg(1, next->debugreg[1]); - HYPERVISOR_set_debugreg(2, next->debugreg[2]); - HYPERVISOR_set_debugreg(3, next->debugreg[3]); - /* no 4 and 5 */ - HYPERVISOR_set_debugreg(6, next->debugreg[6]); - HYPERVISOR_set_debugreg(7, next->debugreg[7]); - } -} - -asmlinkage int sys_fork(struct pt_regs regs) -{ - return do_fork(SIGCHLD, regs.esp, ®s, 0); -} - -asmlinkage int sys_clone(struct pt_regs regs) -{ - unsigned long clone_flags; - unsigned long newsp; - - clone_flags = regs.ebx; - newsp = regs.ecx; - if (!newsp) - newsp = regs.esp; - return do_fork(clone_flags, newsp, ®s, 0); -} - -/* - * This is trivial, and on the face of it looks like it - * could equally well be done in user mode. - * - * Not so, for quite unobvious reasons - register pressure. - * In user mode vfork() cannot have a stack frame, and if - * done by calling the "clone()" system call directly, you - * do not have enough call-clobbered registers to hold all - * the information you need. - */ -asmlinkage int sys_vfork(struct pt_regs regs) -{ - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, ®s, 0); -} - -/* - * sys_execve() executes a new program. - */ -asmlinkage int sys_execve(struct pt_regs regs) -{ - int error; - char * filename; - - filename = getname((char *) regs.ebx); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - goto out; - error = do_execve(filename, (char **) regs.ecx, (char **) regs.edx, ®s); - if (error == 0) - current->ptrace &= ~PT_DTRACE; - putname(filename); - out: - return error; -} - -/* - * These bracket the sleeping functions.. - */ -extern void scheduling_functions_start_here(void); -extern void scheduling_functions_end_here(void); -#define first_sched ((unsigned long) scheduling_functions_start_here) -#define last_sched ((unsigned long) scheduling_functions_end_here) - -unsigned long get_wchan(struct task_struct *p) -{ - unsigned long ebp, esp, eip; - unsigned long stack_page; - int count = 0; - if (!p || p == current || p->state == TASK_RUNNING) - return 0; - stack_page = (unsigned long)p; - esp = p->thread.esp; - if (!stack_page || esp < stack_page || esp > 8188+stack_page) - return 0; - /* include/asm-i386/system.h:switch_to() pushes ebp last. */ - ebp = *(unsigned long *) esp; - do { - if (ebp < stack_page || ebp > 8184+stack_page) - return 0; - eip = *(unsigned long *) (ebp+4); - if (eip < first_sched || eip >= last_sched) - return eip; - ebp = *(unsigned long *) ebp; - } while (count++ < 16); - return 0; -} -#undef last_sched -#undef first_sched diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/setup.c b/xenolinux-2.4.25-sparse/arch/xeno/kernel/setup.c deleted file mode 100644 index c593bddec7..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/setup.c +++ /dev/null @@ -1,1257 +0,0 @@ -/* - * linux/arch/i386/kernel/setup.c - * - * Copyright (C) 1995 Linus Torvalds - */ - -/* - * This file handles the architecture-dependent parts of initialization - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_BLK_DEV_RAM -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* dev_(de)activate */ - -/* - * Point at the empty zero page to start with. We map the real shared_info - * page as soon as fixmap is up and running. - */ -shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; - -unsigned long *phys_to_machine_mapping; - -/* - * Machine setup.. - */ - -char ignore_irq13; /* set if exception 16 works */ -struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; - -unsigned long mmu_cr4_features; -//EXPORT_SYMBOL(mmu_cr4_features); - -unsigned char * vgacon_mmap; - -/* - * Bus types .. - */ -#ifdef CONFIG_EISA -int EISA_bus; -#endif -int MCA_bus; - -/* for MCA, but anyone else can use it if they want */ -unsigned int machine_id; -unsigned int machine_submodel_id; -unsigned int BIOS_revision; -unsigned int mca_pentium_flag; - -/* For PCI or other memory-mapped resources */ -unsigned long pci_mem_start = 0x10000000; - -/* - * Setup options - */ -struct drive_info_struct { char dummy[32]; } drive_info; -struct screen_info screen_info; -struct apm_info apm_info; -struct sys_desc_table_struct { - unsigned short length; - unsigned char table[0]; -}; - -unsigned char aux_device_present; - -extern int root_mountflags; -extern char _text, _etext, _edata, _end; - -int enable_acpi_smp_table; - -/* Raw start-of-day parameters from the hypervisor. */ -union start_info_union start_info_union; - -#define COMMAND_LINE_SIZE 256 -static char command_line[COMMAND_LINE_SIZE]; -char saved_command_line[COMMAND_LINE_SIZE]; - -static void __init parse_mem_cmdline (char ** cmdline_p) -{ - char c = ' ', *to = command_line, *from = saved_command_line; - int len = 0; - - /* Save unparsed command line copy for /proc/cmdline */ - memcpy(saved_command_line, start_info.cmd_line, COMMAND_LINE_SIZE); - saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; - - for (;;) { - /* - * "mem=nopentium" disables the 4MB page tables. - * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM - * to , overriding the bios size. - * "mem=XXX[KkmM]@XXX[KkmM]" defines a memory region from - * to +, overriding the bios size. - */ - if (c == ' ' && !memcmp(from, "mem=", 4)) { - if (to != command_line) - to--; - if (!memcmp(from+4, "nopentium", 9)) { - from += 9+4; - } else if (!memcmp(from+4, "exactmap", 8)) { - from += 8+4; - } else { - (void)memparse(from+4, &from); - if (*from == '@') - (void)memparse(from+1, &from); - } - } - - c = *(from++); - if (!c) - break; - if (COMMAND_LINE_SIZE <= ++len) - break; - *(to++) = c; - } - *to = '\0'; - *cmdline_p = command_line; -} - -void __init setup_arch(char **cmdline_p) -{ - unsigned long bootmap_size, start_pfn, max_low_pfn; - unsigned long i; - - extern void hypervisor_callback(void); - extern void failsafe_callback(void); - - extern unsigned long cpu0_pte_quicklist[]; - extern unsigned long cpu0_pgd_quicklist[]; - - HYPERVISOR_set_callbacks( - __KERNEL_CS, (unsigned long)hypervisor_callback, - __KERNEL_CS, (unsigned long)failsafe_callback); - - boot_cpu_data.pgd_quick = cpu0_pgd_quicklist; - boot_cpu_data.pte_quick = cpu0_pte_quicklist; - - ROOT_DEV = MKDEV(RAMDISK_MAJOR,0); - memset(&drive_info, 0, sizeof(drive_info)); - memset(&screen_info, 0, sizeof(screen_info)); - - /* This is drawn from a dump from vgacon:startup in standard Linux. */ - screen_info.orig_video_mode = 3; - screen_info.orig_video_isVGA = 1; - screen_info.orig_video_lines = 25; - screen_info.orig_video_cols = 80; - screen_info.orig_video_ega_bx = 3; - screen_info.orig_video_points = 16; - - memset(&apm_info.bios, 0, sizeof(apm_info.bios)); - aux_device_present = 0; -#ifdef CONFIG_BLK_DEV_RAM - rd_image_start = 0; - rd_prompt = 0; - rd_doload = 0; -#endif - - root_mountflags &= ~MS_RDONLY; - init_mm.start_code = (unsigned long) &_text; - init_mm.end_code = (unsigned long) &_etext; - init_mm.end_data = (unsigned long) &_edata; - init_mm.brk = (unsigned long) &_end; - - parse_mem_cmdline(cmdline_p); - -#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) -#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) -#define PFN_PHYS(x) ((x) << PAGE_SHIFT) - -/* - * 128MB for vmalloc and initrd - */ -#define VMALLOC_RESERVE (unsigned long)(128 << 20) -#define MAXMEM (unsigned long)(HYPERVISOR_VIRT_START-PAGE_OFFSET-VMALLOC_RESERVE) -#define MAXMEM_PFN PFN_DOWN(MAXMEM) -#define MAX_NONPAE_PFN (1 << 20) - - /* - * partially used pages are not usable - thus - * we are rounding upwards: - */ -#ifdef CONFIG_BLK_DEV_INITRD - if ( start_info.mod_start ) - start_pfn = PFN_UP(__pa(start_info.mod_start + start_info.mod_len)); - else -#endif - start_pfn = PFN_UP(__pa(&_end)); - max_pfn = start_info.nr_pages; - - /* - * Determine low and high memory ranges: - */ - max_low_pfn = max_pfn; - if (max_low_pfn > MAXMEM_PFN) { - max_low_pfn = MAXMEM_PFN; -#ifndef CONFIG_HIGHMEM - /* Maximum memory usable is what is directly addressable */ - printk(KERN_WARNING "Warning only %ldMB will be used.\n", - MAXMEM>>20); - if (max_pfn > MAX_NONPAE_PFN) - printk(KERN_WARNING "Use a PAE enabled kernel.\n"); - else - printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); -#else /* !CONFIG_HIGHMEM */ -#ifndef CONFIG_X86_PAE - if (max_pfn > MAX_NONPAE_PFN) { - max_pfn = MAX_NONPAE_PFN; - printk(KERN_WARNING "Warning only 4GB will be used.\n"); - printk(KERN_WARNING "Use a PAE enabled kernel.\n"); - } -#endif /* !CONFIG_X86_PAE */ -#endif /* !CONFIG_HIGHMEM */ - } - -#ifdef CONFIG_HIGHMEM - highstart_pfn = highend_pfn = max_pfn; - if (max_pfn > MAXMEM_PFN) { - highstart_pfn = MAXMEM_PFN; - printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", - pages_to_mb(highend_pfn - highstart_pfn)); - } -#endif - - /* - * Initialize the boot-time allocator, and free up all RAM. - * Then reserve space for OS image, and the bootmem bitmap. - */ - bootmap_size = init_bootmem(start_pfn, max_low_pfn); - free_bootmem(0, PFN_PHYS(max_low_pfn)); - reserve_bootmem(0, PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1); - - /* Now reserve space for the hypervisor-provided page tables. */ - { - unsigned long *pgd = (unsigned long *)start_info.pt_base; - unsigned long pte; - int i; - reserve_bootmem(__pa(pgd), PAGE_SIZE); - for ( i = 0; i < (HYPERVISOR_VIRT_START>>22); i++ ) - { - unsigned long pgde = *pgd++; - if ( !(pgde & 1) ) continue; - pte = machine_to_phys(pgde & PAGE_MASK); - reserve_bootmem(pte, PAGE_SIZE); - } - } - cur_pgd = init_mm.pgd = (pgd_t *)start_info.pt_base; - - /* Now initialise the physical->machine mapping table. */ - phys_to_machine_mapping = alloc_bootmem(max_pfn * sizeof(unsigned long)); - for ( i = 0; i < max_pfn; i++ ) - { - unsigned long pgde, *ppte; - unsigned long pfn = i + (PAGE_OFFSET >> PAGE_SHIFT); - pgde = *((unsigned long *)start_info.pt_base + (pfn >> 10)); - ppte = (unsigned long *)machine_to_phys(pgde & PAGE_MASK) + (pfn&1023); - phys_to_machine_mapping[i] = - (*(unsigned long *)__va(ppte)) >> PAGE_SHIFT; - } - -#ifdef CONFIG_BLK_DEV_INITRD - if (start_info.mod_start) { - if ((__pa(start_info.mod_start) + start_info.mod_len) <= - (max_low_pfn << PAGE_SHIFT)) { - initrd_start = start_info.mod_start; - initrd_end = initrd_start + start_info.mod_len; - initrd_below_start_ok = 1; - } - else { - printk(KERN_ERR "initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - __pa(start_info.mod_start) + start_info.mod_len, - max_low_pfn << PAGE_SHIFT); - initrd_start = 0; - } - } -#endif - - paging_init(); - - /* We are privileged guest os - should have IO privileges. */ - if ( start_info.flags & SIF_PRIVILEGED ) - { - dom0_op_t op; - op.cmd = DOM0_IOPL; - op.u.iopl.domain = DOMID_SELF; - op.u.iopl.iopl = 1; - if( HYPERVISOR_dom0_op(&op) != 0 ) - panic("Unable to obtain IOPL, despite being SIF_PRIVILEGED"); - current->thread.io_pl = 1; - } - - if (start_info.flags & SIF_INITDOMAIN ) - { - if( !(start_info.flags & SIF_PRIVILEGED) ) - panic("Xen granted us console access but not privileged status"); - -#if defined(CONFIG_VT) -#if defined(CONFIG_VGA_CONSOLE) - conswitchp = &vga_con; -#elif defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; -#endif -#endif - } -} - -static int cachesize_override __initdata = -1; -static int __init cachesize_setup(char *str) -{ - get_option (&str, &cachesize_override); - return 1; -} -__setup("cachesize=", cachesize_setup); - - -static int __init get_model_name(struct cpuinfo_x86 *c) -{ - unsigned int *v; - char *p, *q; - - if (cpuid_eax(0x80000000) < 0x80000004) - return 0; - - v = (unsigned int *) c->x86_model_id; - cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); - cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); - cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); - c->x86_model_id[48] = 0; - - /* Intel chips right-justify this string for some dumb reason; - undo that brain damage */ - p = q = &c->x86_model_id[0]; - while ( *p == ' ' ) - p++; - if ( p != q ) { - while ( *p ) - *q++ = *p++; - while ( q <= &c->x86_model_id[48] ) - *q++ = '\0'; /* Zero-pad the rest */ - } - - return 1; -} - - -static void __init display_cacheinfo(struct cpuinfo_x86 *c) -{ - unsigned int n, dummy, ecx, edx, l2size; - - n = cpuid_eax(0x80000000); - - if (n >= 0x80000005) { - cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); - c->x86_cache_size=(ecx>>24)+(edx>>24); - } - - if (n < 0x80000006) /* Some chips just has a large L1. */ - return; - - ecx = cpuid_ecx(0x80000006); - l2size = ecx >> 16; - - /* AMD errata T13 (order #21922) */ - if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { - if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ - l2size = 64; - if (c->x86_model == 4 && - (c->x86_mask==0 || c->x86_mask==1)) /* Tbird rev A1/A2 */ - l2size = 256; - } - - /* Intel PIII Tualatin. This comes in two flavours. - * One has 256kb of cache, the other 512. We have no way - * to determine which, so we use a boottime override - * for the 512kb model, and assume 256 otherwise. - */ - if ((c->x86_vendor == X86_VENDOR_INTEL) && (c->x86 == 6) && - (c->x86_model == 11) && (l2size == 0)) - l2size = 256; - - if (c->x86_vendor == X86_VENDOR_CENTAUR) { - /* VIA C3 CPUs (670-68F) need further shifting. */ - if ((c->x86 == 6) && - ((c->x86_model == 7) || (c->x86_model == 8))) { - l2size >>= 8; - } - - /* VIA also screwed up Nehemiah stepping 1, and made - it return '65KB' instead of '64KB' - - Note, it seems this may only be in engineering samples. */ - if ((c->x86==6) && (c->x86_model==9) && - (c->x86_mask==1) && (l2size==65)) - l2size -= 1; - } - - /* Allow user to override all this if necessary. */ - if (cachesize_override != -1) - l2size = cachesize_override; - - if ( l2size == 0 ) - return; /* Again, no L2 cache is possible */ - - c->x86_cache_size = l2size; - - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); -} - -static void __init init_c3(struct cpuinfo_x86 *c) -{ - /* Test for Centaur Extended Feature Flags presence */ - if (cpuid_eax(0xC0000000) >= 0xC0000001) { - /* store Centaur Extended Feature Flags as - * word 5 of the CPU capability bit array - */ - c->x86_capability[5] = cpuid_edx(0xC0000001); - } - - switch (c->x86_model) { - case 9: /* Nehemiah */ - default: - get_model_name(c); - display_cacheinfo(c); - break; - } -} - -static void __init init_centaur(struct cpuinfo_x86 *c) -{ - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*3231) anyway */ - clear_bit(0*32+31, &c->x86_capability); - - switch (c->x86) { - case 6: - init_c3(c); - break; - default: - panic("Unsupported Centaur CPU (%i)\n", c->x86); - } -} - -static int __init init_amd(struct cpuinfo_x86 *c) -{ - int r; - - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_bit(0*32+31, &c->x86_capability); - - r = get_model_name(c); - - switch(c->x86) - { - case 5: /* We don't like AMD K6 */ - panic("Unsupported AMD processor\n"); - case 6: /* An Athlon/Duron. We can trust the BIOS probably */ - break; - } - - display_cacheinfo(c); - return r; -} - - -static void __init init_intel(struct cpuinfo_x86 *c) -{ - char *p = NULL; - unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ - - if (c->cpuid_level > 1) { - /* supports eax=2 call */ - int i, j, n; - int regs[4]; - unsigned char *dp = (unsigned char *)regs; - - /* Number of times to iterate */ - n = cpuid_eax(2) & 0xFF; - - for ( i = 0 ; i < n ; i++ ) { - cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); - - /* If bit 31 is set, this is an unknown format */ - for ( j = 0 ; j < 3 ; j++ ) { - if ( regs[j] < 0 ) regs[j] = 0; - } - - /* Byte 0 is level count, not a descriptor */ - for ( j = 1 ; j < 16 ; j++ ) { - unsigned char des = dp[j]; - unsigned char dl, dh; - unsigned int cs; - - dh = des >> 4; - dl = des & 0x0F; - - /* Black magic... */ - - switch ( dh ) - { - case 0: - switch ( dl ) { - case 6: - /* L1 I cache */ - l1i += 8; - break; - case 8: - /* L1 I cache */ - l1i += 16; - break; - case 10: - /* L1 D cache */ - l1d += 8; - break; - case 12: - /* L1 D cache */ - l1d += 16; - break; - default:; - /* TLB, or unknown */ - } - break; - case 2: - if ( dl ) { - /* L3 cache */ - cs = (dl-1) << 9; - l3 += cs; - } - break; - case 4: - if ( c->x86 > 6 && dl ) { - /* P4 family */ - /* L3 cache */ - cs = 128 << (dl-1); - l3 += cs; - break; - } - /* else same as 8 - fall through */ - case 8: - if ( dl ) { - /* L2 cache */ - cs = 128 << (dl-1); - l2 += cs; - } - break; - case 6: - if (dl > 5) { - /* L1 D cache */ - cs = 8<<(dl-6); - l1d += cs; - } - break; - case 7: - if ( dl >= 8 ) - { - /* L2 cache */ - cs = 64<<(dl-8); - l2 += cs; - } else { - /* L0 I cache, count as L1 */ - cs = dl ? (16 << (dl-1)) : 12; - l1i += cs; - } - break; - default: - /* TLB, or something else we don't know about */ - break; - } - } - } - if ( l1i || l1d ) - printk(KERN_INFO "CPU: L1 I cache: %dK, L1 D cache: %dK\n", - l1i, l1d); - if ( l2 ) - printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); - if ( l3 ) - printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); - - /* - * This assumes the L3 cache is shared; it typically lives in - * the northbridge. The L1 caches are included by the L2 - * cache, and so should not be included for the purpose of - * SMP switching weights. - */ - c->x86_cache_size = l2 ? l2 : (l1i+l1d); - } - - /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */ - if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 ) - clear_bit(X86_FEATURE_SEP, &c->x86_capability); - - /* Names for the Pentium II/Celeron processors - detectable only by also checking the cache size. - Dixon is NOT a Celeron. */ - if (c->x86 == 6) { - switch (c->x86_model) { - case 5: - if (l2 == 0) - p = "Celeron (Covington)"; - if (l2 == 256) - p = "Mobile Pentium II (Dixon)"; - break; - - case 6: - if (l2 == 128) - p = "Celeron (Mendocino)"; - break; - - case 8: - if (l2 == 128) - p = "Celeron (Coppermine)"; - break; - } - } - - if ( p ) - strcpy(c->x86_model_id, p); -} - -void __init get_cpu_vendor(struct cpuinfo_x86 *c) -{ - char *v = c->x86_vendor_id; - - if (!strcmp(v, "GenuineIntel")) - c->x86_vendor = X86_VENDOR_INTEL; - else if (!strcmp(v, "AuthenticAMD")) - c->x86_vendor = X86_VENDOR_AMD; - else if (!strcmp(v, "CentaurHauls")) - c->x86_vendor = X86_VENDOR_CENTAUR; - else - c->x86_vendor = X86_VENDOR_UNKNOWN; -} - -struct cpu_model_info { - int vendor; - int family; - char *model_names[16]; -}; - -/* Naming convention should be: [()] */ -/* This table only is used unless init_() below doesn't set it; */ -/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ -static struct cpu_model_info cpu_models[] __initdata = { - { X86_VENDOR_INTEL, 6, - { "Pentium Pro A-step", "Pentium Pro", NULL, "Pentium II (Klamath)", - NULL, "Pentium II (Deschutes)", "Mobile Pentium II", - "Pentium III (Katmai)", "Pentium III (Coppermine)", NULL, - "Pentium III (Cascades)", NULL, NULL, NULL, NULL }}, - { X86_VENDOR_AMD, 6, /* Is this this really necessary?? */ - { "Athlon", "Athlon", - "Athlon", NULL, "Athlon", NULL, - NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL }} -}; - -/* Look up CPU names by table lookup. */ -static char __init *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info = cpu_models; - int i; - - if ( c->x86_model >= 16 ) - return NULL; /* Range check */ - - for ( i = 0 ; i < sizeof(cpu_models)/sizeof(struct cpu_model_info) ; i++ ) { - if ( info->vendor == c->x86_vendor && - info->family == c->x86 ) { - return info->model_names[c->x86_model]; - } - info++; - } - return NULL; /* Not found */ -} - - - -/* Standard macro to see if a specific flag is changeable */ -static inline int flag_is_changeable_p(u32 flag) -{ - u32 f1, f2; - - asm("pushfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "movl %0,%1\n\t" - "xorl %2,%0\n\t" - "pushl %0\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "popfl\n\t" - : "=&r" (f1), "=&r" (f2) - : "ir" (flag)); - - return ((f1^f2) & flag) != 0; -} - - -/* Probe for the CPUID instruction */ -static int __init have_cpuid_p(void) -{ - return flag_is_changeable_p(X86_EFLAGS_ID); -} - - - -#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) -unsigned char eddnr; -struct edd_info edd[EDDMAXNR]; -unsigned int edd_disk80_sig; -/** - * copy_edd() - Copy the BIOS EDD information - * from empty_zero_page into a safe place. - * - */ -static inline void copy_edd(void) -{ - eddnr = EDD_NR; - memcpy(edd, EDD_BUF, sizeof(edd)); - edd_disk80_sig = DISK80_SIGNATURE_BUFFER; -} -#else -static inline void copy_edd(void) {} -#endif - -/* - * This does the hard work of actually picking apart the CPU stuff... - */ -void __init identify_cpu(struct cpuinfo_x86 *c) -{ - int junk, i; - u32 xlvl, tfms; - - c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; - c->x86_vendor = X86_VENDOR_UNKNOWN; - c->cpuid_level = -1; /* CPUID not detected */ - c->x86_model = c->x86_mask = 0; /* So far unknown... */ - c->x86_vendor_id[0] = '\0'; /* Unset */ - c->x86_model_id[0] = '\0'; /* Unset */ - memset(&c->x86_capability, 0, sizeof c->x86_capability); - c->hard_math = 1; - - if ( !have_cpuid_p() ) { - panic("Processor must support CPUID\n"); - } else { - /* CPU does have CPUID */ - - /* Get vendor name */ - cpuid(0x00000000, &c->cpuid_level, - (int *)&c->x86_vendor_id[0], - (int *)&c->x86_vendor_id[8], - (int *)&c->x86_vendor_id[4]); - - get_cpu_vendor(c); - /* Initialize the standard set of capabilities */ - /* Note that the vendor-specific code below might override */ - - /* Intel-defined flags: level 0x00000001 */ - if ( c->cpuid_level >= 0x00000001 ) { - u32 capability, excap; - cpuid(0x00000001, &tfms, &junk, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; - c->x86 = (tfms >> 8) & 15; - c->x86_model = (tfms >> 4) & 15; - if (c->x86 == 0xf) { - c->x86 += (tfms >> 20) & 0xff; - c->x86_model += ((tfms >> 16) & 0xF) << 4; - } - c->x86_mask = tfms & 15; - } else { - /* Have CPUID level 0 only - unheard of */ - c->x86 = 4; - } - - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - if ( (xlvl & 0xffff0000) == 0x80000000 ) { - if ( xlvl >= 0x80000001 ) - c->x86_capability[1] = cpuid_edx(0x80000001); - if ( xlvl >= 0x80000004 ) - get_model_name(c); /* Default name */ - } - - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ( (xlvl & 0xffff0000) == 0x80860000 ) { - if ( xlvl >= 0x80860001 ) - c->x86_capability[2] = cpuid_edx(0x80860001); - } - } - - printk(KERN_DEBUG "CPU: Before vendor init, caps: %08x %08x %08x, vendor = %d\n", - c->x86_capability[0], - c->x86_capability[1], - c->x86_capability[2], - c->x86_vendor); - - /* - * Vendor-specific initialization. In this section we - * canonicalize the feature flags, meaning if there are - * features a certain CPU supports which CPUID doesn't - * tell us, CPUID claiming incorrect flags, or other bugs, - * we handle them here. - * - * At the end of this section, c->x86_capability better - * indicate the features this CPU genuinely supports! - */ - switch ( c->x86_vendor ) { - case X86_VENDOR_AMD: - init_amd(c); - break; - - case X86_VENDOR_INTEL: - init_intel(c); - break; - - case X86_VENDOR_CENTAUR: - init_centaur(c); - break; - - default: - printk("Unsupported CPU vendor (%d) -- please report!\n"); - } - - printk(KERN_DEBUG "CPU: After vendor init, caps: %08x %08x %08x %08x\n", - c->x86_capability[0], - c->x86_capability[1], - c->x86_capability[2], - c->x86_capability[3]); - - - /* If the model name is still unset, do table lookup. */ - if ( !c->x86_model_id[0] ) { - char *p; - p = table_lookup_model(c); - if ( p ) - strcpy(c->x86_model_id, p); - else - /* Last resort... */ - sprintf(c->x86_model_id, "%02x/%02x", - c->x86_vendor, c->x86_model); - } - - /* Now the feature flags better reflect actual CPU features! */ - - printk(KERN_DEBUG "CPU: After generic, caps: %08x %08x %08x %08x\n", - c->x86_capability[0], - c->x86_capability[1], - c->x86_capability[2], - c->x86_capability[3]); - - /* - * On SMP, boot_cpu_data holds the common feature set between - * all CPUs; so make sure that we indicate which features are - * common between the CPUs. The first time this routine gets - * executed, c == &boot_cpu_data. - */ - if ( c != &boot_cpu_data ) { - /* AND the already accumulated flags with these */ - for ( i = 0 ; i < NCAPINTS ; i++ ) - boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; - } - - printk(KERN_DEBUG "CPU: Common caps: %08x %08x %08x %08x\n", - boot_cpu_data.x86_capability[0], - boot_cpu_data.x86_capability[1], - boot_cpu_data.x86_capability[2], - boot_cpu_data.x86_capability[3]); -} - - -/* These need to match */ -static char *cpu_vendor_names[] __initdata = { - "Intel", "Cyrix", "AMD", "UMC", "NexGen", "Centaur", "Rise", "Transmeta" }; - - -void __init print_cpu_info(struct cpuinfo_x86 *c) -{ - char *vendor = NULL; - - if (c->x86_vendor < sizeof(cpu_vendor_names)/sizeof(char *)) - vendor = cpu_vendor_names[c->x86_vendor]; - else if (c->cpuid_level >= 0) - vendor = c->x86_vendor_id; - - if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) - printk("%s ", vendor); - - if (!c->x86_model_id[0]) - printk("%d86", c->x86); - else - printk("%s", c->x86_model_id); - - if (c->x86_mask || c->cpuid_level >= 0) - printk(" stepping %02x\n", c->x86_mask); - else - printk("\n"); -} - -/* - * Get CPU information for use by the procfs. - */ -static int show_cpuinfo(struct seq_file *m, void *v) -{ - /* - * These flag bits must match the definitions in . - * NULL means this bit is undefined or reserved; either way it doesn't - * have meaning as far as Linux is concerned. Note that it's important - * to realize there is a difference between this table and CPUID -- if - * applications want to get the raw CPUID data, they should access - * /dev/cpu//cpuid instead. - */ - static char *x86_cap_flags[] = { - /* Intel-defined */ - "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", - "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", - "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", - "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", - - /* AMD-defined */ - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "mp", NULL, NULL, "mmxext", NULL, - NULL, NULL, NULL, NULL, NULL, "lm", "3dnowext", "3dnow", - - /* Transmeta-defined */ - "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Other (Linux-defined) */ - "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Intel-defined (#2) */ - "pni", NULL, NULL, "monitor", "ds_cpl", NULL, NULL, "tm2", - "est", NULL, "cid", NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* VIA/Cyrix/Centaur-defined */ - NULL, NULL, "xstore", NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - }; - struct cpuinfo_x86 *c = v; - int i, n = c - cpu_data; - int fpu_exception; - -#ifdef CONFIG_SMP - if (!(cpu_online_map & (1<x86_vendor_id[0] ? c->x86_vendor_id : "unknown", - c->x86, - c->x86_model, - c->x86_model_id[0] ? c->x86_model_id : "unknown"); - - if (c->x86_mask || c->cpuid_level >= 0) - seq_printf(m, "stepping\t: %d\n", c->x86_mask); - else - seq_printf(m, "stepping\t: unknown\n"); - - if ( test_bit(X86_FEATURE_TSC, &c->x86_capability) ) { - seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n", - cpu_khz / 1000, (cpu_khz % 1000)); - } - - /* Cache size */ - if (c->x86_cache_size >= 0) - seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); - - /* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */ - fpu_exception = c->hard_math && (ignore_irq13 || cpu_has_fpu); - seq_printf(m, "fdiv_bug\t: %s\n" - "hlt_bug\t\t: %s\n" - "f00f_bug\t: %s\n" - "coma_bug\t: %s\n" - "fpu\t\t: %s\n" - "fpu_exception\t: %s\n" - "cpuid level\t: %d\n" - "wp\t\t: %s\n" - "flags\t\t:", - c->fdiv_bug ? "yes" : "no", - c->hlt_works_ok ? "no" : "yes", - c->f00f_bug ? "yes" : "no", - c->coma_bug ? "yes" : "no", - c->hard_math ? "yes" : "no", - fpu_exception ? "yes" : "no", - c->cpuid_level, - c->wp_works_ok ? "yes" : "no"); - - for ( i = 0 ; i < 32*NCAPINTS ; i++ ) - if ( test_bit(i, &c->x86_capability) && - x86_cap_flags[i] != NULL ) - seq_printf(m, " %s", x86_cap_flags[i]); - - seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n", - c->loops_per_jiffy/(500000/HZ), - (c->loops_per_jiffy/(5000/HZ)) % 100); - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - return *pos < NR_CPUS ? cpu_data + *pos : NULL; -} -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return c_start(m, pos); -} -static void c_stop(struct seq_file *m, void *v) -{ -} -struct seq_operations cpuinfo_op = { - start: c_start, - next: c_next, - stop: c_stop, - show: show_cpuinfo, -}; - -unsigned long cpu_initialized __initdata = 0; - -/* - * cpu_init() initializes state that is per-CPU. Some data is already - * initialized (naturally) in the bootstrap process, such as the GDT - * and IDT. We reload them nevertheless, this function acts as a - * 'CPU state barrier', nothing should get across. - */ -void __init cpu_init (void) -{ - int nr = smp_processor_id(); - - if (test_and_set_bit(nr, &cpu_initialized)) { - printk(KERN_WARNING "CPU#%d already initialized!\n", nr); - for (;;) __sti(); - } - printk(KERN_INFO "Initializing CPU#%d\n", nr); - - /* - * set up and load the per-CPU TSS and LDT - */ - atomic_inc(&init_mm.mm_count); - current->active_mm = &init_mm; - if(current->mm) - BUG(); - enter_lazy_tlb(&init_mm, current, nr); - - HYPERVISOR_stack_switch(__KERNEL_DS, current->thread.esp0); - - load_LDT(&init_mm.context); - flush_page_update_queue(); - - /* Force FPU initialization. */ - current->flags &= ~PF_USEDFPU; - current->used_math = 0; - stts(); -} - - -/****************************************************************************** - * Time-to-die callback handling. - */ - -static void die_irq(int irq, void *unused, struct pt_regs *regs) -{ - extern void ctrl_alt_del(void); - ctrl_alt_del(); -} - -static int __init setup_die_event(void) -{ - (void)request_irq(HYPEREVENT_IRQ(_EVENT_DIE), die_irq, 0, "die", NULL); - return 0; -} - -__initcall(setup_die_event); - - -/****************************************************************************** - * Stop/pickle callback handling. - */ - -#include - -static void stop_task(void *unused) -{ - /* Hmmm... a cleaner interface to suspend/resume blkdevs would be nice. */ - extern void blkdev_suspend(void); - extern void blkdev_resume(void); - - unsigned long *pfn_to_mfn_frame_list = NULL; - suspend_record_t *suspend_record = NULL; - struct net_device *dev; - char name[6]; - int i, j; - - if ( (pfn_to_mfn_frame_list = (unsigned long *)__get_free_page(GFP_KERNEL)) - == NULL ) - goto out; - if ( (suspend_record = (suspend_record_t *)__get_free_page(GFP_KERNEL)) - == NULL ) - goto out; - - suspend_record->pfn_to_mfn_frame_list = - virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT; - suspend_record->nr_pfns = max_pfn; - - j = 0; - for ( i = 0; i < max_pfn; i += (PAGE_SIZE / sizeof(unsigned long)) ) - pfn_to_mfn_frame_list[j++] = - virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT; - - /* - * NB. This is /not/ a full dev_close() as that loses route information! - * Instead we do essentialy the same as dev_close() but without notifying - * various registered subsystems about the NETDEV_DOWN event. - */ - rtnl_lock(); - for ( i = 0; i < 10; i++ ) - { - sprintf(name, "eth%d", i); - if ( ((dev = __dev_get_by_name(name)) != NULL) && - (dev->flags & IFF_UP) ) - { - dev_deactivate(dev); - clear_bit(__LINK_STATE_START, &dev->state); - if ( dev->stop != NULL ) - dev->stop(dev); - dev->flags &= ~IFF_UP; - } - } - rtnl_unlock(); - - blkdev_suspend(); - - __cli(); - - HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; - clear_fixmap(FIX_SHARED_INFO); - - memcpy(&suspend_record->resume_info, &start_info, sizeof(start_info)); - - HYPERVISOR_stop(virt_to_machine(suspend_record) >> PAGE_SHIFT); - - memcpy(&start_info, &suspend_record->resume_info, sizeof(start_info)); - - set_fixmap(FIX_SHARED_INFO, start_info.shared_info); - HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); - memset(empty_zero_page, 0, PAGE_SIZE); - - __sti(); - - blkdev_resume(); - - /* - * We now do the opposite of the network suspend code. Basically it's - * dev_open() but without notifying anyone about NETDEV_UP. - */ - rtnl_lock(); - for ( i = 0; i < 10; i++ ) - { - sprintf(name, "eth%d", i); - if ( ((dev = __dev_get_by_name(name)) != NULL) && - !(dev->flags & IFF_UP) ) - { - set_bit(__LINK_STATE_START, &dev->state); - if ( (dev->open == NULL) || (dev->open(dev) == 0) ) - { - dev->flags |= IFF_UP; - dev_activate(dev); - } - else - { - clear_bit(__LINK_STATE_START, &dev->state); - } - } - } - rtnl_unlock(); - - out: - if ( pfn_to_mfn_frame_list != NULL ) - free_page((unsigned long)pfn_to_mfn_frame_list); - if ( suspend_record != NULL ) - free_page((unsigned long)suspend_record); -} - -static struct tq_struct stop_tq; - -static void stop_irq(int irq, void *unused, struct pt_regs *regs) -{ - stop_tq.routine = stop_task; - schedule_task(&stop_tq); -} - -static int __init setup_stop_event(void) -{ - (void)request_irq(HYPEREVENT_IRQ(_EVENT_STOP), stop_irq, 0, "stop", NULL); - return 0; -} - -__initcall(setup_stop_event); - diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/signal.c b/xenolinux-2.4.25-sparse/arch/xeno/kernel/signal.c deleted file mode 100644 index f646c5c0ca..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/signal.c +++ /dev/null @@ -1,717 +0,0 @@ -/* - * linux/arch/i386/kernel/signal.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson - * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DEBUG_SIG 0 - -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) - -int FASTCALL(do_signal(struct pt_regs *regs, sigset_t *oldset)); - -int copy_siginfo_to_user(siginfo_t *to, siginfo_t *from) -{ - if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t))) - return -EFAULT; - if (from->si_code < 0) - return __copy_to_user(to, from, sizeof(siginfo_t)); - else { - int err; - - /* If you change siginfo_t structure, please be sure - this code is fixed accordingly. - It should never copy any pad contained in the structure - to avoid security leaks, but must copy the generic - 3 ints plus the relevant union member. */ - err = __put_user(from->si_signo, &to->si_signo); - err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user((short)from->si_code, &to->si_code); - /* First 32bits of unions are always present. */ - err |= __put_user(from->si_pid, &to->si_pid); - switch (from->si_code >> 16) { - case __SI_FAULT >> 16: - break; - case __SI_CHLD >> 16: - err |= __put_user(from->si_utime, &to->si_utime); - err |= __put_user(from->si_stime, &to->si_stime); - err |= __put_user(from->si_status, &to->si_status); - default: - err |= __put_user(from->si_uid, &to->si_uid); - break; - /* case __SI_RT: This is not generated by the kernel as of now. */ - } - return err; - } -} - -/* - * Atomically swap in the new signal mask, and wait for a signal. - */ -asmlinkage int -sys_sigsuspend(int history0, int history1, old_sigset_t mask) -{ - struct pt_regs * regs = (struct pt_regs *) &history0; - sigset_t saveset; - - mask &= _BLOCKABLE; - spin_lock_irq(¤t->sigmask_lock); - saveset = current->blocked; - siginitset(¤t->blocked, mask); - recalc_sigpending(current); - spin_unlock_irq(¤t->sigmask_lock); - - regs->eax = -EINTR; - while (1) { - current->state = TASK_INTERRUPTIBLE; - schedule(); - if (do_signal(regs, &saveset)) - return -EINTR; - } -} - -asmlinkage int -sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize) -{ - struct pt_regs * regs = (struct pt_regs *) &unewset; - sigset_t saveset, newset; - - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - - if (copy_from_user(&newset, unewset, sizeof(newset))) - return -EFAULT; - sigdelsetmask(&newset, ~_BLOCKABLE); - - spin_lock_irq(¤t->sigmask_lock); - saveset = current->blocked; - current->blocked = newset; - recalc_sigpending(current); - spin_unlock_irq(¤t->sigmask_lock); - - regs->eax = -EINTR; - while (1) { - current->state = TASK_INTERRUPTIBLE; - schedule(); - if (do_signal(regs, &saveset)) - return -EINTR; - } -} - -asmlinkage int -sys_sigaction(int sig, const struct old_sigaction *act, - struct old_sigaction *oact) -{ - struct k_sigaction new_ka, old_ka; - int ret; - - if (act) { - old_sigset_t mask; - if (verify_area(VERIFY_READ, act, sizeof(*act)) || - __get_user(new_ka.sa.sa_handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) - return -EFAULT; - __get_user(new_ka.sa.sa_flags, &act->sa_flags); - __get_user(mask, &act->sa_mask); - siginitset(&new_ka.sa.sa_mask, mask); - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - - if (!ret && oact) { - if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || - __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) - return -EFAULT; - __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); - } - - return ret; -} - -asmlinkage int -sys_sigaltstack(const stack_t *uss, stack_t *uoss) -{ - struct pt_regs *regs = (struct pt_regs *) &uss; - return do_sigaltstack(uss, uoss, regs->esp); -} - - -/* - * Do a signal return; undo the signal stack. - */ - -struct sigframe -{ - char *pretcode; - int sig; - struct sigcontext sc; - struct _fpstate fpstate; - unsigned long extramask[_NSIG_WORDS-1]; - char retcode[8]; -}; - -struct rt_sigframe -{ - char *pretcode; - int sig; - struct siginfo *pinfo; - void *puc; - struct siginfo info; - struct ucontext uc; - struct _fpstate fpstate; - char retcode[8]; -}; - -static int -restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc, int *peax) -{ - unsigned int err = 0; - -#define COPY(x) err |= __get_user(regs->x, &sc->x) - -#define COPY_SEG(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ - regs->x##seg = tmp; } - -#define COPY_SEG_STRICT(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ - regs->x##seg = tmp|3; } - -#define GET_SEG(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ - loadsegment(seg,tmp); } - - GET_SEG(gs); - GET_SEG(fs); - COPY_SEG(es); - COPY_SEG(ds); - COPY(edi); - COPY(esi); - COPY(ebp); - COPY(esp); - COPY(ebx); - COPY(edx); - COPY(ecx); - COPY(eip); - COPY_SEG_STRICT(cs); - COPY_SEG_STRICT(ss); - - { - unsigned int tmpflags; - err |= __get_user(tmpflags, &sc->eflags); - regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5); - regs->orig_eax = -1; /* disable syscall checks */ - } - - { - struct _fpstate * buf; - err |= __get_user(buf, &sc->fpstate); - if (buf) { - if (verify_area(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387(buf); - } - } - - err |= __get_user(*peax, &sc->eax); - return err; - -badframe: - return 1; -} - -asmlinkage int sys_sigreturn(unsigned long __unused) -{ - struct pt_regs *regs = (struct pt_regs *) &__unused; - struct sigframe *frame = (struct sigframe *)(regs->esp - 8); - sigset_t set; - int eax; - - if (verify_area(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; - if (__get_user(set.sig[0], &frame->sc.oldmask) - || (_NSIG_WORDS > 1 - && __copy_from_user(&set.sig[1], &frame->extramask, - sizeof(frame->extramask)))) - goto badframe; - - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sigmask_lock); - current->blocked = set; - recalc_sigpending(current); - spin_unlock_irq(¤t->sigmask_lock); - - if (restore_sigcontext(regs, &frame->sc, &eax)) - goto badframe; - return eax; - -badframe: - force_sig(SIGSEGV, current); - return 0; -} - -asmlinkage int sys_rt_sigreturn(unsigned long __unused) -{ - struct pt_regs *regs = (struct pt_regs *) &__unused; - struct rt_sigframe *frame = (struct rt_sigframe *)(regs->esp - 4); - sigset_t set; - stack_t st; - int eax; - - if (verify_area(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; - if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) - goto badframe; - - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sigmask_lock); - current->blocked = set; - recalc_sigpending(current); - spin_unlock_irq(¤t->sigmask_lock); - - if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax)) - goto badframe; - - if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st))) - goto badframe; - /* It is more difficult to avoid calling this function than to - call it and ignore errors. */ - do_sigaltstack(&st, NULL, regs->esp); - - return eax; - -badframe: - force_sig(SIGSEGV, current); - return 0; -} - -/* - * Set up a signal frame. - */ - -static int -setup_sigcontext(struct sigcontext *sc, struct _fpstate *fpstate, - struct pt_regs *regs, unsigned long mask) -{ - int tmp, err = 0; - - tmp = 0; - __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); - err |= __put_user(tmp, (unsigned int *)&sc->gs); - __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp)); - err |= __put_user(tmp, (unsigned int *)&sc->fs); - - err |= __put_user(regs->xes, (unsigned int *)&sc->es); - err |= __put_user(regs->xds, (unsigned int *)&sc->ds); - err |= __put_user(regs->edi, &sc->edi); - err |= __put_user(regs->esi, &sc->esi); - err |= __put_user(regs->ebp, &sc->ebp); - err |= __put_user(regs->esp, &sc->esp); - err |= __put_user(regs->ebx, &sc->ebx); - err |= __put_user(regs->edx, &sc->edx); - err |= __put_user(regs->ecx, &sc->ecx); - err |= __put_user(regs->eax, &sc->eax); - err |= __put_user(current->thread.trap_no, &sc->trapno); - err |= __put_user(current->thread.error_code, &sc->err); - err |= __put_user(regs->eip, &sc->eip); - err |= __put_user(regs->xcs, (unsigned int *)&sc->cs); - err |= __put_user(regs->eflags, &sc->eflags); - err |= __put_user(regs->esp, &sc->esp_at_signal); - err |= __put_user(regs->xss, (unsigned int *)&sc->ss); - - tmp = save_i387(fpstate); - if (tmp < 0) - err = 1; - else - err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); - - /* non-iBCS2 extensions.. */ - err |= __put_user(mask, &sc->oldmask); - err |= __put_user(current->thread.cr2, &sc->cr2); - - return err; -} - -/* - * Determine which stack to use.. - */ -static inline void * -get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) -{ - unsigned long esp; - - /* Default to using normal stack */ - esp = regs->esp; - - /* This is the X/Open sanctioned signal stack switching. */ - if (ka->sa.sa_flags & SA_ONSTACK) { - if (sas_ss_flags(esp) == 0) - esp = current->sas_ss_sp + current->sas_ss_size; - } - - /* This is the legacy signal stack switching. */ - else if ((regs->xss & 0xffff) != __USER_DS && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) { - esp = (unsigned long) ka->sa.sa_restorer; - } - - return (void *)((esp - frame_size) & -8ul); -} - -static void setup_frame(int sig, struct k_sigaction *ka, - sigset_t *set, struct pt_regs * regs) -{ - struct sigframe *frame; - int err = 0; - - frame = get_sigframe(ka, regs, sizeof(*frame)); - - if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - goto give_sigsegv; - - err |= __put_user((current->exec_domain - && current->exec_domain->signal_invmap - && sig < 32 - ? current->exec_domain->signal_invmap[sig] - : sig), - &frame->sig); - if (err) - goto give_sigsegv; - - err |= setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); - if (err) - goto give_sigsegv; - - if (_NSIG_WORDS > 1) { - err |= __copy_to_user(frame->extramask, &set->sig[1], - sizeof(frame->extramask)); - } - if (err) - goto give_sigsegv; - - /* Set up to return from userspace. If provided, use a stub - already in userspace. */ - if (ka->sa.sa_flags & SA_RESTORER) { - err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); - } else { - err |= __put_user(frame->retcode, &frame->pretcode); - /* This is popl %eax ; movl $,%eax ; int $0x80 */ - err |= __put_user(0xb858, (short *)(frame->retcode+0)); - err |= __put_user(__NR_sigreturn, (int *)(frame->retcode+2)); - err |= __put_user(0x80cd, (short *)(frame->retcode+6)); - } - - if (err) - goto give_sigsegv; - - /* Set up registers for signal handler */ - regs->esp = (unsigned long) frame; - regs->eip = (unsigned long) ka->sa.sa_handler; - - set_fs(USER_DS); - regs->xds = __USER_DS; - regs->xes = __USER_DS; - regs->xss = __USER_DS; - regs->xcs = __USER_CS; - regs->eflags &= ~TF_MASK; - -#if DEBUG_SIG - printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", - current->comm, current->pid, frame, regs->eip, frame->pretcode); -#endif - - return; - -give_sigsegv: - if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; - force_sig(SIGSEGV, current); -} - -static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs * regs) -{ - struct rt_sigframe *frame; - int err = 0; - - frame = get_sigframe(ka, regs, sizeof(*frame)); - - if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - goto give_sigsegv; - - err |= __put_user((current->exec_domain - && current->exec_domain->signal_invmap - && sig < 32 - ? current->exec_domain->signal_invmap[sig] - : sig), - &frame->sig); - err |= __put_user(&frame->info, &frame->pinfo); - err |= __put_user(&frame->uc, &frame->puc); - err |= copy_siginfo_to_user(&frame->info, info); - if (err) - goto give_sigsegv; - - /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->esp), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (err) - goto give_sigsegv; - - /* Set up to return from userspace. If provided, use a stub - already in userspace. */ - if (ka->sa.sa_flags & SA_RESTORER) { - err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); - } else { - err |= __put_user(frame->retcode, &frame->pretcode); - /* This is movl $,%eax ; int $0x80 */ - err |= __put_user(0xb8, (char *)(frame->retcode+0)); - err |= __put_user(__NR_rt_sigreturn, (int *)(frame->retcode+1)); - err |= __put_user(0x80cd, (short *)(frame->retcode+5)); - } - - if (err) - goto give_sigsegv; - - /* Set up registers for signal handler */ - regs->esp = (unsigned long) frame; - regs->eip = (unsigned long) ka->sa.sa_handler; - - set_fs(USER_DS); - regs->xds = __USER_DS; - regs->xes = __USER_DS; - regs->xss = __USER_DS; - regs->xcs = __USER_CS; - regs->eflags &= ~TF_MASK; - -#if DEBUG_SIG - printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", - current->comm, current->pid, frame, regs->eip, frame->pretcode); -#endif - - return; - -give_sigsegv: - if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; - force_sig(SIGSEGV, current); -} - -/* - * OK, we're invoking a handler - */ - -static void -handle_signal(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) -{ - /* Are we from a system call? */ - if (regs->orig_eax >= 0) { - /* If so, check system call restarting.. */ - switch (regs->eax) { - case -ERESTARTNOHAND: - regs->eax = -EINTR; - break; - - case -ERESTARTSYS: - if (!(ka->sa.sa_flags & SA_RESTART)) { - regs->eax = -EINTR; - break; - } - /* fallthrough */ - case -ERESTARTNOINTR: - regs->eax = regs->orig_eax; - regs->eip -= 2; - } - } - - /* Set up the stack frame */ - if (ka->sa.sa_flags & SA_SIGINFO) - setup_rt_frame(sig, ka, info, oldset, regs); - else - setup_frame(sig, ka, oldset, regs); - - if (ka->sa.sa_flags & SA_ONESHOT) - ka->sa.sa_handler = SIG_DFL; - - if (!(ka->sa.sa_flags & SA_NODEFER)) { - spin_lock_irq(¤t->sigmask_lock); - sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); - sigaddset(¤t->blocked,sig); - recalc_sigpending(current); - spin_unlock_irq(¤t->sigmask_lock); - } -} - -/* - * Note that 'init' is a special process: it doesn't get signals it doesn't - * want to handle. Thus you cannot kill init even with a SIGKILL even by - * mistake. - */ -int do_signal(struct pt_regs *regs, sigset_t *oldset) -{ - siginfo_t info; - struct k_sigaction *ka; - - /* - * We want the common case to go fast, which - * is why we may in certain cases get here from - * kernel mode. Just return without doing anything - * if so. - */ - if ((regs->xcs & 2) != 2) - return 1; - - if (!oldset) - oldset = ¤t->blocked; - - for (;;) { - unsigned long signr; - - spin_lock_irq(¤t->sigmask_lock); - signr = dequeue_signal(¤t->blocked, &info); - spin_unlock_irq(¤t->sigmask_lock); - - if (!signr) - break; - - if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { - /* Let the debugger run. */ - current->exit_code = signr; - current->state = TASK_STOPPED; - notify_parent(current, SIGCHLD); - schedule(); - - /* We're back. Did the debugger cancel the sig? */ - if (!(signr = current->exit_code)) - continue; - current->exit_code = 0; - - /* The debugger continued. Ignore SIGSTOP. */ - if (signr == SIGSTOP) - continue; - - /* Update the siginfo structure. Is this good? */ - if (signr != info.si_signo) { - info.si_signo = signr; - info.si_errno = 0; - info.si_code = SI_USER; - info.si_pid = current->p_pptr->pid; - info.si_uid = current->p_pptr->uid; - } - - /* If the (new) signal is now blocked, requeue it. */ - if (sigismember(¤t->blocked, signr)) { - send_sig_info(signr, &info, current); - continue; - } - } - - ka = ¤t->sig->action[signr-1]; - if (ka->sa.sa_handler == SIG_IGN) { - if (signr != SIGCHLD) - continue; - /* Check for SIGCHLD: it's special. */ - while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0) - /* nothing */; - continue; - } - - if (ka->sa.sa_handler == SIG_DFL) { - int exit_code = signr; - - /* Init gets no signals it doesn't want. */ - if (current->pid == 1) - continue; - - switch (signr) { - case SIGCONT: case SIGCHLD: case SIGWINCH: case SIGURG: - continue; - - case SIGTSTP: case SIGTTIN: case SIGTTOU: - if (is_orphaned_pgrp(current->pgrp)) - continue; - /* FALLTHRU */ - - case SIGSTOP: { - struct signal_struct *sig; - current->state = TASK_STOPPED; - current->exit_code = signr; - sig = current->p_pptr->sig; - if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) - notify_parent(current, SIGCHLD); - schedule(); - continue; - } - - case SIGQUIT: case SIGILL: case SIGTRAP: - case SIGABRT: case SIGFPE: case SIGSEGV: - case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ: - if (do_coredump(signr, regs)) - exit_code |= 0x80; - /* FALLTHRU */ - - default: - sig_exit(signr, exit_code, &info); - /* NOTREACHED */ - } - } - - /* Reenable any watchpoints before delivering the - * signal to user space. The processor register will - * have been cleared if the watchpoint triggered - * inside the kernel. - */ - if ( current->thread.debugreg[7] != 0 ) - HYPERVISOR_set_debugreg(7, current->thread.debugreg[7]); - - /* Whee! Actually deliver the signal. */ - handle_signal(signr, ka, &info, oldset, regs); - return 1; - } - - /* Did we come from a system call? */ - if (regs->orig_eax >= 0) { - /* Restart the system call - no handlers present */ - if (regs->eax == -ERESTARTNOHAND || - regs->eax == -ERESTARTSYS || - regs->eax == -ERESTARTNOINTR) { - regs->eax = regs->orig_eax; - regs->eip -= 2; - } - } - return 0; -} diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/time.c b/xenolinux-2.4.25-sparse/arch/xeno/kernel/time.c deleted file mode 100644 index 574ba786b4..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/time.c +++ /dev/null @@ -1,654 +0,0 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- - **************************************************************************** - * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge - * (C) 2002-2003 - Keir Fraser - University of Cambridge - **************************************************************************** - * - * File: arch/xeno/kernel/time.c - * Author: Rolf Neugebauer and Keir Fraser - * - * Description: Interface with Xen to get correct notion of time - */ - -/* - * linux/arch/i386/kernel/time.c - * - * Copyright (C) 1991, 1992, 1995 Linus Torvalds - * - * This file contains the PC-specific time handling details: - * reading the RTC at bootup, etc.. - * 1994-07-02 Alan Modra - * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime - * 1995-03-26 Markus Kuhn - * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887 - * precision CMOS clock update - * 1996-05-03 Ingo Molnar - * fixed time warps in do_[slow|fast]_gettimeoffset() - * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 - * "A Kernel Model for Precision Timekeeping" by Dave Mills - * 1998-09-05 (Various) - * More robust do_fast_gettimeoffset() algorithm implemented - * (works with APM, Cyrix 6x86MX and Centaur C6), - * monotonic gettimeofday() with fast_get_timeoffset(), - * drift-proof precision TSC calibration on boot - * (C. Scott Ananian , Andrew D. - * Balsa , Philip Gladstone ; - * ported from 2.0.35 Jumbo-9 by Michael Krause ). - * 1998-12-16 Andrea Arcangeli - * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy - * because was not accounting lost_ticks. - * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli - * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to - * serialize accesses to xtime/lost_ticks). - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; -extern rwlock_t xtime_lock; -extern unsigned long wall_jiffies; - -unsigned long cpu_khz; /* get this from Xen, used elsewhere */ - -static unsigned int rdtsc_bitshift; -static u32 st_scale_f; /* convert ticks -> usecs */ -static u32 st_scale_i; /* convert ticks -> usecs */ - -/* These are peridically updated in shared_info, and then copied here. */ -static u32 shadow_tsc_stamp; -static u64 shadow_system_time; -static u32 shadow_time_version; -static struct timeval shadow_tv; - -/* - * We use this to ensure that gettimeofday() is monotonically increasing. We - * only break this guarantee if the wall clock jumps backwards "a long way". - */ -static struct timeval last_seen_tv = {0,0}; - -#ifdef CONFIG_XENO_PRIV -/* Periodically propagate synchronised time base to the RTC and to Xen. */ -static long last_update_to_rtc, last_update_to_xen; -#endif - -/* Periodically take synchronised time base from Xen, if we need it. */ -static long last_update_from_xen; /* UTC seconds when last read Xen clock. */ - -/* Keep track of last time we did processing/updating of jiffies and xtime. */ -static u64 processed_system_time; /* System time (ns) at last processing. */ - -#define NS_PER_TICK (1000000000ULL/HZ) - -#define HANDLE_USEC_UNDERFLOW(_tv) \ - do { \ - while ( (_tv).tv_usec < 0 ) \ - { \ - (_tv).tv_usec += 1000000; \ - (_tv).tv_sec--; \ - } \ - } while ( 0 ) -#define HANDLE_USEC_OVERFLOW(_tv) \ - do { \ - while ( (_tv).tv_usec >= 1000000 ) \ - { \ - (_tv).tv_usec -= 1000000; \ - (_tv).tv_sec++; \ - } \ - } while ( 0 ) - - -/* Does this guest OS track Xen time, or set its wall clock independently? */ -static int independent_wallclock = 0; -static int __init __independent_wallclock(char *str) -{ - independent_wallclock = 1; - return 1; -} -__setup("independent_wallclock", __independent_wallclock); - - -#ifdef CONFIG_XENO_PRIV -/* - * In order to set the CMOS clock precisely, set_rtc_mmss has to be - * called 500 ms after the second nowtime has started, because when - * nowtime is written into the registers of the CMOS clock, it will - * jump to the next second precisely 500 ms later. Check the Motorola - * MC146818A or Dallas DS12887 data sheet for details. - * - * BUG: This routine does not handle hour overflow properly; it just - * sets the minutes. Usually you'll only notice that after reboot! - */ -static int set_rtc_mmss(unsigned long nowtime) -{ - int retval = 0; - int real_seconds, real_minutes, cmos_minutes; - unsigned char save_control, save_freq_select; - - /* gets recalled with irq locally disabled */ - spin_lock(&rtc_lock); - save_control = CMOS_READ(RTC_CONTROL); - CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); - - save_freq_select = CMOS_READ(RTC_FREQ_SELECT); - CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); - - cmos_minutes = CMOS_READ(RTC_MINUTES); - if ( !(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD ) - BCD_TO_BIN(cmos_minutes); - - /* - * since we're only adjusting minutes and seconds, don't interfere with - * hour overflow. This avoids messing with unknown time zones but requires - * your RTC not to be off by more than 15 minutes - */ - real_seconds = nowtime % 60; - real_minutes = nowtime / 60; - if ( ((abs(real_minutes - cmos_minutes) + 15)/30) & 1 ) - real_minutes += 30; /* correct for half hour time zone */ - real_minutes %= 60; - - if ( abs(real_minutes - cmos_minutes) < 30 ) - { - if ( !(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD ) - { - BIN_TO_BCD(real_seconds); - BIN_TO_BCD(real_minutes); - } - CMOS_WRITE(real_seconds,RTC_SECONDS); - CMOS_WRITE(real_minutes,RTC_MINUTES); - } - else - { - printk(KERN_WARNING - "set_rtc_mmss: can't update from %d to %d\n", - cmos_minutes, real_minutes); - retval = -1; - } - - /* The following flags have to be released exactly in this order, - * otherwise the DS12887 (popular MC146818A clone with integrated - * battery and quartz) will not reset the oscillator and will not - * update precisely 500 ms later. You won't find this mentioned in - * the Dallas Semiconductor data sheets, but who believes data - * sheets anyway ... -- Markus Kuhn - */ - CMOS_WRITE(save_control, RTC_CONTROL); - CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - spin_unlock(&rtc_lock); - - return retval; -} -#endif - - -/* - * Reads a consistent set of time-base values from Xen, into a shadow data - * area. Must be called with the xtime_lock held for writing. - */ -static void __get_time_values_from_xen(void) -{ - do { - shadow_time_version = HYPERVISOR_shared_info->time_version2; - rmb(); - shadow_tv.tv_sec = HYPERVISOR_shared_info->wc_sec; - shadow_tv.tv_usec = HYPERVISOR_shared_info->wc_usec; - shadow_tsc_stamp = HYPERVISOR_shared_info->tsc_timestamp.tsc_bits; - shadow_system_time = HYPERVISOR_shared_info->system_time; - rmb(); - } - while ( shadow_time_version != HYPERVISOR_shared_info->time_version1 ); -} - -#define TIME_VALUES_UP_TO_DATE \ - (shadow_time_version == HYPERVISOR_shared_info->time_version2) - - -/* - * Returns the system time elapsed, in ns, since the current shadow_timestamp - * was calculated. Must be called with the xtime_lock held for reading. - */ -static inline unsigned long __get_time_delta_usecs(void) -{ - s32 delta_tsc; - u32 low; - u64 delta, tsc; - - rdtscll(tsc); - low = (u32)(tsc >> rdtsc_bitshift); - delta_tsc = (s32)(low - shadow_tsc_stamp); - if ( unlikely(delta_tsc < 0) ) delta_tsc = 0; - delta = ((u64)delta_tsc * st_scale_f); - delta >>= 32; - delta += ((u64)delta_tsc * st_scale_i); - - return (unsigned long)delta; -} - - -/* - * Returns the current time-of-day in UTC timeval format. - */ -void do_gettimeofday(struct timeval *tv) -{ - unsigned long flags, lost; - struct timeval _tv; - - again: - read_lock_irqsave(&xtime_lock, flags); - - _tv.tv_usec = __get_time_delta_usecs(); - if ( (lost = (jiffies - wall_jiffies)) != 0 ) - _tv.tv_usec += lost * (1000000 / HZ); - _tv.tv_sec = xtime.tv_sec; - _tv.tv_usec += xtime.tv_usec; - - if ( unlikely(!TIME_VALUES_UP_TO_DATE) ) - { - /* - * We may have blocked for a long time, rendering our calculations - * invalid (e.g. the time delta may have overflowed). Detect that - * and recalculate with fresh values. - */ - read_unlock_irqrestore(&xtime_lock, flags); - write_lock_irqsave(&xtime_lock, flags); - __get_time_values_from_xen(); - write_unlock_irqrestore(&xtime_lock, flags); - goto again; - } - - HANDLE_USEC_OVERFLOW(_tv); - - /* Ensure that time-of-day is monotonically increasing. */ - if ( (_tv.tv_sec < last_seen_tv.tv_sec) || - ((_tv.tv_sec == last_seen_tv.tv_sec) && - (_tv.tv_usec < last_seen_tv.tv_usec)) ) - _tv = last_seen_tv; - last_seen_tv = _tv; - - read_unlock_irqrestore(&xtime_lock, flags); - - *tv = _tv; -} - - -/* - * Sets the current time-of-day based on passed-in UTC timeval parameter. - */ -void do_settimeofday(struct timeval *tv) -{ - struct timeval newtv; - - if ( !independent_wallclock && !(start_info.flags & SIF_INITDOMAIN) ) - return; - - write_lock_irq(&xtime_lock); - - /* - * Ensure we don't get blocked for a long time so that our time delta - * overflows. If that were to happen then our shadow time values would - * be stale, so we can retry with fresh ones. - */ - again: - tv->tv_usec -= __get_time_delta_usecs(); - if ( unlikely(!TIME_VALUES_UP_TO_DATE) ) - { - __get_time_values_from_xen(); - goto again; - } - - HANDLE_USEC_UNDERFLOW(*tv); - - newtv = *tv; - - tv->tv_usec -= (jiffies - wall_jiffies) * (1000000 / HZ); - HANDLE_USEC_UNDERFLOW(*tv); - - xtime = *tv; - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; - - /* Reset all our running time counts. They make no sense now. */ - last_seen_tv.tv_sec = 0; - last_update_from_xen = 0; - -#ifdef CONFIG_XENO_PRIV - if ( start_info.flags & SIF_INITDOMAIN ) - { - dom0_op_t op; - last_update_to_rtc = last_update_to_xen = 0; - op.cmd = DOM0_SETTIME; - op.u.settime.secs = newtv.tv_sec; - op.u.settime.usecs = newtv.tv_usec; - op.u.settime.system_time = shadow_system_time; - write_unlock_irq(&xtime_lock); - HYPERVISOR_dom0_op(&op); - } - else -#endif - { - write_unlock_irq(&xtime_lock); - } -} - - -asmlinkage long sys_stime(int *tptr) -{ - int value; - struct timeval tv; - - if ( !capable(CAP_SYS_TIME) ) - return -EPERM; - - if ( get_user(value, tptr) ) - return -EFAULT; - - tv.tv_sec = value; - tv.tv_usec = 0; - - do_settimeofday(&tv); - - return 0; -} - - -/* Convert jiffies to system time. Call with xtime_lock held for reading. */ -static inline u64 __jiffies_to_st(unsigned long j) -{ - return processed_system_time + ((j - jiffies) * NS_PER_TICK); -} - - -static inline void do_timer_interrupt(int irq, void *dev_id, - struct pt_regs *regs) -{ - s64 delta; - unsigned long ticks = 0; - long sec_diff; - - __get_time_values_from_xen(); - - if ( (delta = (s64)(shadow_system_time - processed_system_time)) < 0 ) - { - printk("Timer ISR: Time went backwards: %lld\n", delta); - return; - } - - /* Process elapsed jiffies since last call. */ - while ( delta >= NS_PER_TICK ) - { - ticks++; - delta -= NS_PER_TICK; - processed_system_time += NS_PER_TICK; - } - - if ( ticks != 0 ) - { - do_timer_ticks(ticks); - - if ( user_mode(regs) ) - update_process_times_us(ticks, 0); - else - update_process_times_us(0, ticks); - } - - /* - * Take synchronised time from Xen once a minute if we're not - * synchronised ourselves, and we haven't chosen to keep an independent - * time base. - */ - if ( !independent_wallclock && - ((time_status & STA_UNSYNC) != 0) && - (xtime.tv_sec > (last_update_from_xen + 60)) ) - { - /* Adjust shadow timeval for jiffies that haven't updated xtime yet. */ - shadow_tv.tv_usec -= (jiffies - wall_jiffies) * (1000000/HZ); - HANDLE_USEC_UNDERFLOW(shadow_tv); - - /* - * Reset our running time counts if they are invalidated by a warp - * backwards of more than 500ms. - */ - sec_diff = xtime.tv_sec - shadow_tv.tv_sec; - if ( unlikely(abs(sec_diff) > 1) || - unlikely(((sec_diff * 1000000) + - xtime.tv_usec - shadow_tv.tv_usec) > 500000) ) - { - last_update_to_rtc = last_update_to_xen = 0; - last_seen_tv.tv_sec = 0; - } - - /* Update our unsynchronised xtime appropriately. */ - xtime = shadow_tv; - - last_update_from_xen = xtime.tv_sec; - } - -#ifdef CONFIG_XENO_PRIV - if ( (start_info.flags & SIF_INITDOMAIN) && - ((time_status & STA_UNSYNC) == 0) ) - { - /* Send synchronised time to Xen approximately every minute. */ - if ( xtime.tv_sec > (last_update_to_xen + 60) ) - { - dom0_op_t op; - struct timeval tv = xtime; - - tv.tv_usec += (jiffies - wall_jiffies) * (1000000/HZ); - HANDLE_USEC_OVERFLOW(tv); - - op.cmd = DOM0_SETTIME; - op.u.settime.secs = tv.tv_sec; - op.u.settime.usecs = tv.tv_usec; - op.u.settime.system_time = shadow_system_time; - HYPERVISOR_dom0_op(&op); - - last_update_to_xen = xtime.tv_sec; - } - - /* - * If we have an externally synchronized Linux clock, then update CMOS - * clock accordingly every ~11 minutes. Set_rtc_mmss() has to be called - * as close as possible to 500 ms before the new second starts. - */ - if ( (xtime.tv_sec > (last_update_to_rtc + 660)) && - (xtime.tv_usec >= (500000 - ((unsigned) tick) / 2)) && - (xtime.tv_usec <= (500000 + ((unsigned) tick) / 2)) ) - { - if ( set_rtc_mmss(xtime.tv_sec) == 0 ) - last_update_to_rtc = xtime.tv_sec; - else - last_update_to_rtc = xtime.tv_sec - 600; - } - } -#endif -} - - -static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - write_lock(&xtime_lock); - while ( !TIME_VALUES_UP_TO_DATE ) - do_timer_interrupt(irq, NULL, regs); - write_unlock(&xtime_lock); -} - -static struct irqaction irq_timer = { - timer_interrupt, - SA_INTERRUPT, - 0, - "timer", - NULL, - NULL -}; - - -/* - * This function works out when the the next timer function has to be - * executed (by looking at the timer list) and sets the Xen one-shot - * domain timer to the appropriate value. This is typically called in - * cpu_idle() before the domain blocks. - * - * The function returns a non-0 value on error conditions. - * - * It must be called with interrupts disabled. - */ -extern spinlock_t timerlist_lock; -int set_timeout_timer(void) -{ - struct timer_list *timer; - u64 alarm = 0; - int ret = 0; - - spin_lock(&timerlist_lock); - - /* - * This is safe against long blocking (since calculations are not based on - * TSC deltas). It is also safe against warped system time since - * suspend-resume is cooperative and we would first get locked out. It is - * safe against normal updates of jiffies since interrupts are off. - */ - if ( (timer = next_timer_event()) != NULL ) - alarm = __jiffies_to_st(timer->expires); - - /* Tasks on the timer task queue expect to be executed on the next tick. */ - if ( TQ_ACTIVE(tq_timer) ) - alarm = __jiffies_to_st(jiffies + 1); - - /* Failure is pretty bad, but we'd best soldier on. */ - if ( HYPERVISOR_set_timer_op(alarm) != 0 ) - ret = -1; - - spin_unlock(&timerlist_lock); - - return ret; -} - - -/* Time debugging. */ -static void dbg_time_int(int irq, void *dev_id, struct pt_regs *ptregs) -{ - unsigned long flags, j; - u64 s_now, j_st; - struct timeval s_tv, tv; - - struct timer_list *timer; - u64 t_st; - - read_lock_irqsave(&xtime_lock, flags); - s_tv.tv_sec = shadow_tv.tv_sec; - s_tv.tv_usec = shadow_tv.tv_usec; - s_now = shadow_system_time; - read_unlock_irqrestore(&xtime_lock, flags); - - do_gettimeofday(&tv); - - j = jiffies; - j_st = __jiffies_to_st(j); - - timer = next_timer_event(); - t_st = __jiffies_to_st(timer->expires); - - printk(KERN_ALERT "time: shadow_st=0x%X:%08X\n", - (u32)(s_now>>32), (u32)s_now); - printk(KERN_ALERT "time: wct=%lds %ldus shadow_wct=%lds %ldus\n", - tv.tv_sec, tv.tv_usec, s_tv.tv_sec, s_tv.tv_usec); - printk(KERN_ALERT "time: jiffies=%lu(0x%X:%08X) timeout=%lu(0x%X:%08X)\n", - jiffies,(u32)(j_st>>32), (u32)j_st, - timer->expires,(u32)(t_st>>32), (u32)t_st); - printk(KERN_ALERT "time: processed_system_time=0x%X:%08X\n", - (u32)(processed_system_time>>32), (u32)processed_system_time); -} - -static struct irqaction dbg_time = { - dbg_time_int, - SA_SHIRQ, - 0, - "timer_dbg", - &dbg_time_int, - NULL -}; - -void __init time_init(void) -{ - unsigned long long alarm; - u64 __cpu_khz, cpu_freq, scale, scale2; - - __cpu_khz = HYPERVISOR_shared_info->cpu_freq; - do_div(__cpu_khz, 1000); - cpu_khz = (u32)__cpu_khz; - printk("Xen reported: %lu.%03lu MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - - xtime.tv_sec = HYPERVISOR_shared_info->wc_sec; - xtime.tv_usec = HYPERVISOR_shared_info->wc_usec; - processed_system_time = shadow_system_time; - - rdtsc_bitshift = HYPERVISOR_shared_info->tsc_timestamp.tsc_bitshift; - cpu_freq = HYPERVISOR_shared_info->cpu_freq; - - scale = 1000000LL << (32 + rdtsc_bitshift); - do_div(scale, (u32)cpu_freq); - - if ( (cpu_freq >> 32) != 0 ) - { - scale2 = 1000000LL << rdtsc_bitshift; - do_div(scale2, (u32)(cpu_freq>>32)); - scale += scale2; - } - - st_scale_f = scale & 0xffffffff; - st_scale_i = scale >> 32; - - __get_time_values_from_xen(); - processed_system_time = shadow_system_time; - - (void)setup_irq(HYPEREVENT_IRQ(_EVENT_TIMER), &irq_timer); - - (void)setup_irq(HYPEREVENT_IRQ(_EVENT_DEBUG), &dbg_time); - - rdtscll(alarm); - - clear_bit(_EVENT_TIMER, &HYPERVISOR_shared_info->events); -} - - -/* - * /proc/sys/xeno: This really belongs in another file. It can stay here for - * now however. - */ -static ctl_table xeno_subtable[] = { - {1, "independent_wallclock", &independent_wallclock, - sizeof(independent_wallclock), 0644, NULL, proc_dointvec}, - {0} -}; -static ctl_table xeno_table[] = { - {123, "xeno", NULL, 0, 0555, xeno_subtable}, - {0} -}; -static int __init xeno_sysctl_init(void) -{ - (void)register_sysctl_table(xeno_table, 0); - return 0; -} -__initcall(xeno_sysctl_init); diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/traps.c b/xenolinux-2.4.25-sparse/arch/xeno/kernel/traps.c deleted file mode 100644 index 63288fc282..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/traps.c +++ /dev/null @@ -1,684 +0,0 @@ -/* - * linux/arch/i386/traps.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * Pentium III FXSR, SSE support - * Gareth Hughes , May 2000 - */ - -/* - * 'Traps.c' handles hardware traps and faults after we have saved some - * state in 'asm.s'. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include - -#include -#include - -asmlinkage int system_call(void); -asmlinkage void lcall7(void); -asmlinkage void lcall27(void); - -asmlinkage void divide_error(void); -asmlinkage void debug(void); -asmlinkage void int3(void); -asmlinkage void overflow(void); -asmlinkage void bounds(void); -asmlinkage void invalid_op(void); -asmlinkage void device_not_available(void); -asmlinkage void double_fault(void); -asmlinkage void coprocessor_segment_overrun(void); -asmlinkage void invalid_TSS(void); -asmlinkage void segment_not_present(void); -asmlinkage void stack_segment(void); -asmlinkage void general_protection(void); -asmlinkage void page_fault(void); -asmlinkage void safe_page_fault(void); -asmlinkage void coprocessor_error(void); -asmlinkage void simd_coprocessor_error(void); -asmlinkage void alignment_check(void); -asmlinkage void spurious_interrupt_bug(void); -asmlinkage void machine_check(void); - -int kstack_depth_to_print = 24; - - -/* - * If the address is either in the .text section of the - * kernel, or in the vmalloc'ed module regions, it *may* - * be the address of a calling routine - */ - -#ifdef CONFIG_MODULES - -extern struct module *module_list; -extern struct module kernel_module; - -static inline int kernel_text_address(unsigned long addr) -{ - int retval = 0; - struct module *mod; - - if (addr >= (unsigned long) &_stext && - addr <= (unsigned long) &_etext) - return 1; - - for (mod = module_list; mod != &kernel_module; mod = mod->next) { - /* mod_bound tests for addr being inside the vmalloc'ed - * module area. Of course it'd be better to test only - * for the .text subset... */ - if (mod_bound(addr, 0, mod)) { - retval = 1; - break; - } - } - - return retval; -} - -#else - -static inline int kernel_text_address(unsigned long addr) -{ - return (addr >= (unsigned long) &_stext && - addr <= (unsigned long) &_etext); -} - -#endif - -void show_trace(unsigned long * stack) -{ - int i; - unsigned long addr; - - if (!stack) - stack = (unsigned long*)&stack; - - printk("Call Trace: "); - i = 1; - while (((long) stack & (THREAD_SIZE-1)) != 0) { - addr = *stack++; - if (kernel_text_address(addr)) { - if (i && ((i % 6) == 0)) - printk("\n "); - printk("[<%08lx>] ", addr); - i++; - } - } - printk("\n"); -} - -void show_trace_task(struct task_struct *tsk) -{ - unsigned long esp = tsk->thread.esp; - - /* User space on another CPU? */ - if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) - return; - show_trace((unsigned long *)esp); -} - -void show_stack(unsigned long * esp) -{ - unsigned long *stack; - int i; - - // debugging aid: "show_stack(NULL);" prints the - // back trace for this cpu. - - if(esp==NULL) - esp=(unsigned long*)&esp; - - stack = esp; - for(i=0; i < kstack_depth_to_print; i++) { - if (((long) stack & (THREAD_SIZE-1)) == 0) - break; - if (i && ((i % 8) == 0)) - printk("\n "); - printk("%08lx ", *stack++); - } - printk("\n"); - show_trace(esp); -} - -void show_registers(struct pt_regs *regs) -{ - int in_kernel = 1; - unsigned long esp; - unsigned short ss; - - esp = (unsigned long) (®s->esp); - ss = __KERNEL_DS; - if (regs->xcs & 2) { - in_kernel = 0; - esp = regs->esp; - ss = regs->xss & 0xffff; - } - printk(KERN_ALERT "CPU: %d\n", smp_processor_id() ); - printk(KERN_ALERT "EIP: %04x:[<%08lx>] %s\n", - 0xffff & regs->xcs, regs->eip, print_tainted()); - printk(KERN_ALERT "EFLAGS: %08lx\n",regs->eflags); - printk(KERN_ALERT "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - regs->eax, regs->ebx, regs->ecx, regs->edx); - printk(KERN_ALERT "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", - regs->esi, regs->edi, regs->ebp, esp); - printk(KERN_ALERT "ds: %04x es: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, ss); - printk(KERN_ALERT "Process %s (pid: %d, stackpage=%08lx)", - current->comm, current->pid, 4096+(unsigned long)current); - /* - * When in-kernel, we also print out the stack and code at the - * time of the fault.. - */ - if (in_kernel) { - - printk(KERN_ALERT "\nStack: "); - show_stack((unsigned long*)esp); - -#if 0 - { - int i; - printk(KERN_ALERT "\nCode: "); - if(regs->eip < PAGE_OFFSET) - goto bad; - - for(i=0;i<20;i++) - { - unsigned char c; - if(__get_user(c, &((unsigned char*)regs->eip)[i])) { -bad: - printk(KERN_ALERT " Bad EIP value."); - break; - } - printk("%02x ", c); - } - } -#endif - } - printk(KERN_ALERT "\n"); -} - -spinlock_t die_lock = SPIN_LOCK_UNLOCKED; - -void die(const char * str, struct pt_regs * regs, long err) -{ - console_verbose(); - spin_lock_irq(&die_lock); - bust_spinlocks(1); - printk("%s: %04lx\n", str, err & 0xffff); - show_registers(regs); - bust_spinlocks(0); - spin_unlock_irq(&die_lock); - do_exit(SIGSEGV); -} - -static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) -{ - if (!(2 & regs->xcs)) - die(str, regs, err); -} - - -static void inline do_trap(int trapnr, int signr, char *str, - struct pt_regs * regs, long error_code, - siginfo_t *info) -{ - if (!(regs->xcs & 2)) - goto kernel_trap; - - /*trap_signal:*/ { - struct task_struct *tsk = current; - tsk->thread.error_code = error_code; - tsk->thread.trap_no = trapnr; - if (info) - force_sig_info(signr, info, tsk); - else - force_sig(signr, tsk); - return; - } - - kernel_trap: { - unsigned long fixup = search_exception_table(regs->eip); - if (fixup) - regs->eip = fixup; - else - die(str, regs, error_code); - return; - } -} - -#define DO_ERROR(trapnr, signr, str, name) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ -{ \ - do_trap(trapnr, signr, str, regs, error_code, NULL); \ -} - -#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ -{ \ - siginfo_t info; \ - info.si_signo = signr; \ - info.si_errno = 0; \ - info.si_code = sicode; \ - info.si_addr = (void *)siaddr; \ - do_trap(trapnr, signr, str, regs, error_code, &info); \ -} - -DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip) -DO_ERROR( 3, SIGTRAP, "int3", int3) -DO_ERROR( 4, SIGSEGV, "overflow", overflow) -DO_ERROR( 5, SIGSEGV, "bounds", bounds) -DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip) -DO_ERROR( 7, SIGSEGV, "device not available", device_not_available) -DO_ERROR( 8, SIGSEGV, "double fault", double_fault) -DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) -DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) -DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) -DO_ERROR(12, SIGBUS, "stack segment", stack_segment) -DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) -DO_ERROR(18, SIGBUS, "machine check", machine_check) - -asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) -{ - /* - * If we trapped on an LDT access then ensure that the default_ldt is - * loaded, if nothing else. We load default_ldt lazily because LDT - * switching costs time and many applications don't need it. - */ - if ( unlikely((error_code & 6) == 4) ) - { - unsigned long ldt; - __asm__ __volatile__ ( "sldt %0" : "=r" (ldt) ); - if ( ldt == 0 ) - { - mmu_update_t u; - u.ptr = MMU_EXTENDED_COMMAND; - u.ptr |= (unsigned long)&default_ldt[0]; - u.val = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT); - HYPERVISOR_mmu_update(&u, 1); - return; - } - } - - if (!(regs->xcs & 2)) - goto gp_in_kernel; - - current->thread.error_code = error_code; - current->thread.trap_no = 13; - force_sig(SIGSEGV, current); - return; - -gp_in_kernel: - { - unsigned long fixup; - fixup = search_exception_table(regs->eip); - if (fixup) { - regs->eip = fixup; - return; - } - die("general protection fault", regs, error_code); - } -} - - -asmlinkage void do_debug(struct pt_regs * regs, long error_code) -{ - unsigned int condition; - struct task_struct *tsk = current; - siginfo_t info; - - condition = HYPERVISOR_get_debugreg(6); - - /* Mask out spurious debug traps due to lazy DR7 setting */ - if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { - if (!tsk->thread.debugreg[7]) - goto clear_dr7; - } - - /* Save debug status register where ptrace can see it */ - tsk->thread.debugreg[6] = condition; - - /* Mask out spurious TF errors due to lazy TF clearing */ - if (condition & DR_STEP) { - /* - * The TF error should be masked out only if the current - * process is not traced and if the TRAP flag has been set - * previously by a tracing process (condition detected by - * the PT_DTRACE flag); remember that the i386 TRAP flag - * can be modified by the process itself in user mode, - * allowing programs to debug themselves without the ptrace() - * interface. - */ - if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) - goto clear_TF; - } - - /* Ok, finally something we can handle */ - tsk->thread.trap_no = 1; - tsk->thread.error_code = error_code; - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_BRKPT; - - /* If this is a kernel mode trap, save the user PC on entry to - * the kernel, that's what the debugger can make sense of. - */ - info.si_addr = ((regs->xcs & 2) == 0) ? (void *)tsk->thread.eip : - (void *)regs->eip; - force_sig_info(SIGTRAP, &info, tsk); - - /* Disable additional traps. They'll be re-enabled when - * the signal is delivered. - */ - clear_dr7: - HYPERVISOR_set_debugreg(7, 0); - return; - - clear_TF: - regs->eflags &= ~TF_MASK; - return; -} - - -/* - * Note that we play around with the 'TS' bit in an attempt to get - * the correct behaviour even in the presence of the asynchronous - * IRQ13 behaviour - */ -void math_error(void *eip) -{ - struct task_struct * task; - siginfo_t info; - unsigned short cwd, swd; - - /* - * Save the info for the exception handler and clear the error. - */ - task = current; - save_init_fpu(task); - task->thread.trap_no = 16; - task->thread.error_code = 0; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_code = __SI_FAULT; - info.si_addr = eip; - /* - * (~cwd & swd) will mask out exceptions that are not set to unmasked - * status. 0x3f is the exception bits in these regs, 0x200 is the - * C1 reg you need in case of a stack fault, 0x040 is the stack - * fault bit. We should only be taking one exception at a time, - * so if this combination doesn't produce any single exception, - * then we have a bad program that isn't syncronizing its FPU usage - * and it will suffer the consequences since we won't be able to - * fully reproduce the context of the exception - */ - cwd = get_fpu_cwd(task); - swd = get_fpu_swd(task); - switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) { - case 0x000: - default: - break; - case 0x001: /* Invalid Op */ - case 0x040: /* Stack Fault */ - case 0x240: /* Stack Fault | Direction */ - info.si_code = FPE_FLTINV; - break; - case 0x002: /* Denormalize */ - case 0x010: /* Underflow */ - info.si_code = FPE_FLTUND; - break; - case 0x004: /* Zero Divide */ - info.si_code = FPE_FLTDIV; - break; - case 0x008: /* Overflow */ - info.si_code = FPE_FLTOVF; - break; - case 0x020: /* Precision */ - info.si_code = FPE_FLTRES; - break; - } - force_sig_info(SIGFPE, &info, task); -} - -asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code) -{ - ignore_irq13 = 1; - math_error((void *)regs->eip); -} - -void simd_math_error(void *eip) -{ - struct task_struct * task; - siginfo_t info; - unsigned short mxcsr; - - /* - * Save the info for the exception handler and clear the error. - */ - task = current; - save_init_fpu(task); - task->thread.trap_no = 19; - task->thread.error_code = 0; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_code = __SI_FAULT; - info.si_addr = eip; - /* - * The SIMD FPU exceptions are handled a little differently, as there - * is only a single status/control register. Thus, to determine which - * unmasked exception was caught we must mask the exception mask bits - * at 0x1f80, and then use these to mask the exception bits at 0x3f. - */ - mxcsr = get_fpu_mxcsr(task); - switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { - case 0x000: - default: - break; - case 0x001: /* Invalid Op */ - info.si_code = FPE_FLTINV; - break; - case 0x002: /* Denormalize */ - case 0x010: /* Underflow */ - info.si_code = FPE_FLTUND; - break; - case 0x004: /* Zero Divide */ - info.si_code = FPE_FLTDIV; - break; - case 0x008: /* Overflow */ - info.si_code = FPE_FLTOVF; - break; - case 0x020: /* Precision */ - info.si_code = FPE_FLTRES; - break; - } - force_sig_info(SIGFPE, &info, task); -} - -asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs, - long error_code) -{ - if (cpu_has_xmm) { - /* Handle SIMD FPU exceptions on PIII+ processors. */ - ignore_irq13 = 1; - simd_math_error((void *)regs->eip); - } else { - die_if_kernel("cache flush denied", regs, error_code); - current->thread.trap_no = 19; - current->thread.error_code = error_code; - force_sig(SIGSEGV, current); - } -} - -asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs, - long error_code) -{ -} - -/* - * 'math_state_restore()' saves the current math information in the - * old math state array, and gets the new ones from the current task - * - * Careful.. There are problems with IBM-designed IRQ13 behaviour. - * Don't touch unless you *really* know how it works. - */ -asmlinkage void math_state_restore(struct pt_regs regs) -{ - if (current->used_math) { - restore_fpu(current); - } else { - init_fpu(); - } - current->flags |= PF_USEDFPU; /* So we fnsave on switch_to() */ -} - - -#define _set_gate(gate_addr,type,dpl,addr) \ -do { \ - int __d0, __d1; \ - __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \ - "movw %4,%%dx\n\t" \ - "movl %%eax,%0\n\t" \ - "movl %%edx,%1" \ - :"=m" (*((long *) (gate_addr))), \ - "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \ - :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \ - "3" ((char *) (addr)),"2" (__KERNEL_CS << 16)); \ -} while (0) - -static void __init set_call_gate(void *a, void *addr) -{ - _set_gate(a,12,3,addr); -} - - -/* NB. All these are "trap gates" (i.e. events_mask isn't cleared). */ -static trap_info_t trap_table[] = { - { 0, 0, __KERNEL_CS, (unsigned long)divide_error }, - { 1, 0, __KERNEL_CS, (unsigned long)debug }, - { 3, 3, __KERNEL_CS, (unsigned long)int3 }, - { 4, 3, __KERNEL_CS, (unsigned long)overflow }, - { 5, 3, __KERNEL_CS, (unsigned long)bounds }, - { 6, 0, __KERNEL_CS, (unsigned long)invalid_op }, - { 7, 0, __KERNEL_CS, (unsigned long)device_not_available }, - { 8, 0, __KERNEL_CS, (unsigned long)double_fault }, - { 9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun }, - { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS }, - { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present }, - { 12, 0, __KERNEL_CS, (unsigned long)stack_segment }, - { 13, 0, __KERNEL_CS, (unsigned long)general_protection }, - { 14, 0, __KERNEL_CS, (unsigned long)page_fault }, - { 15, 0, __KERNEL_CS, (unsigned long)spurious_interrupt_bug }, - { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error }, - { 17, 0, __KERNEL_CS, (unsigned long)alignment_check }, - { 18, 0, __KERNEL_CS, (unsigned long)machine_check }, - { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error }, - { SYSCALL_VECTOR, - 3, __KERNEL_CS, (unsigned long)system_call }, - { 0, 0, 0, 0 } -}; - - -void __init trap_init(void) -{ - HYPERVISOR_set_trap_table(trap_table); - HYPERVISOR_set_fast_trap(SYSCALL_VECTOR); - - /* - * The default LDT is a single-entry callgate to lcall7 for iBCS and a - * callgate to lcall27 for Solaris/x86 binaries. - */ - clear_page(&default_ldt[0]); - set_call_gate(&default_ldt[0],lcall7); - set_call_gate(&default_ldt[4],lcall27); - __make_page_readonly(&default_ldt[0]); - - cpu_init(); -} - - -/* - * install_safe_pf_handler / install_normal_pf_handler: - * - * These are used within the failsafe_callback handler in entry.S to avoid - * taking a full page fault when reloading FS and GS. This is because FS and - * GS could be invalid at pretty much any point while Xenolinux executes (we - * don't set them to safe values on entry to the kernel). At *any* point Xen - * may be entered due to a hardware interrupt --- on exit from Xen an invalid - * FS/GS will cause our failsafe_callback to be executed. This could occur, - * for example, while the mmmu_update_queue is in an inconsistent state. This - * is disastrous because the normal page-fault handler touches the update - * queue! - * - * Fortunately, within the failsafe handler it is safe to force DS/ES/FS/GS - * to zero if they cannot be reloaded -- at this point executing a normal - * page fault would not change this effect. The safe page-fault handler - * ensures this end result (blow away the selector value) without the dangers - * of the normal page-fault handler. - * - * NB. Perhaps this can all go away after we have implemented writeable - * page tables. :-) - */ - -asmlinkage void do_safe_page_fault(struct pt_regs *regs, - unsigned long error_code, - unsigned long address) -{ - unsigned long fixup; - - if ( (fixup = search_exception_table(regs->eip)) != 0 ) - { - regs->eip = fixup; - return; - } - - die("Unhandleable 'safe' page fault!", regs, error_code); -} - -unsigned long install_safe_pf_handler(void) -{ - static trap_info_t safe_pf[] = { - { 14, 0, __KERNEL_CS, (unsigned long)safe_page_fault }, - { 0, 0, 0, 0 } - }; - unsigned long flags; - local_irq_save(flags); - HYPERVISOR_set_trap_table(safe_pf); - return flags; /* This is returned in %%eax */ -} - -__attribute__((regparm(3))) /* This function take its arg in %%eax */ -void install_normal_pf_handler(unsigned long flags) -{ - static trap_info_t normal_pf[] = { - { 14, 0, __KERNEL_CS, (unsigned long)page_fault }, - { 0, 0, 0, 0 } - }; - HYPERVISOR_set_trap_table(normal_pf); - local_irq_restore(flags); -} diff --git a/xenolinux-2.4.25-sparse/arch/xeno/lib/Makefile b/xenolinux-2.4.25-sparse/arch/xeno/lib/Makefile deleted file mode 100644 index 3bbf40148e..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/lib/Makefile +++ /dev/null @@ -1,15 +0,0 @@ - -.S.o: - $(CC) $(AFLAGS) -c $< -o $*.o - -L_TARGET = lib.a - -obj-y = checksum.o old-checksum.o delay.o \ - usercopy.o getuser.o \ - memcpy.o strstr.o xeno_proc.o - -obj-$(CONFIG_X86_USE_3DNOW) += mmx.o -obj-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o -obj-$(CONFIG_DEBUG_IOVIRT) += iodebug.o - -include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xeno/lib/delay.c b/xenolinux-2.4.25-sparse/arch/xeno/lib/delay.c deleted file mode 100644 index 0035bed074..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/lib/delay.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Precise Delay Loops for i386 - * - * Copyright (C) 1993 Linus Torvalds - * Copyright (C) 1997 Martin Mares - * - * The __delay function must _NOT_ be inlined as its execution time - * depends wildly on alignment on many x86 processors. The additional - * jump magic is needed to get the timing stable on all the CPU's - * we have to worry about. - */ - -#include -#include -#include -#include -#include - -#ifdef CONFIG_SMP -#include -#endif - -void __delay(unsigned long loops) -{ - unsigned long bclock, now; - - rdtscl(bclock); - do - { - rep_nop(); - rdtscl(now); - } while ((now-bclock) < loops); -} - -inline void __const_udelay(unsigned long xloops) -{ - int d0; - __asm__("mull %0" - :"=d" (xloops), "=&a" (d0) - :"1" (xloops),"0" (current_cpu_data.loops_per_jiffy)); - __delay(xloops * HZ); -} - -void __udelay(unsigned long usecs) -{ - __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */ -} - -void __ndelay(unsigned long nsecs) -{ - __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ -} diff --git a/xenolinux-2.4.25-sparse/arch/xeno/lib/xeno_proc.c b/xenolinux-2.4.25-sparse/arch/xeno/lib/xeno_proc.c deleted file mode 100644 index 2eca39d6aa..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/lib/xeno_proc.c +++ /dev/null @@ -1,18 +0,0 @@ - -#include -#include - -static struct proc_dir_entry *xeno_base; - -struct proc_dir_entry *create_xeno_proc_entry(const char *name, mode_t mode) -{ - if ( xeno_base == NULL ) - if ( (xeno_base = proc_mkdir("xen", &proc_root)) == NULL ) - panic("Couldn't create /proc/xen"); - return create_proc_entry(name, mode, xeno_base); -} - -void remove_xeno_proc_entry(const char *name) -{ - remove_proc_entry(name, xeno_base); -} diff --git a/xenolinux-2.4.25-sparse/arch/xeno/mm/Makefile b/xenolinux-2.4.25-sparse/arch/xeno/mm/Makefile deleted file mode 100644 index d0d16114b6..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/mm/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -# -# Makefile for the linux i386-specific parts of the memory manager. -# -# Note! Dependencies are done automagically by 'make dep', which also -# removes any old dependencies. DON'T put your own dependencies here -# unless it's something special (ie not a .c file). -# -# Note 2! The CFLAGS definition is now in the main makefile... - -O_TARGET := mm.o - -obj-y := init.o fault.o extable.o pageattr.o hypervisor.o ioremap.o - -export-objs := pageattr.o - -include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xeno/mm/fault.c b/xenolinux-2.4.25-sparse/arch/xeno/mm/fault.c deleted file mode 100644 index d1c34d93d7..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/mm/fault.c +++ /dev/null @@ -1,326 +0,0 @@ -/* - * linux/arch/i386/mm/fault.c - * - * Copyright (C) 1995 Linus Torvalds - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* For unblank_screen() */ - -#include -#include -#include -#include - -extern void die(const char *,struct pt_regs *,long); - -pgd_t *cur_pgd; - -extern spinlock_t timerlist_lock; - -/* - * Unlock any spinlocks which will prevent us from getting the - * message out (timerlist_lock is acquired through the - * console unblank code) - */ -void bust_spinlocks(int yes) -{ - spin_lock_init(&timerlist_lock); - if (yes) { - oops_in_progress = 1; - } else { - int loglevel_save = console_loglevel; -#ifdef CONFIG_VT - unblank_screen(); -#endif - oops_in_progress = 0; - /* - * OK, the message is on the console. Now we call printk() - * without oops_in_progress set so that printk will give klogd - * a poke. Hold onto your hats... - */ - console_loglevel = 15; /* NMI oopser may have shut the console up */ - printk(" "); - console_loglevel = loglevel_save; - } -} - -/* - * This routine handles page faults. It determines the address, - * and the problem, and then passes it off to one of the appropriate - * routines. - * - * error_code: - * bit 0 == 0 means no page found, 1 means protection fault - * bit 1 == 0 means read, 1 means write - * bit 2 == 0 means kernel, 1 means user-mode - */ -asmlinkage void do_page_fault(struct pt_regs *regs, - unsigned long error_code, - unsigned long address) -{ - struct task_struct *tsk = current; - struct mm_struct *mm; - struct vm_area_struct * vma; - unsigned long page; - unsigned long fixup; - int write; - siginfo_t info; - - /* Set the "privileged fault" bit to something sane. */ - error_code &= 3; - error_code |= (regs->xcs & 2) << 1; - -#if MMU_UPDATE_DEBUG > 0 - if ( (error_code == 0) && (address >= TASK_SIZE) ) - { - unsigned long paddr = __pa(address); - int i; - for ( i = 0; i < mmu_update_queue_idx; i++ ) - { - if ( update_debug_queue[i].ptr == paddr ) - { - printk("XXX now(EIP=%08lx:ptr=%08lx) " - "then(%s/%d:p/v=%08lx/%08lx)\n", - regs->eip, address, - update_debug_queue[i].file, - update_debug_queue[i].line, - update_debug_queue[i].ptr, - update_debug_queue[i].val); - } - } - } -#endif - - if ( flush_page_update_queue() != 0 ) return; - - /* - * We fault-in kernel-space virtual memory on-demand. The - * 'reference' page table is init_mm.pgd. - * - * NOTE! We MUST NOT take any locks for this case. We may - * be in an interrupt or a critical region, and should - * only copy the information from the master page table, - * nothing more. - * - * This verifies that the fault happens in kernel space - * (error_code & 4) == 0, and that the fault was not a - * protection error (error_code & 1) == 0. - */ - if (address >= TASK_SIZE && !(error_code & 5)) - goto vmalloc_fault; - - mm = tsk->mm; - info.si_code = SEGV_MAPERR; - - /* - * If we're in an interrupt or have no user - * context, we must not take the fault.. - */ - if (in_interrupt() || !mm) - goto no_context; - - down_read(&mm->mmap_sem); - - vma = find_vma(mm, address); - if (!vma) - goto bad_area; - if (vma->vm_start <= address) - goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; - if (error_code & 4) { - /* - * accessing the stack below %esp is always a bug. - * The "+ 32" is there due to some instructions (like - * pusha) doing post-decrement on the stack and that - * doesn't show up until later.. - */ - if (address + 32 < regs->esp) - goto bad_area; - } - if (expand_stack(vma, address)) - goto bad_area; -/* - * Ok, we have a good vm_area for this memory access, so - * we can handle it.. - */ -good_area: - info.si_code = SEGV_ACCERR; - write = 0; - switch (error_code & 3) { - default: /* 3: write, present */ - /* fall through */ - case 2: /* write, not present */ - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; - write++; - break; - case 1: /* read, present */ - goto bad_area; - case 0: /* read, not present */ - if (!(vma->vm_flags & (VM_READ | VM_EXEC))) - goto bad_area; - } - - survive: - /* - * If for any reason at all we couldn't handle the fault, - * make sure we exit gracefully rather than endlessly redo - * the fault. - */ - switch (handle_mm_fault(mm, vma, address, write)) { - case 1: - tsk->min_flt++; - break; - case 2: - tsk->maj_flt++; - break; - case 0: - goto do_sigbus; - default: - goto out_of_memory; - } - - up_read(&mm->mmap_sem); - return; - -/* - * Something tried to access memory that isn't in our memory map.. - * Fix it, but check if it's kernel or user first.. - */ -bad_area: - up_read(&mm->mmap_sem); - - /* User mode accesses just cause a SIGSEGV */ - if (error_code & 4) { - tsk->thread.cr2 = address; - /* Kernel addresses are always protection faults */ - tsk->thread.error_code = error_code | (address >= TASK_SIZE); - tsk->thread.trap_no = 14; - info.si_signo = SIGSEGV; - info.si_errno = 0; - /* info.si_code has been set above */ - info.si_addr = (void *)address; - force_sig_info(SIGSEGV, &info, tsk); - return; - } - -no_context: - /* Are we prepared to handle this kernel fault? */ - if ((fixup = search_exception_table(regs->eip)) != 0) { - regs->eip = fixup; - return; - } - -/* - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. - */ - - bust_spinlocks(1); - - if (address < PAGE_SIZE) - printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); - else - printk(KERN_ALERT "Unable to handle kernel paging request"); - printk(" at virtual address %08lx\n",address); - printk(" printing eip:\n"); - printk("%08lx\n", regs->eip); - page = ((unsigned long *) cur_pgd)[address >> 22]; - printk(KERN_ALERT "*pde=%08lx(%08lx)\n", page, machine_to_phys(page)); - if (page & 1) { - page &= PAGE_MASK; - address &= 0x003ff000; - page = machine_to_phys(page); - page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; - printk(KERN_ALERT "*pte=%08lx(%08lx)\n", page, - machine_to_phys(page)); - } - die("Oops", regs, error_code); - bust_spinlocks(0); - do_exit(SIGKILL); - -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. - */ -out_of_memory: - if (tsk->pid == 1) { - yield(); - goto survive; - } - up_read(&mm->mmap_sem); - printk("VM: killing process %s\n", tsk->comm); - if (error_code & 4) - do_exit(SIGKILL); - goto no_context; - -do_sigbus: - up_read(&mm->mmap_sem); - - /* - * Send a sigbus, regardless of whether we were in kernel - * or user mode. - */ - tsk->thread.cr2 = address; - tsk->thread.error_code = error_code; - tsk->thread.trap_no = 14; - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void *)address; - force_sig_info(SIGBUS, &info, tsk); - - /* Kernel mode? Handle exceptions or die */ - if (!(error_code & 4)) - goto no_context; - return; - -vmalloc_fault: - { - /* - * Synchronize this task's top level page-table - * with the 'reference' page table. - * - * Do _not_ use "tsk" here. We might be inside - * an interrupt in the middle of a task switch.. - */ - int offset = __pgd_offset(address); - pgd_t *pgd, *pgd_k; - pmd_t *pmd, *pmd_k; - pte_t *pte_k; - - pgd = offset + cur_pgd; - pgd_k = init_mm.pgd + offset; - - if (!pgd_present(*pgd_k)) - goto no_context; - set_pgd(pgd, *pgd_k); - - pmd = pmd_offset(pgd, address); - pmd_k = pmd_offset(pgd_k, address); - if (!pmd_present(*pmd_k)) - goto no_context; - set_pmd(pmd, *pmd_k); - XENO_flush_page_update_queue(); /* flush PMD update */ - - pte_k = pte_offset(pmd_k, address); - if (!pte_present(*pte_k)) - goto no_context; - return; - } -} diff --git a/xenolinux-2.4.25-sparse/arch/xeno/mm/hypervisor.c b/xenolinux-2.4.25-sparse/arch/xeno/mm/hypervisor.c deleted file mode 100644 index 94592d63f1..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/mm/hypervisor.c +++ /dev/null @@ -1,243 +0,0 @@ -/****************************************************************************** - * xeno/mm/hypervisor.c - * - * Update page tables via the hypervisor. - * - * Copyright (c) 2002, K A Fraser - */ - -#include -#include -#include -#include -#include -#include - -/* - * This suffices to protect us if we ever move to SMP domains. - * Further, it protects us against interrupts. At the very least, this is - * required for the network driver which flushes the update queue before - * pushing new receive buffers. - */ -static spinlock_t update_lock = SPIN_LOCK_UNLOCKED; - -#define QUEUE_SIZE 2048 -static mmu_update_t update_queue[QUEUE_SIZE]; -unsigned int mmu_update_queue_idx = 0; -#define idx mmu_update_queue_idx - -#if MMU_UPDATE_DEBUG > 0 -page_update_debug_t update_debug_queue[QUEUE_SIZE] = {{0}}; -#undef queue_l1_entry_update -#undef queue_l2_entry_update -static void DEBUG_allow_pt_reads(void) -{ - pte_t *pte; - mmu_update_t update; - int i; - for ( i = idx-1; i >= 0; i-- ) - { - pte = update_debug_queue[i].ptep; - if ( pte == NULL ) continue; - update_debug_queue[i].ptep = NULL; - update.ptr = virt_to_machine(pte); - update.val = update_debug_queue[i].pteval; - HYPERVISOR_mmu_update(&update, 1); - } -} -static void DEBUG_disallow_pt_read(unsigned long va) -{ - pte_t *pte; - pmd_t *pmd; - pgd_t *pgd; - unsigned long pteval; - /* - * We may fault because of an already outstanding update. - * That's okay -- it'll get fixed up in the fault handler. - */ - mmu_update_t update; - pgd = pgd_offset_k(va); - pmd = pmd_offset(pgd, va); - pte = pte_offset(pmd, va); - update.ptr = virt_to_machine(pte); - pteval = *(unsigned long *)pte; - update.val = pteval & ~_PAGE_PRESENT; - HYPERVISOR_mmu_update(&update, 1); - update_debug_queue[idx].ptep = pte; - update_debug_queue[idx].pteval = pteval; -} -#endif - -#if MMU_UPDATE_DEBUG > 1 -#undef queue_pt_switch -#undef queue_tlb_flush -#undef queue_invlpg -#undef queue_pgd_pin -#undef queue_pgd_unpin -#undef queue_pte_pin -#undef queue_pte_unpin -#endif - - -/* - * MULTICALL_flush_page_update_queue: - * This is a version of the flush which queues as part of a multicall. - */ -void MULTICALL_flush_page_update_queue(void) -{ - unsigned long flags; - unsigned int _idx; - spin_lock_irqsave(&update_lock, flags); - if ( (_idx = idx) != 0 ) - { -#if MMU_UPDATE_DEBUG > 1 - printk("Flushing %d entries from pt update queue\n", idx); -#endif -#if MMU_UPDATE_DEBUG > 0 - DEBUG_allow_pt_reads(); -#endif - idx = 0; - wmb(); /* Make sure index is cleared first to avoid double updates. */ - queue_multicall2(__HYPERVISOR_mmu_update, - (unsigned long)update_queue, - _idx); - } - spin_unlock_irqrestore(&update_lock, flags); -} - -static inline void __flush_page_update_queue(void) -{ - unsigned int _idx = idx; -#if MMU_UPDATE_DEBUG > 1 - printk("Flushing %d entries from pt update queue\n", idx); -#endif -#if MMU_UPDATE_DEBUG > 0 - DEBUG_allow_pt_reads(); -#endif - idx = 0; - wmb(); /* Make sure index is cleared first to avoid double updates. */ - HYPERVISOR_mmu_update(update_queue, _idx); -} - -void _flush_page_update_queue(void) -{ - unsigned long flags; - spin_lock_irqsave(&update_lock, flags); - if ( idx != 0 ) __flush_page_update_queue(); - spin_unlock_irqrestore(&update_lock, flags); -} - -static inline void increment_index(void) -{ - idx++; - if ( unlikely(idx == QUEUE_SIZE) ) __flush_page_update_queue(); -} - -void queue_l1_entry_update(pte_t *ptr, unsigned long val) -{ - unsigned long flags; - spin_lock_irqsave(&update_lock, flags); -#if MMU_UPDATE_DEBUG > 0 - DEBUG_disallow_pt_read((unsigned long)ptr); -#endif - update_queue[idx].ptr = virt_to_machine(ptr); - update_queue[idx].val = val; - increment_index(); - spin_unlock_irqrestore(&update_lock, flags); -} - -void queue_l2_entry_update(pmd_t *ptr, unsigned long val) -{ - unsigned long flags; - spin_lock_irqsave(&update_lock, flags); - update_queue[idx].ptr = virt_to_machine(ptr); - update_queue[idx].val = val; - increment_index(); - spin_unlock_irqrestore(&update_lock, flags); -} - -void queue_pt_switch(unsigned long ptr) -{ - unsigned long flags; - spin_lock_irqsave(&update_lock, flags); - update_queue[idx].ptr = phys_to_machine(ptr); - update_queue[idx].ptr |= MMU_EXTENDED_COMMAND; - update_queue[idx].val = MMUEXT_NEW_BASEPTR; - increment_index(); - spin_unlock_irqrestore(&update_lock, flags); -} - -void queue_tlb_flush(void) -{ - unsigned long flags; - spin_lock_irqsave(&update_lock, flags); - update_queue[idx].ptr = MMU_EXTENDED_COMMAND; - update_queue[idx].val = MMUEXT_TLB_FLUSH; - increment_index(); - spin_unlock_irqrestore(&update_lock, flags); -} - -void queue_invlpg(unsigned long ptr) -{ - unsigned long flags; - spin_lock_irqsave(&update_lock, flags); - update_queue[idx].ptr = MMU_EXTENDED_COMMAND; - update_queue[idx].val = ptr & PAGE_MASK; - update_queue[idx].val |= MMUEXT_INVLPG; - increment_index(); - spin_unlock_irqrestore(&update_lock, flags); -} - -void queue_pgd_pin(unsigned long ptr) -{ - unsigned long flags; - spin_lock_irqsave(&update_lock, flags); - update_queue[idx].ptr = phys_to_machine(ptr); - update_queue[idx].ptr |= MMU_EXTENDED_COMMAND; - update_queue[idx].val = MMUEXT_PIN_L2_TABLE; - increment_index(); - spin_unlock_irqrestore(&update_lock, flags); -} - -void queue_pgd_unpin(unsigned long ptr) -{ - unsigned long flags; - spin_lock_irqsave(&update_lock, flags); - update_queue[idx].ptr = phys_to_machine(ptr); - update_queue[idx].ptr |= MMU_EXTENDED_COMMAND; - update_queue[idx].val = MMUEXT_UNPIN_TABLE; - increment_index(); - spin_unlock_irqrestore(&update_lock, flags); -} - -void queue_pte_pin(unsigned long ptr) -{ - unsigned long flags; - spin_lock_irqsave(&update_lock, flags); - update_queue[idx].ptr = phys_to_machine(ptr); - update_queue[idx].ptr |= MMU_EXTENDED_COMMAND; - update_queue[idx].val = MMUEXT_PIN_L1_TABLE; - increment_index(); - spin_unlock_irqrestore(&update_lock, flags); -} - -void queue_pte_unpin(unsigned long ptr) -{ - unsigned long flags; - spin_lock_irqsave(&update_lock, flags); - update_queue[idx].ptr = phys_to_machine(ptr); - update_queue[idx].ptr |= MMU_EXTENDED_COMMAND; - update_queue[idx].val = MMUEXT_UNPIN_TABLE; - increment_index(); - spin_unlock_irqrestore(&update_lock, flags); -} - -void queue_set_ldt(unsigned long ptr, unsigned long len) -{ - unsigned long flags; - spin_lock_irqsave(&update_lock, flags); - update_queue[idx].ptr = MMU_EXTENDED_COMMAND | ptr; - update_queue[idx].val = MMUEXT_SET_LDT | (len << MMUEXT_CMD_SHIFT); - increment_index(); - spin_unlock_irqrestore(&update_lock, flags); -} diff --git a/xenolinux-2.4.25-sparse/arch/xeno/mm/init.c b/xenolinux-2.4.25-sparse/arch/xeno/mm/init.c deleted file mode 100644 index c78a84d1c8..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/mm/init.c +++ /dev/null @@ -1,390 +0,0 @@ -/* - * linux/arch/i386/mm/init.c - * - * Copyright (C) 1995 Linus Torvalds - * - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_BLK_DEV_INITRD -#include -#endif -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -mmu_gather_t mmu_gathers[NR_CPUS]; -unsigned long highstart_pfn, highend_pfn; -static unsigned long totalram_pages; -static unsigned long totalhigh_pages; - -int do_check_pgt_cache(int low, int high) -{ - int freed = 0; - if(pgtable_cache_size > high) { - do { - if (!QUICKLIST_EMPTY(pgd_quicklist)) { - free_pgd_slow(get_pgd_fast()); - freed++; - } - if (!QUICKLIST_EMPTY(pte_quicklist)) { - pte_free_slow(pte_alloc_one_fast(NULL, 0)); - freed++; - } - } while(pgtable_cache_size > low); - } - return freed; -} - -void show_mem(void) -{ - int i, total = 0, reserved = 0; - int shared = 0, cached = 0; - int highmem = 0; - - printk("Mem-info:\n"); - show_free_areas(); - printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); - i = max_mapnr; - while (i-- > 0) { - total++; - if (PageHighMem(mem_map+i)) - highmem++; - if (PageReserved(mem_map+i)) - reserved++; - else if (PageSwapCache(mem_map+i)) - cached++; - else if (page_count(mem_map+i)) - shared += page_count(mem_map+i) - 1; - } - printk("%d pages of RAM\n", total); - printk("%d pages of HIGHMEM\n",highmem); - printk("%d reserved pages\n",reserved); - printk("%d pages shared\n",shared); - printk("%d pages swap cached\n",cached); - printk("%ld pages in page table cache\n",pgtable_cache_size); - show_buffers(); -} - -/* References to section boundaries */ - -extern char _text, _etext, _edata, __bss_start, _end; -extern char __init_begin, __init_end; - -static inline void set_pte_phys (unsigned long vaddr, - unsigned long phys, pgprot_t prot) -{ - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - - pgd = init_mm.pgd + __pgd_offset(vaddr); - if (pgd_none(*pgd)) { - printk("PAE BUG #00!\n"); - return; - } - pmd = pmd_offset(pgd, vaddr); - if (pmd_none(*pmd)) { - printk("PAE BUG #01!\n"); - return; - } - pte = pte_offset(pmd, vaddr); - - queue_l1_entry_update(pte, phys | pgprot_val(prot)); - - /* - * It's enough to flush this one mapping. - * (PGE mappings get flushed as well) - */ - __flush_tlb_one(vaddr); -} - -void __set_fixmap(enum fixed_addresses idx, unsigned long phys, - pgprot_t flags) -{ - unsigned long address = __fix_to_virt(idx); - - if (idx >= __end_of_fixed_addresses) { - printk("Invalid __set_fixmap\n"); - return; - } - set_pte_phys(address, phys, flags); -} - -void clear_fixmap(enum fixed_addresses idx) -{ - set_pte_phys(__fix_to_virt(idx), 0, __pgprot(0)); -} - -static void __init fixrange_init (unsigned long start, - unsigned long end, pgd_t *pgd_base) -{ - pgd_t *pgd, *kpgd; - pmd_t *pmd, *kpmd; - pte_t *pte, *kpte; - int i, j; - unsigned long vaddr; - - vaddr = start; - i = __pgd_offset(vaddr); - j = __pmd_offset(vaddr); - pgd = pgd_base + i; - - for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { -#if CONFIG_X86_PAE - if (pgd_none(*pgd)) { - pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pgd(pgd, __pgd(__pa(pmd) + 0x1)); - if (pmd != pmd_offset(pgd, 0)) - printk("PAE BUG #02!\n"); - } - pmd = pmd_offset(pgd, vaddr); -#else - pmd = (pmd_t *)pgd; -#endif - for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) { - if (pmd_none(*pmd)) { - pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - clear_page(pte); - kpgd = pgd_offset_k((unsigned long)pte); - kpmd = pmd_offset(kpgd, (unsigned long)pte); - kpte = pte_offset(kpmd, (unsigned long)pte); - queue_l1_entry_update(kpte, - (*(unsigned long *)kpte)&~_PAGE_RW); - - set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte))); - } - vaddr += PMD_SIZE; - } - j = 0; - } - - XENO_flush_page_update_queue(); -} - - -static void __init zone_sizes_init(void) -{ - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; - unsigned int max_dma, high, low; - - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - low = max_low_pfn; - high = highend_pfn; - - if (low < max_dma) - zones_size[ZONE_DMA] = low; - else { - zones_size[ZONE_DMA] = max_dma; - zones_size[ZONE_NORMAL] = low - max_dma; -#ifdef CONFIG_HIGHMEM - zones_size[ZONE_HIGHMEM] = high - low; -#endif - } - free_area_init(zones_size); -} - -/* - * paging_init() sets up the page tables - note that the first 8MB are - * already mapped by head.S. - * - * This routines also unmaps the page at virtual kernel address 0, so - * that we can trap those pesky NULL-reference errors in the kernel. - */ -void __init paging_init(void) -{ - unsigned long vaddr; - - zone_sizes_init(); - - /* - * Fixed mappings, only the page table structure has to be created - - * mappings will be set by set_fixmap(): - */ - vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - fixrange_init(vaddr, HYPERVISOR_VIRT_START, init_mm.pgd); - - /* Switch to the real shared_info page, and clear the dummy page. */ - set_fixmap(FIX_SHARED_INFO, start_info.shared_info); - HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); - memset(empty_zero_page, 0, sizeof(empty_zero_page)); - -#ifdef CONFIG_HIGHMEM -#error - kmap_init(); -#endif -} - -static inline int page_is_ram (unsigned long pagenr) -{ - return 1; -} - -#ifdef CONFIG_HIGHMEM -void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) -{ - if (!page_is_ram(pfn)) { - SetPageReserved(page); - return; - } - - if (bad_ppro && page_kills_ppro(pfn)) { - SetPageReserved(page); - return; - } - - ClearPageReserved(page); - set_bit(PG_highmem, &page->flags); - atomic_set(&page->count, 1); - __free_page(page); - totalhigh_pages++; -} -#endif /* CONFIG_HIGHMEM */ - -static void __init set_max_mapnr_init(void) -{ -#ifdef CONFIG_HIGHMEM - highmem_start_page = mem_map + highstart_pfn; - max_mapnr = num_physpages = highend_pfn; - num_mappedpages = max_low_pfn; -#else - max_mapnr = num_mappedpages = num_physpages = max_low_pfn; -#endif -} - -static int __init free_pages_init(void) -{ -#ifdef CONFIG_HIGHMEM -#error Where is this supposed to be initialised? - int bad_ppro; -#endif - int reservedpages, pfn; - - /* this will put all low memory onto the freelists */ - totalram_pages += free_all_bootmem(); - - reservedpages = 0; - for (pfn = 0; pfn < max_low_pfn; pfn++) { - /* - * Only count reserved RAM pages - */ - if (page_is_ram(pfn) && PageReserved(mem_map+pfn)) - reservedpages++; - } -#ifdef CONFIG_HIGHMEM - for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--) - one_highpage_init((struct page *) (mem_map + pfn), pfn, bad_ppro); - totalram_pages += totalhigh_pages; -#endif - return reservedpages; -} - -void __init mem_init(void) -{ - int codesize, reservedpages, datasize, initsize; - - if (!mem_map) - BUG(); - - set_max_mapnr_init(); - - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - - /* clear the zero-page */ - memset(empty_zero_page, 0, PAGE_SIZE); - - reservedpages = free_pages_init(); - - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", - (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), - max_mapnr << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) - ); - - boot_cpu_data.wp_works_ok = 1; -} - -void free_initmem(void) -{ - unsigned long addr; - - addr = (unsigned long)(&__init_begin); - for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - set_page_count(virt_to_page(addr), 1); - free_page(addr); - totalram_pages++; - } - printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10); -} - -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - if (start < end) - printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - set_page_count(virt_to_page(start), 1); - free_page(start); - totalram_pages++; - } -} -#endif - -void si_meminfo(struct sysinfo *val) -{ - val->totalram = totalram_pages; - val->sharedram = 0; - val->freeram = nr_free_pages(); - val->bufferram = atomic_read(&buffermem_pages); - val->totalhigh = totalhigh_pages; - val->freehigh = nr_free_highpages(); - val->mem_unit = PAGE_SIZE; - return; -} - -#if defined(CONFIG_X86_PAE) -struct kmem_cache_s *pae_pgd_cachep; -void __init pgtable_cache_init(void) -{ - /* - * PAE pgds must be 16-byte aligned: - */ - pae_pgd_cachep = kmem_cache_create("pae_pgd", 32, 0, - SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, NULL, NULL); - if (!pae_pgd_cachep) - panic("init_pae(): Cannot alloc pae_pgd SLAB cache"); -} -#endif /* CONFIG_X86_PAE */ diff --git a/xenolinux-2.4.25-sparse/arch/xeno/mm/ioremap.c b/xenolinux-2.4.25-sparse/arch/xeno/mm/ioremap.c deleted file mode 100644 index 434e82c971..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/mm/ioremap.c +++ /dev/null @@ -1,226 +0,0 @@ -/* - * arch/xeno/mm/ioremap.c - * - * Re-map IO memory to kernel address space so that we can access it. - * - * (C) Copyright 1995 1996 Linus Torvalds - * - * Modifications for Xenolinux (c) 2003 Keir Fraser - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if defined(CONFIG_XENO_PRIV) - -/* These hacky macros avoid phys->machine translations. */ -#define __direct_pte(x) ((pte_t) { (x) } ) -#define __direct_mk_pte(page_nr,pgprot) \ - __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) -#define direct_mk_pte_phys(physpage, pgprot) \ - __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot) - -static inline void direct_remap_area_pte(pte_t *pte, - unsigned long address, - unsigned long size, - unsigned long machine_addr, - pgprot_t prot) -{ - unsigned long end; - - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - if (address >= end) - BUG(); - do { - if (!pte_none(*pte)) { - printk("direct_remap_area_pte: page already exists\n"); - BUG(); - } - set_pte(pte, pte_mkio(direct_mk_pte_phys(machine_addr, prot))); - address += PAGE_SIZE; - machine_addr += PAGE_SIZE; - pte++; - } while (address && (address < end)); -} - -static inline int direct_remap_area_pmd(struct mm_struct *mm, - pmd_t *pmd, - unsigned long address, - unsigned long size, - unsigned long machine_addr, - pgprot_t prot) -{ - unsigned long end; - - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - machine_addr -= address; - if (address >= end) - BUG(); - do { - pte_t * pte = pte_alloc(mm, pmd, address); - if (!pte) - return -ENOMEM; - direct_remap_area_pte(pte, address, end - address, - address + machine_addr, prot); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address && (address < end)); - return 0; -} - -int direct_remap_area_pages(struct mm_struct *mm, - unsigned long address, - unsigned long machine_addr, - unsigned long size, - pgprot_t prot) -{ - int error = 0; - pgd_t * dir; - unsigned long end = address + size; - - machine_addr -= address; - dir = pgd_offset(mm, address); - flush_cache_all(); - if (address >= end) - BUG(); - spin_lock(&mm->page_table_lock); - do { - pmd_t *pmd = pmd_alloc(mm, dir, address); - error = -ENOMEM; - if (!pmd) - break; - error = direct_remap_area_pmd(mm, pmd, address, end - address, - machine_addr + address, prot); - if (error) - break; - address = (address + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } while (address && (address < end)); - spin_unlock(&mm->page_table_lock); - flush_tlb_all(); - return error; -} - -#endif /* CONFIG_XENO_PRIV */ - - -/* - * Remap an arbitrary machine address space into the kernel virtual - * address space. Needed when a privileged instance of Xenolinux wants - * to access space outside its world directly. - * - * NOTE! We need to allow non-page-aligned mappings too: we will obviously - * have to convert them into an offset in a page-aligned mapping, but the - * caller shouldn't need to know that small detail. - */ -void * __ioremap(unsigned long machine_addr, - unsigned long size, - unsigned long flags) -{ -#if defined(CONFIG_XENO_PRIV) - void * addr; - struct vm_struct * area; - unsigned long offset, last_addr; - pgprot_t prot; - - /* Only privileged Xenolinux can make unchecked pagetable updates. */ - if ( !(start_info.flags & SIF_PRIVILEGED) ) - return NULL; - - /* Don't allow wraparound or zero size */ - last_addr = machine_addr + size - 1; - if (!size || last_addr < machine_addr) - return NULL; - - /* Mappings have to be page-aligned */ - offset = machine_addr & ~PAGE_MASK; - machine_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr+1) - machine_addr; - - /* Ok, go for it */ - area = get_vm_area(size, VM_IOREMAP); - if (!area) - return NULL; - addr = area->addr; - prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | - _PAGE_ACCESSED | flags); - if (direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(addr), - machine_addr, size, prot)) { - vfree(addr); - return NULL; - } - return (void *) (offset + (char *)addr); -#else - return NULL; -#endif -} - -void iounmap(void *addr) -{ - vfree((void *)((unsigned long)addr & PAGE_MASK)); -} - -/* implementation of boot time ioremap for purpose of provising access -to the vga console for privileged domains. Unlike boot time ioremap on -other architectures, ours is permanent and not reclaimed when then vmalloc -infrastructure is started */ - -void __init *bt_ioremap(unsigned long machine_addr, unsigned long size) -{ - unsigned long offset, last_addr; - unsigned int nrpages; - enum fixed_addresses idx; - - /* Don't allow wraparound or zero size */ - last_addr = machine_addr + size - 1; - if (!size || last_addr < machine_addr) - return NULL; - - /* - * Mappings have to be page-aligned - */ - offset = machine_addr & ~PAGE_MASK; - machine_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr) - machine_addr; - - /* - * Mappings have to fit in the FIX_BTMAP area. - */ - nrpages = size >> PAGE_SHIFT; - if (nrpages > NR_FIX_BTMAPS) - return NULL; - - /* - * Ok, go for it.. - */ - idx = FIX_BTMAP_BEGIN; - while (nrpages > 0) { - __set_fixmap(idx, machine_addr, - __pgprot(__PAGE_KERNEL|_PAGE_IO)); - machine_addr += PAGE_SIZE; - --idx; - --nrpages; - } - - flush_tlb_all(); - - return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN)); -} - - -#if 0 /* We don't support these functions. They shouldn't be required. */ -void __init bt_iounmap(void *addr, unsigned long size) {} -#endif diff --git a/xenolinux-2.4.25-sparse/arch/xeno/vmlinux.lds b/xenolinux-2.4.25-sparse/arch/xeno/vmlinux.lds deleted file mode 100644 index 7c4c4f8e9c..0000000000 --- a/xenolinux-2.4.25-sparse/arch/xeno/vmlinux.lds +++ /dev/null @@ -1,82 +0,0 @@ -/* ld script to make i386 Linux kernel - * Written by Martin Mares ; - */ -OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") -OUTPUT_ARCH(i386) -ENTRY(_start) -SECTIONS -{ - . = 0xC0000000 + 0x000000; - _text = .; /* Text and read-only data */ - .text : { - *(.text) - *(.fixup) - *(.gnu.warning) - } = 0x9090 - - _etext = .; /* End of text section */ - - .rodata : { *(.rodata) *(.rodata.*) } - .kstrtab : { *(.kstrtab) } - - . = ALIGN(16); /* Exception table */ - __start___ex_table = .; - __ex_table : { *(__ex_table) } - __stop___ex_table = .; - - __start___ksymtab = .; /* Kernel symbol table */ - __ksymtab : { *(__ksymtab) } - __stop___ksymtab = .; - - .data : { /* Data */ - *(.data) - CONSTRUCTORS - } - - _edata = .; /* End of data section */ - - . = ALIGN(8192); /* init_task */ - .data.init_task : { *(.data.init_task) } - - . = ALIGN(4096); /* Init code and data */ - __init_begin = .; - .text.init : { *(.text.init) } - .data.init : { *(.data.init) } - . = ALIGN(16); - __setup_start = .; - .setup.init : { *(.setup.init) } - __setup_end = .; - __initcall_start = .; - .initcall.init : { *(.initcall.init) } - __initcall_end = .; - . = ALIGN(4096); - __init_end = .; - - . = ALIGN(4096); - .data.page_aligned : { *(.data.idt) } - - . = ALIGN(32); - .data.cacheline_aligned : { *(.data.cacheline_aligned) } - - __bss_start = .; /* BSS */ - .bss : { - *(.bss) - } - _end = . ; - - /* Sections to be discarded */ - /DISCARD/ : { - *(.text.exit) - *(.data.exit) - *(.exitcall.exit) - } - - /* Stabs debugging sections. */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } -} diff --git a/xenolinux-2.4.25-sparse/drivers/block/ll_rw_blk.c b/xenolinux-2.4.25-sparse/drivers/block/ll_rw_blk.c index 419a050b25..8092d5fe7d 100644 --- a/xenolinux-2.4.25-sparse/drivers/block/ll_rw_blk.c +++ b/xenolinux-2.4.25-sparse/drivers/block/ll_rw_blk.c @@ -1557,7 +1557,7 @@ int __init blk_dev_init(void) #ifdef CONFIG_BLK_DEV_FD floppy_init(); #else -#if defined(__i386__) && !defined(CONFIG_XENO) /* Do we even need this? */ +#if defined(__i386__) && !defined(CONFIG_XEN) /* Do we even need this? */ outb_p(0xc, 0x3f2); #endif #endif @@ -1617,7 +1617,7 @@ int __init blk_dev_init(void) jsfd_init(); #endif -#ifdef CONFIG_XENOLINUX_BLOCK +#ifdef CONFIG_XEN_VBD xlblk_init(); #endif diff --git a/xenolinux-2.4.25-sparse/drivers/char/mem.c b/xenolinux-2.4.25-sparse/drivers/char/mem.c index 73e6d48713..dbc10d6382 100644 --- a/xenolinux-2.4.25-sparse/drivers/char/mem.c +++ b/xenolinux-2.4.25-sparse/drivers/char/mem.c @@ -7,8 +7,8 @@ * Jan-11-1998, C. Scott Ananian * Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar * - * MODIFIED FOR XENOLINUX by Keir Fraser, 10th July 2003. - * Xenolinux has strange semantics for /dev/mem and /dev/kmem!! + * MODIFIED FOR XEN by Keir Fraser, 10th July 2003. + * Linux running on Xen has strange semantics for /dev/mem and /dev/kmem!! * 1. mmap will not work on /dev/kmem * 2. mmap on /dev/mem interprets the 'file offset' as a machine address * rather than a physical address. @@ -201,18 +201,18 @@ static int mmap_mem(struct file * file, struct vm_area_struct * vma) { unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; -#if defined(CONFIG_XENO) && defined(CONFIG_XENO_PRIV) +#if defined(CONFIG_XEN) && defined(CONFIG_XEN_PRIVILEGED_GUEST) if (!(start_info.flags & SIF_PRIVILEGED)) return -ENXIO; - /* DONTCOPY is essential for Xenolinux as copy_page_range is broken. */ + /* DONTCOPY is essential for Xen as copy_page_range is broken. */ vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); if (direct_remap_area_pages(vma->vm_mm, vma->vm_start, offset, vma->vm_end-vma->vm_start, vma->vm_page_prot)) return -EAGAIN; return 0; -#elif defined(CONFIG_XENO) +#elif defined(CONFIG_XEN) return -ENXIO; #else /* @@ -426,7 +426,7 @@ static inline size_t read_zero_pagealigned(char * buf, size_t size) goto out_up; if (vma->vm_flags & VM_SHARED) break; -#if defined(CONFIG_XENO_PRIV) +#if defined(CONFIG_XEN_PRIVILEGED_GUEST) if (vma->vm_flags & VM_IO) break; #endif @@ -615,7 +615,7 @@ static int mmap_kmem(struct file * file, struct vm_area_struct * vma) unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; unsigned long size = vma->vm_end - vma->vm_start; -#if defined(CONFIG_XENO) +#if defined(CONFIG_XEN) return -ENXIO; #endif @@ -715,8 +715,8 @@ static int memory_open(struct inode * inode, struct file * filp) break; #if defined(CONFIG_ISA) || !defined(__mc68000__) case 4: -#if defined(CONFIG_XENO) -#if defined(CONFIG_XENO_PRIV) +#if defined(CONFIG_XEN) +#if defined(CONFIG_XEN_PRIVILEGED_GUEST) if (!(start_info.flags & SIF_PRIVILEGED)) #endif return -ENXIO; diff --git a/xenolinux-2.4.25-sparse/fs/exec.c b/xenolinux-2.4.25-sparse/fs/exec.c index 2548d1d1f8..16dc3193bf 100644 --- a/xenolinux-2.4.25-sparse/fs/exec.c +++ b/xenolinux-2.4.25-sparse/fs/exec.c @@ -311,7 +311,7 @@ void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long a if (vma) prot = vma->vm_page_prot; set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, prot)))); - XENO_flush_page_update_queue(); + XEN_flush_page_update_queue(); tsk->mm->rss++; spin_unlock(&tsk->mm->page_table_lock); diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/bugs.h b/xenolinux-2.4.25-sparse/include/asm-xen/bugs.h new file mode 100644 index 0000000000..c46b6a0b15 --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/bugs.h @@ -0,0 +1,53 @@ +/* + * include/asm-i386/bugs.h + * + * Copyright (C) 1994 Linus Torvalds + * + * Cyrix stuff, June 1998 by: + * - Rafael R. Reilova (moved everything from head.S), + * + * - Channing Corn (tests & fixes), + * - Andrew D. Balsa (code cleanup). + * + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + */ + +/* + * This is included by init/main.c to check for architecture-dependent bugs. + * + * Needs: + * void check_bugs(void); + */ + +#include +#include +#include +#include + + +static void __init check_fpu(void) +{ + boot_cpu_data.fdiv_bug = 0; +} + +static void __init check_hlt(void) +{ + boot_cpu_data.hlt_works_ok = 1; +} + +static void __init check_bugs(void) +{ + extern void __init boot_init_fpu(void); + + identify_cpu(&boot_cpu_data); + boot_init_fpu(); +#ifndef CONFIG_SMP + printk("CPU: "); + print_cpu_info(&boot_cpu_data); +#endif + check_fpu(); + check_hlt(); + system_utsname.machine[1] = '0' + + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); +} diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/control_if.h b/xenolinux-2.4.25-sparse/include/asm-xen/control_if.h new file mode 100644 index 0000000000..dd15a96bff --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/control_if.h @@ -0,0 +1,32 @@ +/****************************************************************************** + * control_if.h + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __CONTROL_IF_H__ +#define __CONTROL_IF_H__ + +typedef struct { + u8 type; /* echoed in response */ + u8 subtype; /* echoed in response */ + u8 id; /* echoed in response */ + u8 length; /* number of bytes in 'msg' */ + unsigned char msg[60]; /* type-specific message data */ +} control_msg_t; + +#define CONTROL_RING_SIZE 8 +typedef unsigned int CONTROL_RING_IDX; +#define MASK_CONTROL_IDX(_i) ((_i)&(CONTROL_RING_SIZE-1)) + +typedef struct { + control_msg_t tx_ring[CONTROL_RING_SIZE]; /* guest-OS -> controller */ + control_msg_t rx_ring[CONTROL_RING_SIZE]; /* controller -> guest-OS */ + CONTROL_RING_IDX tx_req_prod, tx_resp_prod; + CONTROL_RING_IDX rx_req_prod, rx_resp_prod; +} control_if_t; + +#define CMSG_CONSOLE 0 +#define CMSG_CONSOLE_DATA 0 + +#endif /* __CONTROL_IF_H__ */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/desc.h b/xenolinux-2.4.25-sparse/include/asm-xen/desc.h new file mode 100644 index 0000000000..33309a9671 --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/desc.h @@ -0,0 +1,41 @@ +#ifndef __ARCH_DESC_H +#define __ARCH_DESC_H + +#include + +#ifndef __ASSEMBLY__ + +struct desc_struct { + unsigned long a,b; +}; + +struct Xgt_desc_struct { + unsigned short size; + unsigned long address __attribute__((packed)); +}; + +extern struct desc_struct default_ldt[]; + +static inline void clear_LDT(void) +{ + /* + * NB. We load the default_ldt for lcall7/27 handling on demand, as + * it slows down context switching. Noone uses it anyway. + */ + queue_set_ldt(0, 0); +} + +static inline void load_LDT(mm_context_t *pc) +{ + void *segments = pc->ldt; + int count = pc->size; + + if ( count == 0 ) + segments = NULL; + + queue_set_ldt((unsigned long)segments, count); +} + +#endif /* __ASSEMBLY__ */ + +#endif /* __ARCH_DESC_H__ */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/evtchn.h b/xenolinux-2.4.25-sparse/include/asm-xen/evtchn.h new file mode 100644 index 0000000000..88c278d86e --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/evtchn.h @@ -0,0 +1,29 @@ +/****************************************************************************** + * evtchn.h + * + * Driver for receiving and demuxing event-channel signals. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __ASM_EVTCHN_H__ +#define __ASM_EVTCHN_H__ + +typedef void (*evtchn_receiver_t)(unsigned int); +#define PORT_NORMAL 0x0000 +#define PORT_DISCONNECT 0x8000 +#define PORTIDX_MASK 0x7fff + +/* /dev/xen/evtchn resides at device number major=10, minor=200 */ +#define EVTCHN_MINOR 200 + +/* /dev/xen/evtchn ioctls: */ +/* EVTCHN_RESET: Clear and reinit the event buffer. Clear error condition. */ +#define EVTCHN_RESET _IO('E', 1) + +int evtchn_request_port(unsigned int port, evtchn_receiver_t rx_fn); +int evtchn_free_port(unsigned int port); +void evtchn_clear_port(unsigned int port); + + +#endif /* __ASM_EVTCHN_H__ */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/fixmap.h b/xenolinux-2.4.25-sparse/include/asm-xen/fixmap.h new file mode 100644 index 0000000000..2441b01d4e --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/fixmap.h @@ -0,0 +1,101 @@ +/* + * fixmap.h: compile-time virtual memory allocation + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998 Ingo Molnar + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + */ + +#ifndef _ASM_FIXMAP_H +#define _ASM_FIXMAP_H + +#include +#include +#include +#include + +/* + * Here we define all the compile-time 'special' virtual + * addresses. The point is to have a constant address at + * compile time, but to set the physical address only + * in the boot process. We allocate these special addresses + * from the end of virtual memory (0xfffff000) backwards. + * Also this lets us do fail-safe vmalloc(), we + * can guarantee that these special addresses and + * vmalloc()-ed addresses never overlap. + * + * these 'compile-time allocated' memory buffers are + * fixed-size 4k pages. (or larger if used with an increment + * highger than 1) use fixmap_set(idx,phys) to associate + * physical memory with fixmap indices. + * + * TLB entries of such buffers will not be flushed across + * task switches. + */ + +enum fixed_addresses { +#ifdef CONFIG_HIGHMEM_XXX + FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, +#endif + FIX_BLKRING_BASE, + FIX_NETRING0_BASE, + FIX_NETRING1_BASE, + FIX_NETRING2_BASE, + FIX_NETRING3_BASE, + FIX_SHARED_INFO, + +#ifdef CONFIG_VGA_CONSOLE +#define NR_FIX_BTMAPS 32 /* 128KB For the Dom0 VGA Console A0000-C0000 */ +#else +#define NR_FIX_BTMAPS 1 /* in case anyone wants it in future... */ +#endif + FIX_BTMAP_END, + FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1, + /* our bt_ioremap is permanent, unlike other architectures */ + + __end_of_permanent_fixed_addresses, + __end_of_fixed_addresses = __end_of_permanent_fixed_addresses +}; + +extern void __set_fixmap (enum fixed_addresses idx, + unsigned long phys, pgprot_t flags); + +#define set_fixmap(idx, phys) \ + __set_fixmap(idx, phys, PAGE_KERNEL) +/* + * Some hardware wants to get fixmapped without caching. + */ +#define set_fixmap_nocache(idx, phys) \ + __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) + +extern void clear_fixmap(enum fixed_addresses idx); + +/* + * used by vmalloc.c. + * + * Leave one empty page between vmalloc'ed areas and + * the start of the fixmap, and leave one page empty + * at the top of mem.. + */ +#define FIXADDR_TOP (HYPERVISOR_VIRT_START - 2*PAGE_SIZE) +#define __FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) + +#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) + +/* + * 'index to address' translation. If anyone tries to use the idx + * directly without tranlation, we catch the bug with a NULL-deference + * kernel oops. Illegal ranges of incoming indices are caught too. + */ +static inline unsigned long fix_to_virt(unsigned int idx) +{ + return __fix_to_virt(idx); +} + +#endif diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/highmem.h b/xenolinux-2.4.25-sparse/include/asm-xen/highmem.h new file mode 100644 index 0000000000..7e56b1b32d --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/highmem.h @@ -0,0 +1,2 @@ +#error "Highmem unsupported!" + diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/hw_irq.h b/xenolinux-2.4.25-sparse/include/asm-xen/hw_irq.h new file mode 100644 index 0000000000..d99d15bd24 --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/hw_irq.h @@ -0,0 +1,61 @@ +#ifndef _ASM_HW_IRQ_H +#define _ASM_HW_IRQ_H + +/* + * linux/include/asm/hw_irq.h + * + * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar + */ + +#include +#include +#include +#include + +#define SYSCALL_VECTOR 0x80 + +extern int irq_vector[NR_IRQS]; + +extern atomic_t irq_err_count; +extern atomic_t irq_mis_count; + +extern char _stext, _etext; + +extern unsigned long prof_cpu_mask; +extern unsigned int * prof_buffer; +extern unsigned long prof_len; +extern unsigned long prof_shift; + +/* + * x86 profiling function, SMP safe. We might want to do this in + * assembly totally? + */ +static inline void x86_do_profile (unsigned long eip) +{ + if (!prof_buffer) + return; + + /* + * Only measure the CPUs specified by /proc/irq/prof_cpu_mask. + * (default is all CPUs.) + */ + if (!((1<>= prof_shift; + /* + * Don't ignore out-of-bounds EIP values silently, + * put them into the last histogram slot, so if + * present, they will show up as a sharp peak. + */ + if (eip > prof_len-1) + eip = prof_len-1; + atomic_inc((atomic_t *)&prof_buffer[eip]); +} + +static inline void hw_resend_irq(struct hw_interrupt_type *h, + unsigned int i) +{} + +#endif /* _ASM_HW_IRQ_H */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/hypervisor.h b/xenolinux-2.4.25-sparse/include/asm-xen/hypervisor.h new file mode 100644 index 0000000000..34d0974471 --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/hypervisor.h @@ -0,0 +1,465 @@ +/****************************************************************************** + * hypervisor.h + * + * Linux-specific hypervisor handling. + * + * Copyright (c) 2002, K A Fraser + */ + +#ifndef __HYPERVISOR_H__ +#define __HYPERVISOR_H__ + +#include +#include +#include +#include +#include +#include + +/* arch/xen/kernel/setup.c */ +union start_info_union +{ + start_info_t start_info; + char padding[512]; +}; +extern union start_info_union start_info_union; +#define start_info (start_info_union.start_info) + +/* arch/xen/kernel/hypervisor.c */ +void do_hypervisor_callback(struct pt_regs *regs); + + +/* arch/xen/mm/hypervisor.c */ +/* + * NB. ptr values should be PHYSICAL, not MACHINE. 'vals' should be already + * be MACHINE addresses. + */ + +extern unsigned int mmu_update_queue_idx; + +void queue_l1_entry_update(pte_t *ptr, unsigned long val); +void queue_l2_entry_update(pmd_t *ptr, unsigned long val); +void queue_pt_switch(unsigned long ptr); +void queue_tlb_flush(void); +void queue_invlpg(unsigned long ptr); +void queue_pgd_pin(unsigned long ptr); +void queue_pgd_unpin(unsigned long ptr); +void queue_pte_pin(unsigned long ptr); +void queue_pte_unpin(unsigned long ptr); +void queue_set_ldt(unsigned long ptr, unsigned long bytes); +#define MMU_UPDATE_DEBUG 0 + +#if MMU_UPDATE_DEBUG > 0 +typedef struct { + void *ptr; + unsigned long val, pteval; + void *ptep; + int line; char *file; +} page_update_debug_t; +extern page_update_debug_t update_debug_queue[]; +#define queue_l1_entry_update(_p,_v) ({ \ + update_debug_queue[mmu_update_queue_idx].ptr = (_p); \ + update_debug_queue[mmu_update_queue_idx].val = (_v); \ + update_debug_queue[mmu_update_queue_idx].line = __LINE__; \ + update_debug_queue[mmu_update_queue_idx].file = __FILE__; \ + queue_l1_entry_update((_p),(_v)); \ +}) +#define queue_l2_entry_update(_p,_v) ({ \ + update_debug_queue[mmu_update_queue_idx].ptr = (_p); \ + update_debug_queue[mmu_update_queue_idx].val = (_v); \ + update_debug_queue[mmu_update_queue_idx].line = __LINE__; \ + update_debug_queue[mmu_update_queue_idx].file = __FILE__; \ + queue_l2_entry_update((_p),(_v)); \ +}) +#endif + +#if MMU_UPDATE_DEBUG > 1 +#undef queue_l1_entry_update +#undef queue_l2_entry_update +#define queue_l1_entry_update(_p,_v) ({ \ + update_debug_queue[mmu_update_queue_idx].ptr = (_p); \ + update_debug_queue[mmu_update_queue_idx].val = (_v); \ + update_debug_queue[mmu_update_queue_idx].line = __LINE__; \ + update_debug_queue[mmu_update_queue_idx].file = __FILE__; \ + printk("L1 %s %d: %08lx (%08lx -> %08lx)\n", __FILE__, __LINE__, \ + (_p), pte_val(_p), \ + (unsigned long)(_v)); \ + queue_l1_entry_update((_p),(_v)); \ +}) +#define queue_l2_entry_update(_p,_v) ({ \ + update_debug_queue[mmu_update_queue_idx].ptr = (_p); \ + update_debug_queue[mmu_update_queue_idx].val = (_v); \ + update_debug_queue[mmu_update_queue_idx].line = __LINE__; \ + update_debug_queue[mmu_update_queue_idx].file = __FILE__; \ + printk("L2 %s %d: %08lx (%08lx -> %08lx)\n", __FILE__, __LINE__, \ + (_p), pmd_val(_p), \ + (unsigned long)(_v)); \ + queue_l2_entry_update((_p),(_v)); \ +}) +#define queue_pt_switch(_p) ({ \ + printk("PTSWITCH %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \ + queue_pt_switch(_p); \ +}) +#define queue_tlb_flush() ({ \ + printk("TLB FLUSH %s %d\n", __FILE__, __LINE__); \ + queue_tlb_flush(); \ +}) +#define queue_invlpg(_p) ({ \ + printk("INVLPG %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \ + queue_invlpg(_p); \ +}) +#define queue_pgd_pin(_p) ({ \ + printk("PGD PIN %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \ + queue_pgd_pin(_p); \ +}) +#define queue_pgd_unpin(_p) ({ \ + printk("PGD UNPIN %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \ + queue_pgd_unpin(_p); \ +}) +#define queue_pte_pin(_p) ({ \ + printk("PTE PIN %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \ + queue_pte_pin(_p); \ +}) +#define queue_pte_unpin(_p) ({ \ + printk("PTE UNPIN %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \ + queue_pte_unpin(_p); \ +}) +#define queue_set_ldt(_p,_l) ({ \ + printk("SETL LDT %s %d: %08lx %d\n", __FILE__, __LINE__, (_p), (_l)); \ + queue_set_ldt((_p), (_l)); \ +}) +#endif + +void _flush_page_update_queue(void); +static inline int flush_page_update_queue(void) +{ + unsigned int idx = mmu_update_queue_idx; + if ( idx != 0 ) _flush_page_update_queue(); + return idx; +} +#define XEN_flush_page_update_queue() (_flush_page_update_queue()) +void MULTICALL_flush_page_update_queue(void); + + +/* + * Assembler stubs for hyper-calls. + */ + +static inline int HYPERVISOR_set_trap_table(trap_info_t *table) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_trap_table), + "b" (table) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_mmu_update(mmu_update_t *req, int count) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_mmu_update), + "b" (req), "c" (count) : "memory" ); + + if ( unlikely(ret < 0) ) + { + extern void show_trace(unsigned long *); + show_trace(NULL); + panic("Failed mmu update: %p, %d", req, count); + } + + return ret; +} + +static inline int HYPERVISOR_set_gdt(unsigned long *frame_list, int entries) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_gdt), + "b" (frame_list), "c" (entries) : "memory" ); + + + return ret; +} + +static inline int HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_stack_switch), + "b" (ss), "c" (esp) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_set_callbacks( + unsigned long event_selector, unsigned long event_address, + unsigned long failsafe_selector, unsigned long failsafe_address) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_callbacks), + "b" (event_selector), "c" (event_address), + "d" (failsafe_selector), "S" (failsafe_address) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_net_io_op(netop_t *op) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_net_io_op), + "b" (op) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_fpu_taskswitch(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_fpu_taskswitch) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_yield(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_sched_op), + "b" (SCHEDOP_yield) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_block(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_sched_op), + "b" (SCHEDOP_block) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_exit(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_sched_op), + "b" (SCHEDOP_exit) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_stop(unsigned long srec) +{ + int ret; + /* NB. On suspend, control software expects a suspend record in %esi. */ + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_sched_op), + "b" (SCHEDOP_stop), "S" (srec) : "memory" ); + + return ret; +} + +static inline long HYPERVISOR_set_timer_op(u64 timeout) +{ + int ret; + unsigned long timeout_hi = (unsigned long)(timeout>>32); + unsigned long timeout_lo = (unsigned long)timeout; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_timer_op), + "b" (timeout_hi), "c" (timeout_lo) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_dom0_op(dom0_op_t *dom0_op) +{ + int ret; + dom0_op->interface_version = DOM0_INTERFACE_VERSION; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_dom0_op), + "b" (dom0_op) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_network_op(void *network_op) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_network_op), + "b" (network_op) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_block_io_op(void *block_io_op) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_block_io_op), + "b" (block_io_op) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_set_debugreg(int reg, unsigned long value) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_debugreg), + "b" (reg), "c" (value) : "memory" ); + + return ret; +} + +static inline unsigned long HYPERVISOR_get_debugreg(int reg) +{ + unsigned long ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_get_debugreg), + "b" (reg) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_update_descriptor( + unsigned long pa, unsigned long word1, unsigned long word2) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_update_descriptor), + "b" (pa), "c" (word1), "d" (word2) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_set_fast_trap(int idx) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_fast_trap), + "b" (idx) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_dom_mem_op(void *dom_mem_op) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_dom_mem_op), + "b" (dom_mem_op) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_multicall(void *call_list, int nr_calls) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_multicall), + "b" (call_list), "c" (nr_calls) : "memory" ); + + return ret; +} + +static inline long HYPERVISOR_kbd_op(unsigned char op, unsigned char val) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_kbd_op), + "b" (op), "c" (val) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_update_va_mapping( + unsigned long page_nr, pte_t new_val, unsigned long flags) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_update_va_mapping), + "b" (page_nr), "c" ((new_val).pte_low), "d" (flags) : "memory" ); + + if ( unlikely(ret < 0) ) + panic("Failed update VA mapping: %08lx, %08lx, %08lx", + page_nr, (new_val).pte_low, flags); + + return ret; +} + +static inline int HYPERVISOR_event_channel_op(void *op) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_event_channel_op), + "b" (op) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_xen_version(int cmd) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_xen_version), + "b" (cmd) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_console_io(int cmd, int count, char *str) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_console_io), + "b" (cmd), "c" (count), "d" (str) : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_physdev_op(void *physdev_op) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_physdev_op), + "b" (physdev_op) : "memory" ); + + return ret; +} + +#endif /* __HYPERVISOR_H__ */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/io.h b/xenolinux-2.4.25-sparse/include/asm-xen/io.h new file mode 100644 index 0000000000..3d78e20950 --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/io.h @@ -0,0 +1,430 @@ +#ifndef _ASM_IO_H +#define _ASM_IO_H + +#include + +/* + * This file contains the definitions for the x86 IO instructions + * inb/inw/inl/outb/outw/outl and the "string versions" of the same + * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing" + * versions of the single-IO instructions (inb_p/inw_p/..). + * + * This file is not meant to be obfuscating: it's just complicated + * to (a) handle it all in a way that makes gcc able to optimize it + * as well as possible and (b) trying to avoid writing the same thing + * over and over again with slight variations and possibly making a + * mistake somewhere. + */ + +/* + * Thanks to James van Artsdalen for a better timing-fix than + * the two short jumps: using outb's to a nonexistent port seems + * to guarantee better timings even on fast machines. + * + * On the other hand, I'd like to be sure of a non-existent port: + * I feel a bit unsafe about using 0x80 (should be safe, though) + * + * Linus + */ + + /* + * Bit simplified and optimized by Jan Hubicka + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. + * + * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added, + * isa_read[wl] and isa_write[wl] fixed + * - Arnaldo Carvalho de Melo + */ + +#define IO_SPACE_LIMIT 0xffff + +#define XQUAD_PORTIO_BASE 0xfe400000 +#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ +#define XQUAD_PORTIO_LEN 0x80000 /* Only remapping first 2 quads */ + +#ifdef __KERNEL__ + +#include + +/* + * Temporary debugging check to catch old code using + * unmapped ISA addresses. Will be removed in 2.4. + */ +#if CONFIG_DEBUG_IOVIRT + extern void *__io_virt_debug(unsigned long x, const char *file, int line); + extern unsigned long __io_phys_debug(unsigned long x, const char *file, int line); + #define __io_virt(x) __io_virt_debug((unsigned long)(x), __FILE__, __LINE__) +//#define __io_phys(x) __io_phys_debug((unsigned long)(x), __FILE__, __LINE__) +#else + #define __io_virt(x) ((void *)(x)) +//#define __io_phys(x) __pa(x) +#endif + +/** + * virt_to_phys - map virtual addresses to physical + * @address: address to remap + * + * The returned physical address is the physical (CPU) mapping for + * the memory address given. It is only valid to use this function on + * addresses directly mapped or allocated via kmalloc. + * + * This function does not give bus mappings for DMA transfers. In + * almost all conceivable cases a device driver should not be using + * this function + */ + +static inline unsigned long virt_to_phys(volatile void * address) +{ + return __pa(address); +} + +/** + * phys_to_virt - map physical address to virtual + * @address: address to remap + * + * The returned virtual address is a current CPU mapping for + * the memory address given. It is only valid to use this function on + * addresses that have a kernel mapping + * + * This function does not handle bus mappings for DMA transfers. In + * almost all conceivable cases a device driver should not be using + * this function + */ + +static inline void * phys_to_virt(unsigned long address) +{ + return __va(address); +} + +/* + * Change "struct page" to physical address. + */ +#ifdef CONFIG_HIGHMEM64G +#define page_to_phys(page) ((u64)(page - mem_map) << PAGE_SHIFT) +#else +#define page_to_phys(page) ((page - mem_map) << PAGE_SHIFT) +#endif + +extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags); + +/** + * ioremap - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + */ + +static inline void * ioremap (unsigned long offset, unsigned long size) +{ + return __ioremap(offset, size, 0); +} + +/** + * ioremap_nocache - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap_nocache performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + * + * This version of ioremap ensures that the memory is marked uncachable + * on the CPU as well as honouring existing caching rules from things like + * the PCI bus. Note that there are other caches and buffers on many + * busses. In paticular driver authors should read up on PCI writes + * + * It's useful if some control registers are in such an area and + * write combining or read caching is not desirable: + */ + +static inline void * ioremap_nocache (unsigned long offset, unsigned long size) +{ + return __ioremap(offset, size, _PAGE_PCD); +} + +extern void iounmap(void *addr); + +/* + * bt_ioremap() and bt_iounmap() are for temporary early boot-time + * mappings, before the real ioremap() is functional. + * A boot-time mapping is currently limited to at most 16 pages. + */ +extern void *bt_ioremap(unsigned long offset, unsigned long size); +extern void bt_iounmap(void *addr, unsigned long size); + +/* + * IO bus memory addresses are also 1:1 with the physical address + */ +#define virt_to_bus(_x) phys_to_machine(virt_to_phys(_x)) +#define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x)) +#define page_to_bus(_x) phys_to_machine(page_to_phys(_x)) + +/* + * readX/writeX() are used to access memory mapped devices. On some + * architectures the memory mapped IO stuff needs to be accessed + * differently. On the x86 architecture, we just read/write the + * memory location directly. + */ + +#define readb(addr) (*(volatile unsigned char *) __io_virt(addr)) +#define readw(addr) (*(volatile unsigned short *) __io_virt(addr)) +#define readl(addr) (*(volatile unsigned int *) __io_virt(addr)) +#define __raw_readb readb +#define __raw_readw readw +#define __raw_readl readl + +#define writeb(b,addr) (*(volatile unsigned char *) __io_virt(addr) = (b)) +#define writew(b,addr) (*(volatile unsigned short *) __io_virt(addr) = (b)) +#define writel(b,addr) (*(volatile unsigned int *) __io_virt(addr) = (b)) +#define __raw_writeb writeb +#define __raw_writew writew +#define __raw_writel writel + +#define memset_io(a,b,c) __memset(__io_virt(a),(b),(c)) +#define memcpy_fromio(a,b,c) __memcpy((a),__io_virt(b),(c)) +#define memcpy_toio(a,b,c) __memcpy(__io_virt(a),(b),(c)) + +/* + * ISA space is 'always mapped' on a typical x86 system, no need to + * explicitly ioremap() it. The fact that the ISA IO space is mapped + * to PAGE_OFFSET is pure coincidence - it does not mean ISA values + * are physical addresses. The following constant pointer can be + * used as the IO-area pointer (it can be iounmapped as well, so the + * analogy with PCI is quite large): + */ +#define __ISA_IO_base ((char *)(PAGE_OFFSET)) + +#define isa_readb(a) readb(__ISA_IO_base + (a)) +#define isa_readw(a) readw(__ISA_IO_base + (a)) +#define isa_readl(a) readl(__ISA_IO_base + (a)) +#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a)) +#define isa_writew(w,a) writew(w,__ISA_IO_base + (a)) +#define isa_writel(l,a) writel(l,__ISA_IO_base + (a)) +#define isa_memset_io(a,b,c) memset_io(__ISA_IO_base + (a),(b),(c)) +#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),__ISA_IO_base + (b),(c)) +#define isa_memcpy_toio(a,b,c) memcpy_toio(__ISA_IO_base + (a),(b),(c)) + + +/* + * Again, i386 does not require mem IO specific function. + */ + +#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(b),(c),(d)) +#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(__ISA_IO_base + (b)),(c),(d)) + +/** + * check_signature - find BIOS signatures + * @io_addr: mmio address to check + * @signature: signature block + * @length: length of signature + * + * Perform a signature comparison with the mmio address io_addr. This + * address should have been obtained by ioremap. + * Returns 1 on a match. + */ + +static inline int check_signature(unsigned long io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + do { + if (readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: + return retval; +} + +/** + * isa_check_signature - find BIOS signatures + * @io_addr: mmio address to check + * @signature: signature block + * @length: length of signature + * + * Perform a signature comparison with the ISA mmio address io_addr. + * Returns 1 on a match. + * + * This function is deprecated. New drivers should use ioremap and + * check_signature. + */ + + +static inline int isa_check_signature(unsigned long io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + do { + if (isa_readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: + return retval; +} + +/* + * Cache management + * + * This needed for two cases + * 1. Out of order aware processors + * 2. Accidentally out of order processors (PPro errata #51) + */ + +#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) + +static inline void flush_write_buffers(void) +{ + __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory"); +} + +#define dma_cache_inv(_start,_size) flush_write_buffers() +#define dma_cache_wback(_start,_size) flush_write_buffers() +#define dma_cache_wback_inv(_start,_size) flush_write_buffers() + +#else + +/* Nothing to do */ + +#define dma_cache_inv(_start,_size) do { } while (0) +#define dma_cache_wback(_start,_size) do { } while (0) +#define dma_cache_wback_inv(_start,_size) do { } while (0) +#define flush_write_buffers() + +#endif + +#endif /* __KERNEL__ */ + +#ifdef SLOW_IO_BY_JUMPING +#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:" +#else +#define __SLOW_DOWN_IO "\noutb %%al,$0x80" +#endif + +#ifdef REALLY_SLOW_IO +#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO +#else +#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO +#endif + +#ifdef CONFIG_MULTIQUAD +extern void *xquad_portio; /* Where the IO area was mapped */ +#endif /* CONFIG_MULTIQUAD */ + +/* + * Talk about misusing macros.. + */ +#define __OUT1(s,x) \ +static inline void out##s(unsigned x value, unsigned short port) { + +#define __OUT2(s,s1,s2) \ +__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" + +#if defined (CONFIG_MULTIQUAD) && !defined(STANDALONE) +#define __OUTQ(s,ss,x) /* Do the equivalent of the portio op on quads */ \ +static inline void out##ss(unsigned x value, unsigned short port) { \ + if (xquad_portio) \ + write##s(value, (unsigned long) xquad_portio + port); \ + else /* We're still in early boot, running on quad 0 */ \ + out##ss##_local(value, port); \ +} \ +static inline void out##ss##_quad(unsigned x value, unsigned short port, int quad) { \ + if (xquad_portio) \ + write##s(value, (unsigned long) xquad_portio + (XQUAD_PORTIO_QUAD*quad)\ + + port); \ +} + +#define __INQ(s,ss) /* Do the equivalent of the portio op on quads */ \ +static inline RETURN_TYPE in##ss(unsigned short port) { \ + if (xquad_portio) \ + return read##s((unsigned long) xquad_portio + port); \ + else /* We're still in early boot, running on quad 0 */ \ + return in##ss##_local(port); \ +} \ +static inline RETURN_TYPE in##ss##_quad(unsigned short port, int quad) { \ + if (xquad_portio) \ + return read##s((unsigned long) xquad_portio + (XQUAD_PORTIO_QUAD*quad)\ + + port); \ + else\ + return 0;\ +} +#endif /* CONFIG_MULTIQUAD && !STANDALONE */ + +#if !defined(CONFIG_MULTIQUAD) || defined(STANDALONE) +#define __OUT(s,s1,x) \ +__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ +__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} +#else +/* Make the default portio routines operate on quad 0 */ +#define __OUT(s,s1,x) \ +__OUT1(s##_local,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ +__OUT1(s##_p_local,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \ +__OUTQ(s,s,x) \ +__OUTQ(s,s##_p,x) +#endif /* !CONFIG_MULTIQUAD || STANDALONE */ + +#define __IN1(s) \ +static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v; + +#define __IN2(s,s1,s2) \ +__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" + +#if !defined(CONFIG_MULTIQUAD) || defined(STANDALONE) +#define __IN(s,s1,i...) \ +__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ +__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } +#else +/* Make the default portio routines operate on quad 0 */ +#define __IN(s,s1,i...) \ +__IN1(s##_local) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ +__IN1(s##_p_local) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ +__INQ(s,s) \ +__INQ(s,s##_p) +#endif /* !CONFIG_MULTIQUAD || STANDALONE */ + +#define __INS(s) \ +static inline void ins##s(unsigned short port, void * addr, unsigned long count) \ +{ __asm__ __volatile__ ("rep ; ins" #s \ +: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } + +#define __OUTS(s) \ +static inline void outs##s(unsigned short port, const void * addr, unsigned long count) \ +{ __asm__ __volatile__ ("rep ; outs" #s \ +: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } + +#define RETURN_TYPE unsigned char +__IN(b,"") +#undef RETURN_TYPE +#define RETURN_TYPE unsigned short +__IN(w,"") +#undef RETURN_TYPE +#define RETURN_TYPE unsigned int +__IN(l,"") +#undef RETURN_TYPE + +__OUT(b,"b",char) +__OUT(w,"w",short) +__OUT(l,,int) + +__INS(b) +__INS(w) +__INS(l) + +__OUTS(b) +__OUTS(w) +__OUTS(l) + +#endif diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/irq.h b/xenolinux-2.4.25-sparse/include/asm-xen/irq.h new file mode 100644 index 0000000000..917a05334d --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/irq.h @@ -0,0 +1,40 @@ +#ifndef _ASM_IRQ_H +#define _ASM_IRQ_H + +/* + * linux/include/asm/irq.h + * + * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar + * + * IRQ/IPI changes taken from work by Thomas Radke + * + */ + +#include +#include +#include + +#define NR_IRQS 256 + +#define PHYS_IRQ_BASE 0 +#define NR_PHYS_IRQS 128 + +#define HYPEREVENT_IRQ_BASE 128 +#define NR_HYPEREVENT_IRQS 128 + +#define HYPEREVENT_IRQ(_ev) ((_ev) + HYPEREVENT_IRQ_BASE) +#define HYPEREVENT_FROM_IRQ(_irq) ((_irq) - HYPEREVENT_IRQ_BASE) + +extern void physirq_init(void); + +#define irq_cannonicalize(_irq) (_irq) + +extern void disable_irq(unsigned int); +extern void disable_irq_nosync(unsigned int); +extern void enable_irq(unsigned int); + +#ifdef CONFIG_X86_LOCAL_APIC +#define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */ +#endif + +#endif /* _ASM_IRQ_H */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/keyboard.h b/xenolinux-2.4.25-sparse/include/asm-xen/keyboard.h new file mode 100644 index 0000000000..79d72da929 --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/keyboard.h @@ -0,0 +1,97 @@ +/* Portions copyright (c) 2003 James Scott, Intel Research Cambridge */ +/* + * Talks to hypervisor to get PS/2 keyboard and mouse events, and send keyboard + * and mouse commands + */ + +/* Based on: + * linux/include/asm-i386/keyboard.h + * + * Created 3 Nov 1996 by Geert Uytterhoeven + */ + +#ifndef _XEN_KEYBOARD_H +#define _XEN_KEYBOARD_H + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include + +extern int pckbd_setkeycode(unsigned int scancode, unsigned int keycode); +extern int pckbd_getkeycode(unsigned int scancode); +extern int pckbd_translate(unsigned char scancode, unsigned char *keycode, + char raw_mode); +extern char pckbd_unexpected_up(unsigned char keycode); +extern void pckbd_leds(unsigned char leds); +extern void pckbd_init_hw(void); +extern int pckbd_pm_resume(struct pm_dev *, pm_request_t, void *); + +extern pm_callback pm_kbd_request_override; +extern unsigned char pckbd_sysrq_xlate[128]; + +#define kbd_setkeycode pckbd_setkeycode +#define kbd_getkeycode pckbd_getkeycode +#define kbd_translate pckbd_translate +#define kbd_unexpected_up pckbd_unexpected_up +#define kbd_leds pckbd_leds +#define kbd_init_hw pckbd_init_hw +#define kbd_sysrq_xlate pckbd_sysrq_xlate + +#define SYSRQ_KEY 0x54 + + +/* THIS SECTION TALKS TO XEN TO DO PS2 SUPPORT */ +#include +#include + +#define kbd_controller_present xen_kbd_controller_present + +static inline int xen_kbd_controller_present () +{ + return start_info.flags & SIF_INITDOMAIN; +} + +/* resource allocation */ +#define kbd_request_region() \ + do { } while (0) +#define kbd_request_irq(handler) \ + request_irq(HYPEREVENT_IRQ(_EVENT_PS2), handler, 0, "ps/2", NULL) + +/* could implement these with command to xen to filter mouse stuff... */ +#define aux_request_irq(hand, dev_id) 0 +#define aux_free_irq(dev_id) do { } while(0) + +/* Some stoneage hardware needs delays after some operations. */ +#define kbd_pause() do { } while(0) + +static unsigned char kbd_current_scancode = 0; + +static unsigned char kbd_read_input(void) +{ + return kbd_current_scancode; +} + +static unsigned char kbd_read_status(void) +{ + long res; + res = HYPERVISOR_kbd_op(KBD_OP_READ,0); + if ( res<0 ) + { + kbd_current_scancode = 0; + return 0; /* error with our request - wrong domain? */ + } + kbd_current_scancode = KBD_CODE_SCANCODE(res); + return KBD_CODE_STATUS(res); +} + + +#define kbd_write_output(val) HYPERVISOR_kbd_op(KBD_OP_WRITEOUTPUT, val); +#define kbd_write_command(val) HYPERVISOR_kbd_op(KBD_OP_WRITECOMMAND, val); + + +#endif /* __KERNEL__ */ +#endif /* _XEN_KEYBOARD_H */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/mmu_context.h b/xenolinux-2.4.25-sparse/include/asm-xen/mmu_context.h new file mode 100644 index 0000000000..7972ce7d74 --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/mmu_context.h @@ -0,0 +1,74 @@ +#ifndef __I386_MMU_CONTEXT_H +#define __I386_MMU_CONTEXT_H + +#include +#include +#include +#include + +/* + * hooks to add arch specific data into the mm struct. + * Note that destroy_context is called even if init_new_context + * fails. + */ +int init_new_context(struct task_struct *tsk, struct mm_struct *mm); +void destroy_context(struct mm_struct *mm); + +#ifdef CONFIG_SMP + +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +{ + if(cpu_tlbstate[cpu].state == TLBSTATE_OK) + cpu_tlbstate[cpu].state = TLBSTATE_LAZY; +} +#else +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +{ +} +#endif + +extern pgd_t *cur_pgd; + +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu) +{ + if (prev != next) { + /* stop flush ipis for the previous mm */ + clear_bit(cpu, &prev->cpu_vm_mask); +#ifdef CONFIG_SMP + cpu_tlbstate[cpu].state = TLBSTATE_OK; + cpu_tlbstate[cpu].active_mm = next; +#endif + + /* Re-load page tables */ + cur_pgd = next->pgd; + queue_pt_switch(__pa(cur_pgd)); + /* load_LDT, if either the previous or next thread + * has a non-default LDT. + */ + if (next->context.size+prev->context.size) + load_LDT(&next->context); + } +#ifdef CONFIG_SMP + else { + cpu_tlbstate[cpu].state = TLBSTATE_OK; + if(cpu_tlbstate[cpu].active_mm != next) + out_of_line_bug(); + if(!test_and_set_bit(cpu, &next->cpu_vm_mask)) { + /* We were in lazy tlb mode and leave_mm disabled + * tlb flush IPI delivery. We must reload %cr3. + */ + cur_pgd = next->pgd; + queue_pt_switch(__pa(cur_pgd)); + load_LDT(next); + } + } +#endif +} + +#define activate_mm(prev, next) \ +do { \ + switch_mm((prev),(next),NULL,smp_processor_id()); \ + flush_page_update_queue(); \ +} while ( 0 ) + +#endif diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/msr.h b/xenolinux-2.4.25-sparse/include/asm-xen/msr.h new file mode 100644 index 0000000000..1a2c8765a8 --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/msr.h @@ -0,0 +1,138 @@ +#ifndef __ASM_MSR_H +#define __ASM_MSR_H + +/* + * Access to machine-specific registers (available on 586 and better only) + * Note: the rd* operations modify the parameters directly (without using + * pointer indirection), this allows gcc to optimize better + */ + +#define rdmsr(msr,val1,val2) \ +{ \ + dom0_op_t op; \ + op.cmd = DOM0_MSR; \ + op.u.msr.write = 0; \ + op.u.msr.msr = msr; \ + op.u.msr.cpu_mask = (1 << current->processor); \ + HYPERVISOR_dom0_op(&op); \ + val1 = op.u.msr.out1; \ + val2 = op.u.msr.out2; \ +} + +#define wrmsr(msr,val1,val2) \ +{ \ + dom0_op_t op; \ + op.cmd = DOM0_MSR; \ + op.u.msr.write = 1; \ + op.u.msr.cpu_mask = (1 << current->processor); \ + op.u.msr.msr = msr; \ + op.u.msr.in1 = val1; \ + op.u.msr.in2 = val2; \ + HYPERVISOR_dom0_op(&op); \ +} + +#define rdtsc(low,high) \ + __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) + +#define rdtscl(low) \ + __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx") + +#define rdtscll(val) \ + __asm__ __volatile__("rdtsc" : "=A" (val)) + +#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) + +#define rdpmc(counter,low,high) \ + __asm__ __volatile__("rdpmc" \ + : "=a" (low), "=d" (high) \ + : "c" (counter)) + +/* symbolic names for some interesting MSRs */ +/* Intel defined MSRs. */ +#define MSR_IA32_P5_MC_ADDR 0 +#define MSR_IA32_P5_MC_TYPE 1 +#define MSR_IA32_PLATFORM_ID 0x17 +#define MSR_IA32_EBL_CR_POWERON 0x2a + +#define MSR_IA32_APICBASE 0x1b +#define MSR_IA32_APICBASE_BSP (1<<8) +#define MSR_IA32_APICBASE_ENABLE (1<<11) +#define MSR_IA32_APICBASE_BASE (0xfffff<<12) + +#define MSR_IA32_UCODE_WRITE 0x79 +#define MSR_IA32_UCODE_REV 0x8b + +#define MSR_IA32_BBL_CR_CTL 0x119 + +#define MSR_IA32_MCG_CAP 0x179 +#define MSR_IA32_MCG_STATUS 0x17a +#define MSR_IA32_MCG_CTL 0x17b + +#define MSR_IA32_THERM_CONTROL 0x19a +#define MSR_IA32_THERM_INTERRUPT 0x19b +#define MSR_IA32_THERM_STATUS 0x19c +#define MSR_IA32_MISC_ENABLE 0x1a0 + +#define MSR_IA32_DEBUGCTLMSR 0x1d9 +#define MSR_IA32_LASTBRANCHFROMIP 0x1db +#define MSR_IA32_LASTBRANCHTOIP 0x1dc +#define MSR_IA32_LASTINTFROMIP 0x1dd +#define MSR_IA32_LASTINTTOIP 0x1de + +#define MSR_IA32_MC0_CTL 0x400 +#define MSR_IA32_MC0_STATUS 0x401 +#define MSR_IA32_MC0_ADDR 0x402 +#define MSR_IA32_MC0_MISC 0x403 + +#define MSR_P6_PERFCTR0 0xc1 +#define MSR_P6_PERFCTR1 0xc2 +#define MSR_P6_EVNTSEL0 0x186 +#define MSR_P6_EVNTSEL1 0x187 + +#define MSR_IA32_PERF_STATUS 0x198 +#define MSR_IA32_PERF_CTL 0x199 + +/* AMD Defined MSRs */ +#define MSR_K6_EFER 0xC0000080 +#define MSR_K6_STAR 0xC0000081 +#define MSR_K6_WHCR 0xC0000082 +#define MSR_K6_UWCCR 0xC0000085 +#define MSR_K6_EPMR 0xC0000086 +#define MSR_K6_PSOR 0xC0000087 +#define MSR_K6_PFIR 0xC0000088 + +#define MSR_K7_EVNTSEL0 0xC0010000 +#define MSR_K7_PERFCTR0 0xC0010004 +#define MSR_K7_HWCR 0xC0010015 +#define MSR_K7_CLK_CTL 0xC001001b +#define MSR_K7_FID_VID_CTL 0xC0010041 +#define MSR_K7_VID_STATUS 0xC0010042 + +/* Centaur-Hauls/IDT defined MSRs. */ +#define MSR_IDT_FCR1 0x107 +#define MSR_IDT_FCR2 0x108 +#define MSR_IDT_FCR3 0x109 +#define MSR_IDT_FCR4 0x10a + +#define MSR_IDT_MCR0 0x110 +#define MSR_IDT_MCR1 0x111 +#define MSR_IDT_MCR2 0x112 +#define MSR_IDT_MCR3 0x113 +#define MSR_IDT_MCR4 0x114 +#define MSR_IDT_MCR5 0x115 +#define MSR_IDT_MCR6 0x116 +#define MSR_IDT_MCR7 0x117 +#define MSR_IDT_MCR_CTRL 0x120 + +/* VIA Cyrix defined MSRs*/ +#define MSR_VIA_FCR 0x1107 +#define MSR_VIA_LONGHAUL 0x110a +#define MSR_VIA_BCR2 0x1147 + +/* Transmeta defined MSRs */ +#define MSR_TMTA_LONGRUN_CTRL 0x80868010 +#define MSR_TMTA_LONGRUN_FLAGS 0x80868011 +#define MSR_TMTA_LRTI_READOUT 0x80868018 +#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a + +#endif /* __ASM_MSR_H */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/multicall.h b/xenolinux-2.4.25-sparse/include/asm-xen/multicall.h new file mode 100644 index 0000000000..f0ea5c3a66 --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/multicall.h @@ -0,0 +1,84 @@ +/****************************************************************************** + * multicall.h + */ + +#ifndef __MULTICALL_H__ +#define __MULTICALL_H__ + +#include + +extern multicall_entry_t multicall_list[]; +extern int nr_multicall_ents; + +static inline void queue_multicall0(unsigned long op) +{ + int i = nr_multicall_ents; + multicall_list[i].op = op; + nr_multicall_ents = i+1; +} + +static inline void queue_multicall1(unsigned long op, unsigned long arg1) +{ + int i = nr_multicall_ents; + multicall_list[i].op = op; + multicall_list[i].args[0] = arg1; + nr_multicall_ents = i+1; +} + +static inline void queue_multicall2( + unsigned long op, unsigned long arg1, unsigned long arg2) +{ + int i = nr_multicall_ents; + multicall_list[i].op = op; + multicall_list[i].args[0] = arg1; + multicall_list[i].args[1] = arg2; + nr_multicall_ents = i+1; +} + +static inline void queue_multicall3( + unsigned long op, unsigned long arg1, unsigned long arg2, + unsigned long arg3) +{ + int i = nr_multicall_ents; + multicall_list[i].op = op; + multicall_list[i].args[0] = arg1; + multicall_list[i].args[1] = arg2; + multicall_list[i].args[2] = arg3; + nr_multicall_ents = i+1; +} + +static inline void queue_multicall4( + unsigned long op, unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4) +{ + int i = nr_multicall_ents; + multicall_list[i].op = op; + multicall_list[i].args[0] = arg1; + multicall_list[i].args[1] = arg2; + multicall_list[i].args[2] = arg3; + multicall_list[i].args[3] = arg4; + nr_multicall_ents = i+1; +} + +static inline void queue_multicall5( + unsigned long op, unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4, unsigned long arg5) +{ + int i = nr_multicall_ents; + multicall_list[i].op = op; + multicall_list[i].args[0] = arg1; + multicall_list[i].args[1] = arg2; + multicall_list[i].args[2] = arg3; + multicall_list[i].args[3] = arg4; + multicall_list[i].args[4] = arg5; + nr_multicall_ents = i+1; +} + +static inline void execute_multicall_list(void) +{ + if ( unlikely(nr_multicall_ents == 0) ) return; + (void)HYPERVISOR_multicall(multicall_list, nr_multicall_ents); + nr_multicall_ents = 0; +} + +#endif /* __MULTICALL_H__ */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/page.h b/xenolinux-2.4.25-sparse/include/asm-xen/page.h new file mode 100644 index 0000000000..b7640a7d78 --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/page.h @@ -0,0 +1,173 @@ +#ifndef _I386_PAGE_H +#define _I386_PAGE_H + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1UL << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ + +#include +#include + +#ifdef CONFIG_X86_USE_3DNOW + +#include + +#define clear_page(page) mmx_clear_page((void *)(page)) +#define copy_page(to,from) mmx_copy_page(to,from) + +#else + +/* + * On older X86 processors its not a win to use MMX here it seems. + * Maybe the K6-III ? + */ + +#define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) +#define copy_page(to,from) memcpy((void *)(to), (void *)(from), PAGE_SIZE) + +#endif + +#define clear_user_page(page, vaddr) clear_page(page) +#define copy_user_page(to, from, vaddr) copy_page(to, from) + +/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ +extern unsigned long *phys_to_machine_mapping; +#define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)]) +#define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)]) +static inline unsigned long phys_to_machine(unsigned long phys) +{ + unsigned long machine = pfn_to_mfn(phys >> PAGE_SHIFT); + machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK); + return machine; +} +static inline unsigned long machine_to_phys(unsigned long machine) +{ + unsigned long phys = mfn_to_pfn(machine >> PAGE_SHIFT); + phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK); + return phys; +} + +/* + * These are used to make use of C type-checking.. + */ +#if CONFIG_X86_PAE +typedef struct { unsigned long pte_low, pte_high; } pte_t; +typedef struct { unsigned long long pmd; } pmd_t; +typedef struct { unsigned long long pgd; } pgd_t; +#define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) +#else +typedef struct { unsigned long pte_low; } pte_t; +typedef struct { unsigned long pmd; } pmd_t; +typedef struct { unsigned long pgd; } pgd_t; +static inline unsigned long pte_val(pte_t x) +{ + unsigned long ret = x.pte_low; + if ( (ret & 1) ) ret = machine_to_phys(ret); + return ret; +} +#endif +#define PTE_MASK PAGE_MASK + +typedef struct { unsigned long pgprot; } pgprot_t; + +static inline unsigned long pmd_val(pmd_t x) +{ + unsigned long ret = x.pmd; + if ( (ret & 1) ) ret = machine_to_phys(ret); + return ret; +} +#define pgd_val(x) ({ BUG(); (unsigned long)0; }) +#define pgprot_val(x) ((x).pgprot) + +static inline pte_t __pte(unsigned long x) +{ + if ( (x & 1) ) x = phys_to_machine(x); + return ((pte_t) { (x) }); +} +static inline pmd_t __pmd(unsigned long x) +{ + if ( (x & 1) ) x = phys_to_machine(x); + return ((pmd_t) { (x) }); +} +#define __pgd(x) ({ BUG(); (pgprot_t) { 0 }; }) +#define __pgprot(x) ((pgprot_t) { (x) } ) + +#endif /* !__ASSEMBLY__ */ + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) + +/* + * This handles the memory map.. We could make this a config + * option, but too many people screw it up, and too few need + * it. + * + * A __PAGE_OFFSET of 0xC0000000 means that the kernel has + * a virtual address space of one gigabyte, which limits the + * amount of physical memory you can use to about 950MB. + * + * If you want more physical memory than this then see the CONFIG_HIGHMEM4G + * and CONFIG_HIGHMEM64G options in the kernel configuration. + */ + +#define __PAGE_OFFSET (0xC0000000) + +#ifndef __ASSEMBLY__ + +/* + * Tell the user there is some problem. Beep too, so we can + * see^H^H^Hhear bugs in early bootup as well! + * The offending file and line are encoded after the "officially + * undefined" opcode for parsing in the trap handler. + */ + +#if 1 /* Set to zero for a slightly smaller kernel */ +#define BUG() \ + __asm__ __volatile__( "ud2\n" \ + "\t.word %c0\n" \ + "\t.long %c1\n" \ + : : "i" (__LINE__), "i" (__FILE__)) +#else +#define BUG() __asm__ __volatile__("ud2\n") +#endif + +#define PAGE_BUG(page) do { \ + BUG(); \ +} while (0) + +/* Pure 2^n version of get_order */ +static __inline__ int get_order(unsigned long size) +{ + int order; + + size = (size-1) >> (PAGE_SHIFT-1); + order = -1; + do { + size >>= 1; + order++; + } while (size); + return order; +} + +#endif /* __ASSEMBLY__ */ + +#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) +#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) +#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) +#define virt_to_page(kaddr) (mem_map + (__pa(kaddr) >> PAGE_SHIFT)) +#define VALID_PAGE(page) ((page - mem_map) < max_mapnr) + +#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +/* VIRT <-> MACHINE conversion */ +#define virt_to_machine(_a) (phys_to_machine(__pa(_a))) +#define machine_to_virt(_m) (__va(machine_to_phys(_m))) + +#endif /* __KERNEL__ */ + +#endif /* _I386_PAGE_H */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/pgalloc.h b/xenolinux-2.4.25-sparse/include/asm-xen/pgalloc.h new file mode 100644 index 0000000000..308a1b7c40 --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/pgalloc.h @@ -0,0 +1,274 @@ +#ifndef _I386_PGALLOC_H +#define _I386_PGALLOC_H + +#include +#include +#include +#include +#include + +/* + * Quick lists are aligned so that least significant bits of array pointer + * are all zero when list is empty, and all one when list is full. + */ +#define QUICKLIST_ENTRIES 256 +#define QUICKLIST_EMPTY(_l) !((unsigned long)(_l) & ((QUICKLIST_ENTRIES*4)-1)) +#define QUICKLIST_FULL(_l) QUICKLIST_EMPTY((_l)+1) +#define pgd_quicklist (current_cpu_data.pgd_quick) +#define pmd_quicklist (current_cpu_data.pmd_quick) +#define pte_quicklist (current_cpu_data.pte_quick) +#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz) + +#define pmd_populate(mm, pmd, pte) \ + do { \ + set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \ + XEN_flush_page_update_queue(); \ + } while ( 0 ) + +/* + * Allocate and free page tables. + */ + +#if defined (CONFIG_X86_PAE) + +#error "no PAE support as yet" + +/* + * We can't include here, thus these uglinesses. + */ +struct kmem_cache_s; + +extern struct kmem_cache_s *pae_pgd_cachep; +extern void *kmem_cache_alloc(struct kmem_cache_s *, int); +extern void kmem_cache_free(struct kmem_cache_s *, void *); + + +static inline pgd_t *get_pgd_slow(void) +{ + int i; + pgd_t *pgd = kmem_cache_alloc(pae_pgd_cachep, GFP_KERNEL); + + if (pgd) { + for (i = 0; i < USER_PTRS_PER_PGD; i++) { + unsigned long pmd = __get_free_page(GFP_KERNEL); + if (!pmd) + goto out_oom; + clear_page(pmd); + set_pgd(pgd + i, __pgd(1 + __pa(pmd))); + } + memcpy(pgd + USER_PTRS_PER_PGD, + swapper_pg_dir + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + } + return pgd; +out_oom: + for (i--; i >= 0; i--) + free_page((unsigned long)__va(pgd_val(pgd[i])-1)); + kmem_cache_free(pae_pgd_cachep, pgd); + return NULL; +} + +#else + +static inline pgd_t *get_pgd_slow(void) +{ + pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL); + + if (pgd) { + memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); + memcpy(pgd + USER_PTRS_PER_PGD, + init_mm.pgd + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + __make_page_readonly(pgd); + queue_pgd_pin(__pa(pgd)); + + } + return pgd; +} + +#endif /* CONFIG_X86_PAE */ + +static inline pgd_t *get_pgd_fast(void) +{ + unsigned long ret; + + if ( !QUICKLIST_EMPTY(pgd_quicklist) ) { + ret = *(--pgd_quicklist); + pgtable_cache_size--; + + } else + ret = (unsigned long)get_pgd_slow(); + return (pgd_t *)ret; +} + +static inline void free_pgd_slow(pgd_t *pgd) +{ +#if defined(CONFIG_X86_PAE) +#error + int i; + + for (i = 0; i < USER_PTRS_PER_PGD; i++) + free_page((unsigned long)__va(pgd_val(pgd[i])-1)); + kmem_cache_free(pae_pgd_cachep, pgd); +#else + queue_pgd_unpin(__pa(pgd)); + __make_page_writeable(pgd); + free_page((unsigned long)pgd); +#endif +} + +static inline void free_pgd_fast(pgd_t *pgd) +{ + if ( !QUICKLIST_FULL(pgd_quicklist) ) { + *(pgd_quicklist++) = (unsigned long)pgd; + pgtable_cache_size++; + } else + free_pgd_slow(pgd); +} + +static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) +{ + pte_t *pte; + + pte = (pte_t *) __get_free_page(GFP_KERNEL); + if (pte) + { + clear_page(pte); + __make_page_readonly(pte); + queue_pte_pin(__pa(pte)); + } + return pte; + +} + +static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, + unsigned long address) +{ + unsigned long ret = 0; + if ( !QUICKLIST_EMPTY(pte_quicklist) ) { + ret = *(--pte_quicklist); + pgtable_cache_size--; + } + return (pte_t *)ret; +} + +static __inline__ void pte_free_slow(pte_t *pte) +{ + queue_pte_unpin(__pa(pte)); + __make_page_writeable(pte); + free_page((unsigned long)pte); +} + +static inline void pte_free_fast(pte_t *pte) +{ + if ( !QUICKLIST_FULL(pte_quicklist) ) { + *(pte_quicklist++) = (unsigned long)pte; + pgtable_cache_size++; + } else + pte_free_slow(pte); +} + +#define pte_free(pte) pte_free_fast(pte) +#define pgd_free(pgd) free_pgd_fast(pgd) +#define pgd_alloc(mm) get_pgd_fast() + +/* + * allocating and freeing a pmd is trivial: the 1-entry pmd is + * inside the pgd, so has no extra memory associated with it. + * (In the PAE case we free the pmds as part of the pgd.) + */ + +#define pmd_alloc_one_fast(mm, addr) ({ BUG(); ((pmd_t *)1); }) +#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) +#define pmd_free_slow(x) do { } while (0) +#define pmd_free_fast(x) do { } while (0) +#define pmd_free(x) do { } while (0) +#define pgd_populate(mm, pmd, pte) BUG() + +extern int do_check_pgt_cache(int, int); + +/* + * TLB flushing: + * + * - flush_tlb() flushes the current mm struct TLBs + * - flush_tlb_all() flushes all processes TLBs + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(mm, start, end) flushes a range of pages + * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables + * + * ..but the i386 has somewhat limited tlb flushing capabilities, + * and page-granular flushes are available only on i486 and up. + */ + +#ifndef CONFIG_SMP + +#define flush_tlb() __flush_tlb() +#define flush_tlb_all() __flush_tlb_all() +#define local_flush_tlb() __flush_tlb() + +static inline void flush_tlb_mm(struct mm_struct *mm) +{ + if (mm == current->active_mm) queue_tlb_flush(); + XEN_flush_page_update_queue(); +} + +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long addr) +{ + if (vma->vm_mm == current->active_mm) queue_invlpg(addr); + XEN_flush_page_update_queue(); +} + +static inline void flush_tlb_range(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + if (mm == current->active_mm) queue_tlb_flush(); + XEN_flush_page_update_queue(); +} + +#else +#error no guestos SMP support yet... +#include + +#define local_flush_tlb() \ + __flush_tlb() + +extern void flush_tlb_all(void); +extern void flush_tlb_current_task(void); +extern void flush_tlb_mm(struct mm_struct *); +extern void flush_tlb_page(struct vm_area_struct *, unsigned long); + +#define flush_tlb() flush_tlb_current_task() + +static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, unsigned long end) +{ + flush_tlb_mm(mm); +} + +#define TLBSTATE_OK 1 +#define TLBSTATE_LAZY 2 + +struct tlb_state +{ + struct mm_struct *active_mm; + int state; +} ____cacheline_aligned; +extern struct tlb_state cpu_tlbstate[NR_CPUS]; + +#endif /* CONFIG_SMP */ + +static inline void flush_tlb_pgtables(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + /* i386 does not keep any page table caches in TLB */ + XEN_flush_page_update_queue(); +} + +extern int direct_remap_area_pages(struct mm_struct *mm, + unsigned long address, + unsigned long machine_addr, + unsigned long size, + pgprot_t prot); + +#endif /* _I386_PGALLOC_H */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/pgtable-2level.h b/xenolinux-2.4.25-sparse/include/asm-xen/pgtable-2level.h new file mode 100644 index 0000000000..c780f644c0 --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/pgtable-2level.h @@ -0,0 +1,71 @@ +#ifndef _I386_PGTABLE_2LEVEL_H +#define _I386_PGTABLE_2LEVEL_H + +/* + * traditional i386 two-level paging structure: + */ + +#define PGDIR_SHIFT 22 +#define PTRS_PER_PGD 1024 + +/* + * the i386 is two-level, so we don't really have any + * PMD directory physically. + */ +#define PMD_SHIFT 22 +#define PTRS_PER_PMD 1 + +#define PTRS_PER_PTE 1024 + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low) +#define pmd_ERROR(e) \ + printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + +/* + * The "pgd_xxx()" functions here are trivial for a folded two-level + * setup: the pgd is never bad, and a pmd always exists (as it's folded + * into the pgd entry) + */ +static inline int pgd_none(pgd_t pgd) { return 0; } +static inline int pgd_bad(pgd_t pgd) { return 0; } +static inline int pgd_present(pgd_t pgd) { return 1; } +#define pgd_clear(xp) do { } while (0) + +#define set_pte(pteptr, pteval) queue_l1_entry_update(pteptr, (pteval).pte_low) +#define set_pte_atomic(pteptr, pteval) queue_l1_entry_update(pteptr, (pteval).pte_low) +#define set_pmd(pmdptr, pmdval) queue_l2_entry_update((pmdptr), (pmdval).pmd) +#define set_pgd(pgdptr, pgdval) ((void)0) + +#define pgd_page(pgd) \ +((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) + +static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) +{ + return (pmd_t *) dir; +} + +/* + * A note on implementation of this atomic 'get-and-clear' operation. + * This is actually very simple because XenoLinux can only run on a single + * processor. Therefore, we cannot race other processors setting the 'accessed' + * or 'dirty' bits on a page-table entry. + * Even if pages are shared between domains, that is not a problem because + * each domain will have separate page tables, with their own versions of + * accessed & dirty state. + */ +static inline pte_t ptep_get_and_clear(pte_t *xp) +{ + pte_t pte = *xp; + queue_l1_entry_update(xp, 0); + return pte; +} + +#define pte_same(a, b) ((a).pte_low == (b).pte_low) +#define pte_page(x) (mem_map+((unsigned long)((pte_val(x) >> PAGE_SHIFT)))) +#define pte_none(x) (!(x).pte_low) +#define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) + +#endif /* _I386_PGTABLE_2LEVEL_H */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/pgtable.h b/xenolinux-2.4.25-sparse/include/asm-xen/pgtable.h new file mode 100644 index 0000000000..36655e63e5 --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/pgtable.h @@ -0,0 +1,370 @@ +#ifndef _I386_PGTABLE_H +#define _I386_PGTABLE_H + +#include + +/* + * The Linux memory management assumes a three-level page table setup. On + * the i386, we use that, but "fold" the mid level into the top-level page + * table, so that we physically have the same two-level page table as the + * i386 mmu expects. + * + * This file contains the functions and defines necessary to modify and use + * the i386 page table tree. + */ +#ifndef __ASSEMBLY__ +#include +#include +#include +#include + +#ifndef _I386_BITOPS_H +#include +#endif + +#define swapper_pg_dir 0 +extern void paging_init(void); + +/* Caches aren't brain-dead on the intel. */ +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_range(mm, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr) do { } while (0) +#define flush_page_to_ram(page) do { } while (0) +#define flush_dcache_page(page) do { } while (0) +#define flush_icache_range(start, end) do { } while (0) +#define flush_icache_page(vma,pg) do { } while (0) +#define flush_icache_user_range(vma,pg,adr,len) do { } while (0) + +extern unsigned long pgkern_mask; + +#define __flush_tlb() ({ queue_tlb_flush(); XEN_flush_page_update_queue(); }) +#define __flush_tlb_global() __flush_tlb() +#define __flush_tlb_all() __flush_tlb_global() +#define __flush_tlb_one(addr) ({ queue_invlpg(addr); XEN_flush_page_update_queue(); }) +#define __flush_tlb_single(addr) ({ queue_invlpg(addr); XEN_flush_page_update_queue(); }) + +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern unsigned long empty_zero_page[1024]; +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) + +#endif /* !__ASSEMBLY__ */ + +/* + * The Linux x86 paging architecture is 'compile-time dual-mode', it + * implements both the traditional 2-level x86 page tables and the + * newer 3-level PAE-mode page tables. + */ +#ifndef __ASSEMBLY__ +#if CONFIG_X86_PAE +# include + +/* + * Need to initialise the X86 PAE caches + */ +extern void pgtable_cache_init(void); + +#else +# include + +/* + * No page table caches to initialise + */ +#define pgtable_cache_init() do { } while (0) + +#endif +#endif + +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) +#define FIRST_USER_PGD_NR 0 + +#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) +#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) + +#define TWOLEVEL_PGDIR_SHIFT 22 +#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT) +#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS) + + +#ifndef __ASSEMBLY__ +/* 4MB is just a nice "safety zone". Also, we align to a fresh pde. */ +#define VMALLOC_OFFSET (4*1024*1024) +extern void * high_memory; +#define VMALLOC_START (((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \ + ~(VMALLOC_OFFSET-1)) +#define VMALLOC_VMADDR(x) ((unsigned long)(x)) +#define VMALLOC_END (FIXADDR_START - 2*PAGE_SIZE) + +#define _PAGE_BIT_PRESENT 0 +#define _PAGE_BIT_RW 1 +#define _PAGE_BIT_USER 2 +#define _PAGE_BIT_PWT 3 +#define _PAGE_BIT_PCD 4 +#define _PAGE_BIT_ACCESSED 5 +#define _PAGE_BIT_DIRTY 6 +#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page, Pentium+, if present.. */ +#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ +#define _PAGE_BIT_IO 9 + +#define _PAGE_PRESENT 0x001 +#define _PAGE_RW 0x002 +#define _PAGE_USER 0x004 +#define _PAGE_PWT 0x008 +#define _PAGE_PCD 0x010 +#define _PAGE_ACCESSED 0x020 +#define _PAGE_DIRTY 0x040 +#define _PAGE_PSE 0x080 /* 4 MB (or 2MB) page, Pentium+, if present.. */ +#define _PAGE_GLOBAL 0x100 /* Global TLB entry PPro+ */ +#define _PAGE_IO 0x200 + +#define _PAGE_PROTNONE 0x080 /* If not present */ + +#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) + +#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) + +#define __PAGE_KERNEL \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) +#define __PAGE_KERNEL_NOCACHE \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED) +#define __PAGE_KERNEL_RO \ + (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED) + +#if 0 +#define MAKE_GLOBAL(x) __pgprot((x) | _PAGE_GLOBAL) +#else +#define MAKE_GLOBAL(x) __pgprot(x) +#endif + +#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL) +#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO) +#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE) + +/* + * The i386 can't do page protection for execute, and considers that + * the same are read. Also, write permissions imply read permissions. + * This is the closest we can get.. + */ +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY +#define __P010 PAGE_COPY +#define __P011 PAGE_COPY +#define __P100 PAGE_READONLY +#define __P101 PAGE_READONLY +#define __P110 PAGE_COPY +#define __P111 PAGE_COPY + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY +#define __S010 PAGE_SHARED +#define __S011 PAGE_SHARED +#define __S100 PAGE_READONLY +#define __S101 PAGE_READONLY +#define __S110 PAGE_SHARED +#define __S111 PAGE_SHARED + +#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE)) +#define pte_clear(xp) queue_l1_entry_update(xp, 0) + +#define pmd_none(x) (!(x).pmd) +#define pmd_present(x) ((x).pmd & _PAGE_PRESENT) +#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) +#define pmd_bad(x) (((x).pmd & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) + + +#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ +static inline int pte_read(pte_t pte) { return (pte).pte_low & _PAGE_USER; } +static inline int pte_exec(pte_t pte) { return (pte).pte_low & _PAGE_USER; } +static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; } +static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; } +static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; } +static inline int pte_io(pte_t pte) { return (pte).pte_low & _PAGE_IO; } + +static inline pte_t pte_rdprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; } +static inline pte_t pte_exprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; } +static inline pte_t pte_mkclean(pte_t pte) { (pte).pte_low &= ~_PAGE_DIRTY; return pte; } +static inline pte_t pte_mkold(pte_t pte) { (pte).pte_low &= ~_PAGE_ACCESSED; return pte; } +static inline pte_t pte_wrprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_RW; return pte; } +static inline pte_t pte_mkread(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; } +static inline pte_t pte_mkexec(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; } +static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; } +static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; } +static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } +static inline pte_t pte_mkio(pte_t pte) { (pte).pte_low |= _PAGE_IO; return pte; } + +static inline int ptep_test_and_clear_dirty(pte_t *ptep) +{ + unsigned long pteval = *(unsigned long *)ptep; + int ret = pteval & _PAGE_DIRTY; + if ( ret ) queue_l1_entry_update(ptep, pteval & ~_PAGE_DIRTY); + return ret; +} +static inline int ptep_test_and_clear_young(pte_t *ptep) +{ + unsigned long pteval = *(unsigned long *)ptep; + int ret = pteval & _PAGE_ACCESSED; + if ( ret ) queue_l1_entry_update(ptep, pteval & ~_PAGE_ACCESSED); + return ret; +} +static inline void ptep_set_wrprotect(pte_t *ptep) +{ + unsigned long pteval = *(unsigned long *)ptep; + if ( (pteval & _PAGE_RW) ) + queue_l1_entry_update(ptep, pteval & ~_PAGE_RW); +} +static inline void ptep_mkdirty(pte_t *ptep) +{ + unsigned long pteval = *(unsigned long *)ptep; + if ( !(pteval & _PAGE_DIRTY) ) + queue_l1_entry_update(ptep, pteval | _PAGE_DIRTY); +} + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ + +#define mk_pte(page, pgprot) __mk_pte((page) - mem_map, (pgprot)) + +/* This takes a physical page address that is used by the remapping functions */ +#define mk_pte_phys(physpage, pgprot) __mk_pte((physpage) >> PAGE_SHIFT, pgprot) + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + pte.pte_low &= _PAGE_CHG_MASK; + pte.pte_low |= pgprot_val(newprot); + return pte; +} + +#define page_pte(page) page_pte_prot(page, __pgprot(0)) + +#define pmd_page(pmd) \ +((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) + +/* to find an entry in a page-table-directory. */ +#define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) + +#define __pgd_offset(address) pgd_index(address) + +#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address)) + +/* to find an entry in a kernel page-table-directory */ +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +#define __pmd_offset(address) \ + (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) + +/* Find an entry in the third-level page table.. */ +#define __pte_offset(address) \ + ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \ + __pte_offset(address)) + +/* + * The i386 doesn't have any external MMU info: the kernel page + * tables contain all the necessary information. + */ +#define update_mmu_cache(vma,address,pte) do { } while (0) + +/* Encode and de-code a swap entry */ +#define SWP_TYPE(x) (((x).val >> 1) & 0x3f) +#define SWP_OFFSET(x) ((x).val >> 8) +#define SWP_ENTRY(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) +#define pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) +#define swp_entry_to_pte(x) ((pte_t) { (x).val }) + +struct page; +int change_page_attr(struct page *, int, pgprot_t prot); + +static inline void __make_page_readonly(void *va) +{ + pgd_t *pgd = pgd_offset_k((unsigned long)va); + pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); + pte_t *pte = pte_offset(pmd, (unsigned long)va); + queue_l1_entry_update(pte, (*(unsigned long *)pte)&~_PAGE_RW); +} + +static inline void __make_page_writeable(void *va) +{ + pgd_t *pgd = pgd_offset_k((unsigned long)va); + pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); + pte_t *pte = pte_offset(pmd, (unsigned long)va); + queue_l1_entry_update(pte, (*(unsigned long *)pte)|_PAGE_RW); +} + +static inline void make_page_readonly(void *va) +{ + pgd_t *pgd = pgd_offset_k((unsigned long)va); + pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); + pte_t *pte = pte_offset(pmd, (unsigned long)va); + queue_l1_entry_update(pte, (*(unsigned long *)pte)&~_PAGE_RW); + if ( (unsigned long)va >= VMALLOC_START ) + __make_page_readonly(machine_to_virt( + *(unsigned long *)pte&PAGE_MASK)); +} + +static inline void make_page_writeable(void *va) +{ + pgd_t *pgd = pgd_offset_k((unsigned long)va); + pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); + pte_t *pte = pte_offset(pmd, (unsigned long)va); + queue_l1_entry_update(pte, (*(unsigned long *)pte)|_PAGE_RW); + if ( (unsigned long)va >= VMALLOC_START ) + __make_page_writeable(machine_to_virt( + *(unsigned long *)pte&PAGE_MASK)); +} + +static inline void make_pages_readonly(void *va, unsigned int nr) +{ + while ( nr-- != 0 ) + { + make_page_readonly(va); + va = (void *)((unsigned long)va + PAGE_SIZE); + } +} + +static inline void make_pages_writeable(void *va, unsigned int nr) +{ + while ( nr-- != 0 ) + { + make_page_writeable(va); + va = (void *)((unsigned long)va + PAGE_SIZE); + } +} + +static inline unsigned long arbitrary_virt_to_phys(void *va) +{ + pgd_t *pgd = pgd_offset_k((unsigned long)va); + pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); + pte_t *pte = pte_offset(pmd, (unsigned long)va); + unsigned long pa = (*(unsigned long *)pte) & PAGE_MASK; + return pa | ((unsigned long)va & (PAGE_SIZE-1)); +} + +#endif /* !__ASSEMBLY__ */ + +/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ +#define PageSkip(page) (0) +#define kern_addr_valid(addr) (1) + +#define io_remap_page_range remap_page_range + +#endif /* _I386_PGTABLE_H */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/proc_cmd.h b/xenolinux-2.4.25-sparse/include/asm-xen/proc_cmd.h new file mode 100644 index 0000000000..4ce2930daa --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/proc_cmd.h @@ -0,0 +1,28 @@ +/****************************************************************************** + * proc_cmd.h + * + * Interface to /proc/cmd and /proc/xen/privcmd. + */ + +#ifndef __PROC_CMD_H__ +#define __PROC_CMD_H__ + +typedef struct privcmd_hypercall +{ + unsigned long op; + unsigned long arg[5]; +} privcmd_hypercall_t; + +typedef struct privcmd_blkmsg +{ + unsigned long op; + void *buf; + int buf_size; +} privcmd_blkmsg_t; + +#define IOCTL_PRIVCMD_HYPERCALL \ + _IOC(_IOC_NONE, 'P', 0, sizeof(privcmd_hypercall_t)) +#define IOCTL_PRIVCMD_BLKMSG \ + _IOC(_IOC_NONE, 'P', 1, sizeof(privcmd_blkmsg_t)) + +#endif /* __PROC_CMD_H__ */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/processor.h b/xenolinux-2.4.25-sparse/include/asm-xen/processor.h new file mode 100644 index 0000000000..2b290252be --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/processor.h @@ -0,0 +1,484 @@ +/* + * include/asm-i386/processor.h + * + * Copyright (C) 1994 Linus Torvalds + */ + +#ifndef __ASM_I386_PROCESSOR_H +#define __ASM_I386_PROCESSOR_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Default implementation of macro that returns current + * instruction pointer ("program counter"). + */ +#define current_text_addr() ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; }) + +/* + * CPU type and hardware bug flags. Kept separately for each CPU. + * Members of this structure are referenced in head.S, so think twice + * before touching them. [mj] + */ + +struct cpuinfo_x86 { + __u8 x86; /* CPU family */ + __u8 x86_vendor; /* CPU vendor */ + __u8 x86_model; + __u8 x86_mask; + char wp_works_ok; /* It doesn't on 386's */ + char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */ + char hard_math; + char rfu; + int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ + __u32 x86_capability[NCAPINTS]; + char x86_vendor_id[16]; + char x86_model_id[64]; + int x86_cache_size; /* in KB - valid for CPUS which support this + call */ + int fdiv_bug; + int f00f_bug; + int coma_bug; + unsigned long loops_per_jiffy; + unsigned long *pgd_quick; + unsigned long *pmd_quick; + unsigned long *pte_quick; + unsigned long pgtable_cache_sz; +} __attribute__((__aligned__(SMP_CACHE_BYTES))); + +#define X86_VENDOR_INTEL 0 +#define X86_VENDOR_CYRIX 1 +#define X86_VENDOR_AMD 2 +#define X86_VENDOR_UMC 3 +#define X86_VENDOR_NEXGEN 4 +#define X86_VENDOR_CENTAUR 5 +#define X86_VENDOR_RISE 6 +#define X86_VENDOR_TRANSMETA 7 +#define X86_VENDOR_NSC 8 +#define X86_VENDOR_SIS 9 +#define X86_VENDOR_UNKNOWN 0xff + +/* + * capabilities of CPUs + */ + +extern struct cpuinfo_x86 boot_cpu_data; +extern struct tss_struct init_tss[NR_CPUS]; + +#ifdef CONFIG_SMP +extern struct cpuinfo_x86 cpu_data[]; +#define current_cpu_data cpu_data[smp_processor_id()] +#else +#define cpu_data (&boot_cpu_data) +#define current_cpu_data boot_cpu_data +#endif + +extern char ignore_irq13; + +extern void identify_cpu(struct cpuinfo_x86 *); +extern void print_cpu_info(struct cpuinfo_x86 *); + +/* + * EFLAGS bits + */ +#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ +#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ +#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ +#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ +#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ +#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ +#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ +#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ +#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ +#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ +#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ +#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ +#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ +#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ +#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ +#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ +#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ + +/* + * Generic CPUID function + */ +static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) +{ + __asm__("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (op)); +} + +/* + * CPUID functions returning a single datum + */ +static inline unsigned int cpuid_eax(unsigned int op) +{ + unsigned int eax; + + __asm__("cpuid" + : "=a" (eax) + : "0" (op) + : "bx", "cx", "dx"); + return eax; +} +static inline unsigned int cpuid_ebx(unsigned int op) +{ + unsigned int eax, ebx; + + __asm__("cpuid" + : "=a" (eax), "=b" (ebx) + : "0" (op) + : "cx", "dx" ); + return ebx; +} +static inline unsigned int cpuid_ecx(unsigned int op) +{ + unsigned int eax, ecx; + + __asm__("cpuid" + : "=a" (eax), "=c" (ecx) + : "0" (op) + : "bx", "dx" ); + return ecx; +} +static inline unsigned int cpuid_edx(unsigned int op) +{ + unsigned int eax, edx; + + __asm__("cpuid" + : "=a" (eax), "=d" (edx) + : "0" (op) + : "bx", "cx"); + return edx; +} + +/* + * Intel CPU features in CR4 + */ +#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ +#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ +#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ +#define X86_CR4_DE 0x0008 /* enable debugging extensions */ +#define X86_CR4_PSE 0x0010 /* enable page size extensions */ +#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ +#define X86_CR4_MCE 0x0040 /* Machine check enable */ +#define X86_CR4_PGE 0x0080 /* enable global pages */ +#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ +#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ +#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ + +#define load_cr3(pgdir) \ + asm volatile("movl %0,%%cr3": :"r" (__pa(pgdir))); + +extern unsigned long mmu_cr4_features; + +#include + +static inline void set_in_cr4 (unsigned long mask) +{ + BUG(); +} + +static inline void clear_in_cr4 (unsigned long mask) +{ + BUG(); +} + +/* + * Cyrix CPU configuration register indexes + */ +#define CX86_CCR0 0xc0 +#define CX86_CCR1 0xc1 +#define CX86_CCR2 0xc2 +#define CX86_CCR3 0xc3 +#define CX86_CCR4 0xe8 +#define CX86_CCR5 0xe9 +#define CX86_CCR6 0xea +#define CX86_CCR7 0xeb +#define CX86_DIR0 0xfe +#define CX86_DIR1 0xff +#define CX86_ARR_BASE 0xc4 +#define CX86_RCR_BASE 0xdc + +/* + * Cyrix CPU indexed register access macros + */ + +#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); }) + +#define setCx86(reg, data) do { \ + outb((reg), 0x22); \ + outb((data), 0x23); \ +} while (0) + +/* + * Bus types (default is ISA, but people can check others with these..) + */ +#ifdef CONFIG_EISA +extern int EISA_bus; +#else +#define EISA_bus (0) +#endif +extern int MCA_bus; + +/* from system description table in BIOS. Mostly for MCA use, but +others may find it useful. */ +extern unsigned int machine_id; +extern unsigned int machine_submodel_id; +extern unsigned int BIOS_revision; +extern unsigned int mca_pentium_flag; + +/* + * User space process size: 3GB (default). + */ +#define TASK_SIZE (PAGE_OFFSET) + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define TASK_UNMAPPED_BASE (TASK_SIZE / 3) + +/* + * Size of io_bitmap in longwords: 32 is ports 0-0x3ff. + */ +#define IO_BITMAP_SIZE 32 +#define IO_BITMAP_BYTES (IO_BITMAP_SIZE * 4) +#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) +#define INVALID_IO_BITMAP_OFFSET 0x8000 + +struct i387_fsave_struct { + long cwd; + long swd; + long twd; + long fip; + long fcs; + long foo; + long fos; + long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ + long status; /* software status information */ +}; + +struct i387_fxsave_struct { + unsigned short cwd; + unsigned short swd; + unsigned short twd; + unsigned short fop; + long fip; + long fcs; + long foo; + long fos; + long mxcsr; + long reserved; + long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ + long padding[56]; +} __attribute__ ((aligned (16))); + +struct i387_soft_struct { + long cwd; + long swd; + long twd; + long fip; + long fcs; + long foo; + long fos; + long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ + unsigned char ftop, changed, lookahead, no_update, rm, alimit; + struct info *info; + unsigned long entry_eip; +}; + +union i387_union { + struct i387_fsave_struct fsave; + struct i387_fxsave_struct fxsave; + struct i387_soft_struct soft; +}; + +typedef struct { + unsigned long seg; +} mm_segment_t; + +struct tss_struct { + unsigned short back_link,__blh; + unsigned long esp0; + unsigned short ss0,__ss0h; + unsigned long esp1; + unsigned short ss1,__ss1h; + unsigned long esp2; + unsigned short ss2,__ss2h; + unsigned long __cr3; + unsigned long eip; + unsigned long eflags; + unsigned long eax,ecx,edx,ebx; + unsigned long esp; + unsigned long ebp; + unsigned long esi; + unsigned long edi; + unsigned short es, __esh; + unsigned short cs, __csh; + unsigned short ss, __ssh; + unsigned short ds, __dsh; + unsigned short fs, __fsh; + unsigned short gs, __gsh; + unsigned short ldt, __ldth; + unsigned short trace, bitmap; + unsigned long io_bitmap[IO_BITMAP_SIZE+1]; + /* + * pads the TSS to be cacheline-aligned (size is 0x100) + */ + unsigned long __cacheline_filler[5]; +}; + +struct thread_struct { + unsigned long esp0; + unsigned long eip; + unsigned long esp; + unsigned long fs; + unsigned long gs; + unsigned int io_pl; +/* Hardware debugging registers */ + unsigned long debugreg[8]; /* %%db0-7 debug registers */ +/* fault info */ + unsigned long cr2, trap_no, error_code; +/* floating point info */ + union i387_union i387; +/* virtual 86 mode info */ + struct vm86_struct * vm86_info; + unsigned long screen_bitmap; + unsigned long v86flags, v86mask, saved_esp0; +}; + +#define INIT_THREAD { sizeof(init_stack) + (long) &init_stack, \ + 0, 0, 0, 0, 0, 0, {0}, 0, 0, 0, {{0}}, 0, 0, 0, 0, 0 } + +#define INIT_TSS { \ + 0,0, /* back_link, __blh */ \ + sizeof(init_stack) + (long) &init_stack, /* esp0 */ \ + __KERNEL_DS, 0, /* ss0 */ \ + 0,0,0,0,0,0, /* stack1, stack2 */ \ + 0, /* cr3 */ \ + 0,0, /* eip,eflags */ \ + 0,0,0,0, /* eax,ecx,edx,ebx */ \ + 0,0,0,0, /* esp,ebp,esi,edi */ \ + 0,0,0,0,0,0, /* es,cs,ss */ \ + 0,0,0,0,0,0, /* ds,fs,gs */ \ + 0,0, /* ldt */ \ + 0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \ + {~0, } /* ioperm */ \ +} + +#define start_thread(regs, new_eip, new_esp) do { \ + __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \ + set_fs(USER_DS); \ + regs->xds = __USER_DS; \ + regs->xes = __USER_DS; \ + regs->xss = __USER_DS; \ + regs->xcs = __USER_CS; \ + regs->eip = new_eip; \ + regs->esp = new_esp; \ +} while (0) + +/* Forward declaration, a strange C thing */ +struct task_struct; +struct mm_struct; + +/* Free all resources held by a thread. */ +extern void release_thread(struct task_struct *); +/* + * create a kernel thread without removing it from tasklists + */ +extern int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); + +/* Copy and release all segment info associated with a VM + * Unusable due to lack of error handling, use {init_new,destroy}_context + * instead. + */ +static inline void copy_segments(struct task_struct *p, struct mm_struct * mm) { } +static inline void release_segments(struct mm_struct * mm) { } + +/* + * Return saved PC of a blocked thread. + */ +static inline unsigned long thread_saved_pc(struct thread_struct *t) +{ + return ((unsigned long *)t->esp)[3]; +} + +unsigned long get_wchan(struct task_struct *p); +#define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019]) +#define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022]) + +#define THREAD_SIZE (2*PAGE_SIZE) +#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1)) +#define free_task_struct(p) free_pages((unsigned long) (p), 1) +#define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count) + +#define init_task (init_task_union.task) +#define init_stack (init_task_union.stack) + +struct microcode { + unsigned int hdrver; + unsigned int rev; + unsigned int date; + unsigned int sig; + unsigned int cksum; + unsigned int ldrver; + unsigned int pf; + unsigned int reserved[5]; + unsigned int bits[500]; +}; + +/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */ +#define MICROCODE_IOCFREE _IO('6',0) + +/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ +static inline void rep_nop(void) +{ + __asm__ __volatile__("rep;nop" ::: "memory"); +} + +#define cpu_relax() rep_nop() + +/* Prefetch instructions for Pentium III and AMD Athlon */ +#if defined(CONFIG_MPENTIUMIII) || defined (CONFIG_MPENTIUM4) + +#define ARCH_HAS_PREFETCH +extern inline void prefetch(const void *x) +{ + __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x)); +} + +#elif CONFIG_X86_USE_3DNOW + +#define ARCH_HAS_PREFETCH +#define ARCH_HAS_PREFETCHW +#define ARCH_HAS_SPINLOCK_PREFETCH + +extern inline void prefetch(const void *x) +{ + __asm__ __volatile__ ("prefetch (%0)" : : "r"(x)); +} + +extern inline void prefetchw(const void *x) +{ + __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x)); +} +#define spin_lock_prefetch(x) prefetchw(x) + +#endif + +#define TF_MASK 0x100 + +#endif /* __ASM_I386_PROCESSOR_H */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/ptrace.h b/xenolinux-2.4.25-sparse/include/asm-xen/ptrace.h new file mode 100644 index 0000000000..4457ac0b17 --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/ptrace.h @@ -0,0 +1,63 @@ +#ifndef _I386_PTRACE_H +#define _I386_PTRACE_H + +#define EBX 0 +#define ECX 1 +#define EDX 2 +#define ESI 3 +#define EDI 4 +#define EBP 5 +#define EAX 6 +#define DS 7 +#define ES 8 +#define FS 9 +#define GS 10 +#define ORIG_EAX 11 +#define EIP 12 +#define CS 13 +#define EFL 14 +#define UESP 15 +#define SS 16 +#define FRAME_SIZE 17 + +/* this struct defines the way the registers are stored on the + stack during a system call. */ + +struct pt_regs { + long ebx; + long ecx; + long edx; + long esi; + long edi; + long ebp; + long eax; + int xds; + int xes; + long orig_eax; + long eip; + int xcs; + long eflags; + long esp; + int xss; +}; + +/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */ +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 +#define PTRACE_GETFPREGS 14 +#define PTRACE_SETFPREGS 15 +#define PTRACE_GETFPXREGS 18 +#define PTRACE_SETFPXREGS 19 + +#define PTRACE_SETOPTIONS 21 + +/* options set using PTRACE_SETOPTIONS */ +#define PTRACE_O_TRACESYSGOOD 0x00000001 + +#ifdef __KERNEL__ +#define user_mode(regs) ((regs) && (2 & (regs)->xcs)) +#define instruction_pointer(regs) ((regs) ? (regs)->eip : NULL) +extern void show_regs(struct pt_regs *); +#endif + +#endif diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/segment.h b/xenolinux-2.4.25-sparse/include/asm-xen/segment.h new file mode 100644 index 0000000000..ca13028ce0 --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/segment.h @@ -0,0 +1,15 @@ +#ifndef _ASM_SEGMENT_H +#define _ASM_SEGMENT_H + +#ifndef __ASSEMBLY__ +#include +#endif +#include + +#define __KERNEL_CS FLAT_RING1_CS +#define __KERNEL_DS FLAT_RING1_DS + +#define __USER_CS FLAT_RING3_CS +#define __USER_DS FLAT_RING3_DS + +#endif diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/smp.h b/xenolinux-2.4.25-sparse/include/asm-xen/smp.h new file mode 100644 index 0000000000..804b93c332 --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/smp.h @@ -0,0 +1,102 @@ +#ifndef __ASM_SMP_H +#define __ASM_SMP_H + +/* + * We need the APIC definitions automatically as part of 'smp.h' + */ +#ifndef __ASSEMBLY__ +#include +#include +#include +#endif + +#ifdef CONFIG_X86_LOCAL_APIC +#ifndef __ASSEMBLY__ +#include +#include +#ifdef CONFIG_X86_IO_APIC +#include +#endif +#include +#endif +#endif + +#ifdef CONFIG_SMP +#ifndef __ASSEMBLY__ + +/* + * Private routines/data + */ + +extern void smp_alloc_memory(void); +extern unsigned long phys_cpu_present_map; +extern unsigned long cpu_online_map; +extern volatile unsigned long smp_invalidate_needed; +extern int pic_mode; +extern int smp_num_siblings; +extern int cpu_sibling_map[]; + +extern void smp_flush_tlb(void); +extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); +extern void smp_send_reschedule(int cpu); +extern void smp_invalidate_rcv(void); /* Process an NMI */ +extern void (*mtrr_hook) (void); +extern void zap_low_mappings (void); + +/* + * On x86 all CPUs are mapped 1:1 to the APIC space. + * This simplifies scheduling and IPI sending and + * compresses data structures. + */ +static inline int cpu_logical_map(int cpu) +{ + return cpu; +} +static inline int cpu_number_map(int cpu) +{ + return cpu; +} + +/* + * Some lowlevel functions might want to know about + * the real APIC ID <-> CPU # mapping. + */ +#define MAX_APICID 256 +extern volatile int cpu_to_physical_apicid[NR_CPUS]; +extern volatile int physical_apicid_to_cpu[MAX_APICID]; +extern volatile int cpu_to_logical_apicid[NR_CPUS]; +extern volatile int logical_apicid_to_cpu[MAX_APICID]; + +/* + * General functions that each host system must provide. + */ + +extern void smp_boot_cpus(void); +extern void smp_store_cpu_info(int id); /* Store per CPU info (like the initial udelay numbers */ + +/* + * This function is needed by all SMP systems. It must _always_ be valid + * from the initial startup. We map APIC_BASE very early in page_setup(), + * so this is correct in the x86 case. + */ + +#define smp_processor_id() (current->processor) + +#endif /* !__ASSEMBLY__ */ + +#define NO_PROC_ID 0xFF /* No processor magic marker */ + +/* + * This magic constant controls our willingness to transfer + * a process across CPUs. Such a transfer incurs misses on the L1 + * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My + * gut feeling is this will vary by board in value. For a board + * with separate L2 cache it probably depends also on the RSS, and + * for a board with shared L2 cache it ought to decay fast as other + * processes are run. + */ + +#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */ + +#endif +#endif diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/suspend.h b/xenolinux-2.4.25-sparse/include/asm-xen/suspend.h new file mode 100644 index 0000000000..937137f005 --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/suspend.h @@ -0,0 +1,25 @@ +/****************************************************************************** + * suspend.h + * + * NB. This file is part of the Xenolinux interface with Xenoserver control + * software. It can be included in such software without invoking the GPL. + * + * Copyright (c) 2003, K A Fraser + */ + +#ifndef __ASM_XEN_SUSPEND_H__ +#define __ASM_XEN_SUSPEND_H__ + +typedef struct suspend_record_st { + /* To be filled in before resume. */ + start_info_t resume_info; + /* + * The number of a machine frame containing, in sequence, the number of + * each machine frame that contains PFN -> MFN translation table data. + */ + unsigned long pfn_to_mfn_frame_list; + /* Number of entries in the PFN -> MFN translation table. */ + unsigned long nr_pfns; +} suspend_record_t; + +#endif /* __ASM_XEN_SUSPEND_H__ */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/system.h b/xenolinux-2.4.25-sparse/include/asm-xen/system.h new file mode 100644 index 0000000000..3b59252ca3 --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/system.h @@ -0,0 +1,400 @@ +#ifndef __ASM_SYSTEM_H +#define __ASM_SYSTEM_H + +#include +#include +#include +#include +#include +#include /* for LOCK_PREFIX */ + +#ifdef __KERNEL__ + +struct task_struct; +extern void FASTCALL(__switch_to(struct task_struct *prev, + struct task_struct *next)); + +#define prepare_to_switch() \ +do { \ + struct thread_struct *__t = ¤t->thread; \ + __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (*(int *)&__t->fs) ); \ + __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (*(int *)&__t->gs) ); \ +} while (0) +#define switch_to(prev,next,last) do { \ + asm volatile("pushl %%esi\n\t" \ + "pushl %%edi\n\t" \ + "pushl %%ebp\n\t" \ + "movl %%esp,%0\n\t" /* save ESP */ \ + "movl %3,%%esp\n\t" /* restore ESP */ \ + "movl $1f,%1\n\t" /* save EIP */ \ + "pushl %4\n\t" /* restore EIP */ \ + "jmp __switch_to\n" \ + "1:\t" \ + "popl %%ebp\n\t" \ + "popl %%edi\n\t" \ + "popl %%esi\n\t" \ + :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \ + "=b" (last) \ + :"m" (next->thread.esp),"m" (next->thread.eip), \ + "a" (prev), "d" (next), \ + "b" (prev)); \ +} while (0) + +#define _set_base(addr,base) do { unsigned long __pr; \ +__asm__ __volatile__ ("movw %%dx,%1\n\t" \ + "rorl $16,%%edx\n\t" \ + "movb %%dl,%2\n\t" \ + "movb %%dh,%3" \ + :"=&d" (__pr) \ + :"m" (*((addr)+2)), \ + "m" (*((addr)+4)), \ + "m" (*((addr)+7)), \ + "0" (base) \ + ); } while(0) + +#define _set_limit(addr,limit) do { unsigned long __lr; \ +__asm__ __volatile__ ("movw %%dx,%1\n\t" \ + "rorl $16,%%edx\n\t" \ + "movb %2,%%dh\n\t" \ + "andb $0xf0,%%dh\n\t" \ + "orb %%dh,%%dl\n\t" \ + "movb %%dl,%2" \ + :"=&d" (__lr) \ + :"m" (*(addr)), \ + "m" (*((addr)+6)), \ + "0" (limit) \ + ); } while(0) + +#define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) ) +#define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1)>>12 ) + +static inline unsigned long _get_base(char * addr) +{ + unsigned long __base; + __asm__("movb %3,%%dh\n\t" + "movb %2,%%dl\n\t" + "shll $16,%%edx\n\t" + "movw %1,%%dx" + :"=&d" (__base) + :"m" (*((addr)+2)), + "m" (*((addr)+4)), + "m" (*((addr)+7))); + return __base; +} + +#define get_base(ldt) _get_base( ((char *)&(ldt)) ) + +/* + * Load a segment. Fall back on loading the zero + * segment if something goes wrong.. + */ +#define loadsegment(seg,value) \ + asm volatile("\n" \ + "1:\t" \ + "movl %0,%%" #seg "\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3:\t" \ + "pushl $0\n\t" \ + "popl %%" #seg "\n\t" \ + "jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n\t" \ + ".align 4\n\t" \ + ".long 1b,3b\n" \ + ".previous" \ + : :"m" (*(unsigned int *)&(value))) + +#define clts() ((void)0) +#define read_cr0() ({ \ + unsigned int __dummy; \ + __asm__( \ + "movl %%cr0,%0\n\t" \ + :"=r" (__dummy)); \ + __dummy; \ +}) +#define write_cr0(x) \ + __asm__("movl %0,%%cr0": :"r" (x)); + +#define read_cr4() ({ \ + unsigned int __dummy; \ + __asm__( \ + "movl %%cr4,%0\n\t" \ + :"=r" (__dummy)); \ + __dummy; \ +}) +#define write_cr4(x) \ + __asm__("movl %0,%%cr4": :"r" (x)); +#define stts() (HYPERVISOR_fpu_taskswitch()) + +#endif /* __KERNEL__ */ + +#define wbinvd() \ + __asm__ __volatile__ ("wbinvd": : :"memory"); + +static inline unsigned long get_limit(unsigned long segment) +{ + unsigned long __limit; + __asm__("lsll %1,%0" + :"=r" (__limit):"r" (segment)); + return __limit+1; +} + +#define nop() __asm__ __volatile__ ("nop") + +#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) + +#define tas(ptr) (xchg((ptr),1)) + +struct __xchg_dummy { unsigned long a[100]; }; +#define __xg(x) ((struct __xchg_dummy *)(x)) + + +/* + * The semantics of XCHGCMP8B are a bit strange, this is why + * there is a loop and the loading of %%eax and %%edx has to + * be inside. This inlines well in most cases, the cached + * cost is around ~38 cycles. (in the future we might want + * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that + * might have an implicit FPU-save as a cost, so it's not + * clear which path to go.) + * + * chmxchg8b must be used with the lock prefix here to allow + * the instruction to be executed atomically, see page 3-102 + * of the instruction set reference 24319102.pdf. We need + * the reader side to see the coherent 64bit value. + */ +static inline void __set_64bit (unsigned long long * ptr, + unsigned int low, unsigned int high) +{ + __asm__ __volatile__ ( + "\n1:\t" + "movl (%0), %%eax\n\t" + "movl 4(%0), %%edx\n\t" + "lock cmpxchg8b (%0)\n\t" + "jnz 1b" + : /* no outputs */ + : "D"(ptr), + "b"(low), + "c"(high) + : "ax","dx","memory"); +} + +static inline void __set_64bit_constant (unsigned long long *ptr, + unsigned long long value) +{ + __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL)); +} +#define ll_low(x) *(((unsigned int*)&(x))+0) +#define ll_high(x) *(((unsigned int*)&(x))+1) + +static inline void __set_64bit_var (unsigned long long *ptr, + unsigned long long value) +{ + __set_64bit(ptr,ll_low(value), ll_high(value)); +} + +#define set_64bit(ptr,value) \ +(__builtin_constant_p(value) ? \ + __set_64bit_constant(ptr, value) : \ + __set_64bit_var(ptr, value) ) + +#define _set_64bit(ptr,value) \ +(__builtin_constant_p(value) ? \ + __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \ + __set_64bit(ptr, ll_low(value), ll_high(value)) ) + +/* + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway + * Note 2: xchg has side effect, so that attribute volatile is necessary, + * but generally the primitive is invalid, *ptr is output argument. --ANK + */ +static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) +{ + switch (size) { + case 1: + __asm__ __volatile__("xchgb %b0,%1" + :"=q" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 2: + __asm__ __volatile__("xchgw %w0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 4: + __asm__ __volatile__("xchgl %0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + } + return x; +} + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. + */ + +#ifdef CONFIG_X86_CMPXCHG +#define __HAVE_ARCH_CMPXCHG 1 + +static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + +#define cmpxchg(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ + (unsigned long)(n),sizeof(*(ptr)))) + +#else +/* Compiling for a 386 proper. Is it worth implementing via cli/sti? */ +#endif + +/* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. + * + * For now, "wmb()" doesn't actually do anything, as all + * Intel CPU's follow what Intel calls a *Processor Order*, + * in which all writes are seen in the program order even + * outside the CPU. + * + * I expect future Intel CPU's to have a weaker ordering, + * but I'd also expect them to finally get their act together + * and add some real memory barriers if so. + * + * Some non intel clones support out of order store. wmb() ceases to be a + * nop for these. + */ + +#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") +#define rmb() mb() + +#ifdef CONFIG_X86_OOSTORE +#define wmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") +#else +#define wmb() __asm__ __volatile__ ("": : :"memory") +#endif + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#define set_mb(var, value) do { xchg(&var, value); } while (0) +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define set_mb(var, value) do { var = value; barrier(); } while (0) +#endif + +#define set_wmb(var, value) do { var = value; wmb(); } while (0) + + +#define __cli() \ +do { \ + clear_bit(EVENTS_MASTER_ENABLE_BIT, &HYPERVISOR_shared_info->events_mask);\ + barrier(); \ +} while (0) + +#define __sti() \ +do { \ + shared_info_t *_shared = HYPERVISOR_shared_info; \ + set_bit(EVENTS_MASTER_ENABLE_BIT, &_shared->events_mask); \ + barrier(); \ + if ( unlikely(_shared->events) ) do_hypervisor_callback(NULL); \ +} while (0) + +#define __save_flags(x) \ +do { \ + (x) = test_bit(EVENTS_MASTER_ENABLE_BIT, \ + &HYPERVISOR_shared_info->events_mask); \ + barrier(); \ +} while (0) + +#define __restore_flags(x) do { if (x) __sti(); } while (0) + +#define safe_halt() ((void)0) + +#define __save_and_cli(x) do { __save_flags(x); __cli(); } while(0); +#define __save_and_sti(x) do { __save_flags(x); __sti(); } while(0); + +#define local_irq_save(x) \ +do { \ + (x) = test_and_clear_bit(EVENTS_MASTER_ENABLE_BIT, \ + &HYPERVISOR_shared_info->events_mask); \ + barrier(); \ +} while (0) +#define local_irq_restore(x) __restore_flags(x) +#define local_irq_disable() __cli() +#define local_irq_enable() __sti() + + +#ifdef CONFIG_SMP +#error no SMP +extern void __global_cli(void); +extern void __global_sti(void); +extern unsigned long __global_save_flags(void); +extern void __global_restore_flags(unsigned long); +#define cli() __global_cli() +#define sti() __global_sti() +#define save_flags(x) ((x)=__global_save_flags()) +#define restore_flags(x) __global_restore_flags(x) +#define save_and_cli(x) do { save_flags(x); cli(); } while(0); +#define save_and_sti(x) do { save_flags(x); sti(); } while(0); + +#else + +#define cli() __cli() +#define sti() __sti() +#define save_flags(x) __save_flags(x) +#define restore_flags(x) __restore_flags(x) +#define save_and_cli(x) __save_and_cli(x) +#define save_and_sti(x) __save_and_sti(x) + +#endif + +/* + * disable hlt during certain critical i/o operations + */ +#define HAVE_DISABLE_HLT +void disable_hlt(void); +void enable_hlt(void); + +extern unsigned long dmi_broken; +extern int is_sony_vaio_laptop; + +#define BROKEN_ACPI_Sx 0x0001 +#define BROKEN_INIT_AFTER_S1 0x0002 +#define BROKEN_PNP_BIOS 0x0004 + +#endif diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/vga.h b/xenolinux-2.4.25-sparse/include/asm-xen/vga.h new file mode 100644 index 0000000000..d0624cf480 --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/vga.h @@ -0,0 +1,42 @@ +/* + * Access to VGA videoram + * + * (c) 1998 Martin Mares + */ + +#ifndef _LINUX_ASM_VGA_H_ +#define _LINUX_ASM_VGA_H_ + +#include + +extern unsigned char *vgacon_mmap; + +static unsigned long VGA_MAP_MEM(unsigned long x) +{ + if( vgacon_mmap == NULL ) + { + /* This is our first time in this function. This whole thing + is a rather grim hack. We know we're going to get asked + to map a 32KB region between 0xb0000 and 0xb8000 because + that's what VGAs are. We used the boot time permanent + fixed map region, and map it to machine pages. + */ + if( x != 0xb8000 ) + panic("Argghh! VGA Console is weird. 1:%08lx\n",x); + + vgacon_mmap = (unsigned char*) bt_ioremap( 0xa0000, 128*1024 ); + return (unsigned long) (vgacon_mmap+x-0xa0000); + } + else + { + if( x != 0xc0000 && x != 0xa0000 ) /* vidmem_end or charmap fonts */ + panic("Argghh! VGA Console is weird. 2:%08lx\n",x); + return (unsigned long) (vgacon_mmap+x-0xa0000); + } + return 0; +} + +static inline unsigned char vga_readb(unsigned char * x) { return (*(x)); } +static inline void vga_writeb(unsigned char x, unsigned char *y) { *(y) = (x); } + +#endif diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/xeno_proc.h b/xenolinux-2.4.25-sparse/include/asm-xen/xeno_proc.h new file mode 100644 index 0000000000..d62791e95c --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/xeno_proc.h @@ -0,0 +1,13 @@ + +#ifndef __ASM_XEN_PROC_H__ +#define __ASM_XEN_PROC_H__ + +#include +#include + +extern struct proc_dir_entry *create_xen_proc_entry( + const char *name, mode_t mode); +extern void remove_xen_proc_entry( + const char *name); + +#endif /* __ASM_XEN_PROC_H__ */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/bugs.h b/xenolinux-2.4.25-sparse/include/asm-xeno/bugs.h deleted file mode 100644 index c46b6a0b15..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/bugs.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * include/asm-i386/bugs.h - * - * Copyright (C) 1994 Linus Torvalds - * - * Cyrix stuff, June 1998 by: - * - Rafael R. Reilova (moved everything from head.S), - * - * - Channing Corn (tests & fixes), - * - Andrew D. Balsa (code cleanup). - * - * Pentium III FXSR, SSE support - * Gareth Hughes , May 2000 - */ - -/* - * This is included by init/main.c to check for architecture-dependent bugs. - * - * Needs: - * void check_bugs(void); - */ - -#include -#include -#include -#include - - -static void __init check_fpu(void) -{ - boot_cpu_data.fdiv_bug = 0; -} - -static void __init check_hlt(void) -{ - boot_cpu_data.hlt_works_ok = 1; -} - -static void __init check_bugs(void) -{ - extern void __init boot_init_fpu(void); - - identify_cpu(&boot_cpu_data); - boot_init_fpu(); -#ifndef CONFIG_SMP - printk("CPU: "); - print_cpu_info(&boot_cpu_data); -#endif - check_fpu(); - check_hlt(); - system_utsname.machine[1] = '0' + - (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); -} diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/control_if.h b/xenolinux-2.4.25-sparse/include/asm-xeno/control_if.h deleted file mode 100644 index dd15a96bff..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/control_if.h +++ /dev/null @@ -1,32 +0,0 @@ -/****************************************************************************** - * control_if.h - * - * Copyright (c) 2004, K A Fraser - */ - -#ifndef __CONTROL_IF_H__ -#define __CONTROL_IF_H__ - -typedef struct { - u8 type; /* echoed in response */ - u8 subtype; /* echoed in response */ - u8 id; /* echoed in response */ - u8 length; /* number of bytes in 'msg' */ - unsigned char msg[60]; /* type-specific message data */ -} control_msg_t; - -#define CONTROL_RING_SIZE 8 -typedef unsigned int CONTROL_RING_IDX; -#define MASK_CONTROL_IDX(_i) ((_i)&(CONTROL_RING_SIZE-1)) - -typedef struct { - control_msg_t tx_ring[CONTROL_RING_SIZE]; /* guest-OS -> controller */ - control_msg_t rx_ring[CONTROL_RING_SIZE]; /* controller -> guest-OS */ - CONTROL_RING_IDX tx_req_prod, tx_resp_prod; - CONTROL_RING_IDX rx_req_prod, rx_resp_prod; -} control_if_t; - -#define CMSG_CONSOLE 0 -#define CMSG_CONSOLE_DATA 0 - -#endif /* __CONTROL_IF_H__ */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/desc.h b/xenolinux-2.4.25-sparse/include/asm-xeno/desc.h deleted file mode 100644 index 33309a9671..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/desc.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef __ARCH_DESC_H -#define __ARCH_DESC_H - -#include - -#ifndef __ASSEMBLY__ - -struct desc_struct { - unsigned long a,b; -}; - -struct Xgt_desc_struct { - unsigned short size; - unsigned long address __attribute__((packed)); -}; - -extern struct desc_struct default_ldt[]; - -static inline void clear_LDT(void) -{ - /* - * NB. We load the default_ldt for lcall7/27 handling on demand, as - * it slows down context switching. Noone uses it anyway. - */ - queue_set_ldt(0, 0); -} - -static inline void load_LDT(mm_context_t *pc) -{ - void *segments = pc->ldt; - int count = pc->size; - - if ( count == 0 ) - segments = NULL; - - queue_set_ldt((unsigned long)segments, count); -} - -#endif /* __ASSEMBLY__ */ - -#endif /* __ARCH_DESC_H__ */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/evtchn.h b/xenolinux-2.4.25-sparse/include/asm-xeno/evtchn.h deleted file mode 100644 index 88c278d86e..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/evtchn.h +++ /dev/null @@ -1,29 +0,0 @@ -/****************************************************************************** - * evtchn.h - * - * Driver for receiving and demuxing event-channel signals. - * - * Copyright (c) 2004, K A Fraser - */ - -#ifndef __ASM_EVTCHN_H__ -#define __ASM_EVTCHN_H__ - -typedef void (*evtchn_receiver_t)(unsigned int); -#define PORT_NORMAL 0x0000 -#define PORT_DISCONNECT 0x8000 -#define PORTIDX_MASK 0x7fff - -/* /dev/xen/evtchn resides at device number major=10, minor=200 */ -#define EVTCHN_MINOR 200 - -/* /dev/xen/evtchn ioctls: */ -/* EVTCHN_RESET: Clear and reinit the event buffer. Clear error condition. */ -#define EVTCHN_RESET _IO('E', 1) - -int evtchn_request_port(unsigned int port, evtchn_receiver_t rx_fn); -int evtchn_free_port(unsigned int port); -void evtchn_clear_port(unsigned int port); - - -#endif /* __ASM_EVTCHN_H__ */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/fixmap.h b/xenolinux-2.4.25-sparse/include/asm-xeno/fixmap.h deleted file mode 100644 index 2441b01d4e..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/fixmap.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * fixmap.h: compile-time virtual memory allocation - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1998 Ingo Molnar - * - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 - */ - -#ifndef _ASM_FIXMAP_H -#define _ASM_FIXMAP_H - -#include -#include -#include -#include - -/* - * Here we define all the compile-time 'special' virtual - * addresses. The point is to have a constant address at - * compile time, but to set the physical address only - * in the boot process. We allocate these special addresses - * from the end of virtual memory (0xfffff000) backwards. - * Also this lets us do fail-safe vmalloc(), we - * can guarantee that these special addresses and - * vmalloc()-ed addresses never overlap. - * - * these 'compile-time allocated' memory buffers are - * fixed-size 4k pages. (or larger if used with an increment - * highger than 1) use fixmap_set(idx,phys) to associate - * physical memory with fixmap indices. - * - * TLB entries of such buffers will not be flushed across - * task switches. - */ - -enum fixed_addresses { -#ifdef CONFIG_HIGHMEM_XXX - FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ - FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, -#endif - FIX_BLKRING_BASE, - FIX_NETRING0_BASE, - FIX_NETRING1_BASE, - FIX_NETRING2_BASE, - FIX_NETRING3_BASE, - FIX_SHARED_INFO, - -#ifdef CONFIG_VGA_CONSOLE -#define NR_FIX_BTMAPS 32 /* 128KB For the Dom0 VGA Console A0000-C0000 */ -#else -#define NR_FIX_BTMAPS 1 /* in case anyone wants it in future... */ -#endif - FIX_BTMAP_END, - FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1, - /* our bt_ioremap is permanent, unlike other architectures */ - - __end_of_permanent_fixed_addresses, - __end_of_fixed_addresses = __end_of_permanent_fixed_addresses -}; - -extern void __set_fixmap (enum fixed_addresses idx, - unsigned long phys, pgprot_t flags); - -#define set_fixmap(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL) -/* - * Some hardware wants to get fixmapped without caching. - */ -#define set_fixmap_nocache(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) - -extern void clear_fixmap(enum fixed_addresses idx); - -/* - * used by vmalloc.c. - * - * Leave one empty page between vmalloc'ed areas and - * the start of the fixmap, and leave one page empty - * at the top of mem.. - */ -#define FIXADDR_TOP (HYPERVISOR_VIRT_START - 2*PAGE_SIZE) -#define __FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) - -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) - -/* - * 'index to address' translation. If anyone tries to use the idx - * directly without tranlation, we catch the bug with a NULL-deference - * kernel oops. Illegal ranges of incoming indices are caught too. - */ -static inline unsigned long fix_to_virt(unsigned int idx) -{ - return __fix_to_virt(idx); -} - -#endif diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/highmem.h b/xenolinux-2.4.25-sparse/include/asm-xeno/highmem.h deleted file mode 100644 index 7e56b1b32d..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/highmem.h +++ /dev/null @@ -1,2 +0,0 @@ -#error "Highmem unsupported!" - diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/hw_irq.h b/xenolinux-2.4.25-sparse/include/asm-xeno/hw_irq.h deleted file mode 100644 index d99d15bd24..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/hw_irq.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef _ASM_HW_IRQ_H -#define _ASM_HW_IRQ_H - -/* - * linux/include/asm/hw_irq.h - * - * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar - */ - -#include -#include -#include -#include - -#define SYSCALL_VECTOR 0x80 - -extern int irq_vector[NR_IRQS]; - -extern atomic_t irq_err_count; -extern atomic_t irq_mis_count; - -extern char _stext, _etext; - -extern unsigned long prof_cpu_mask; -extern unsigned int * prof_buffer; -extern unsigned long prof_len; -extern unsigned long prof_shift; - -/* - * x86 profiling function, SMP safe. We might want to do this in - * assembly totally? - */ -static inline void x86_do_profile (unsigned long eip) -{ - if (!prof_buffer) - return; - - /* - * Only measure the CPUs specified by /proc/irq/prof_cpu_mask. - * (default is all CPUs.) - */ - if (!((1<>= prof_shift; - /* - * Don't ignore out-of-bounds EIP values silently, - * put them into the last histogram slot, so if - * present, they will show up as a sharp peak. - */ - if (eip > prof_len-1) - eip = prof_len-1; - atomic_inc((atomic_t *)&prof_buffer[eip]); -} - -static inline void hw_resend_irq(struct hw_interrupt_type *h, - unsigned int i) -{} - -#endif /* _ASM_HW_IRQ_H */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/hypervisor.h b/xenolinux-2.4.25-sparse/include/asm-xeno/hypervisor.h deleted file mode 100644 index fe13471aa5..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/hypervisor.h +++ /dev/null @@ -1,465 +0,0 @@ -/****************************************************************************** - * hypervisor.h - * - * Linux-specific hypervisor handling. - * - * Copyright (c) 2002, K A Fraser - */ - -#ifndef __HYPERVISOR_H__ -#define __HYPERVISOR_H__ - -#include -#include -#include -#include -#include -#include - -/* arch/xeno/kernel/setup.c */ -union start_info_union -{ - start_info_t start_info; - char padding[512]; -}; -extern union start_info_union start_info_union; -#define start_info (start_info_union.start_info) - -/* arch/xeno/kernel/hypervisor.c */ -void do_hypervisor_callback(struct pt_regs *regs); - - -/* arch/xeno/mm/hypervisor.c */ -/* - * NB. ptr values should be PHYSICAL, not MACHINE. 'vals' should be already - * be MACHINE addresses. - */ - -extern unsigned int mmu_update_queue_idx; - -void queue_l1_entry_update(pte_t *ptr, unsigned long val); -void queue_l2_entry_update(pmd_t *ptr, unsigned long val); -void queue_pt_switch(unsigned long ptr); -void queue_tlb_flush(void); -void queue_invlpg(unsigned long ptr); -void queue_pgd_pin(unsigned long ptr); -void queue_pgd_unpin(unsigned long ptr); -void queue_pte_pin(unsigned long ptr); -void queue_pte_unpin(unsigned long ptr); -void queue_set_ldt(unsigned long ptr, unsigned long bytes); -#define MMU_UPDATE_DEBUG 0 - -#if MMU_UPDATE_DEBUG > 0 -typedef struct { - void *ptr; - unsigned long val, pteval; - void *ptep; - int line; char *file; -} page_update_debug_t; -extern page_update_debug_t update_debug_queue[]; -#define queue_l1_entry_update(_p,_v) ({ \ - update_debug_queue[mmu_update_queue_idx].ptr = (_p); \ - update_debug_queue[mmu_update_queue_idx].val = (_v); \ - update_debug_queue[mmu_update_queue_idx].line = __LINE__; \ - update_debug_queue[mmu_update_queue_idx].file = __FILE__; \ - queue_l1_entry_update((_p),(_v)); \ -}) -#define queue_l2_entry_update(_p,_v) ({ \ - update_debug_queue[mmu_update_queue_idx].ptr = (_p); \ - update_debug_queue[mmu_update_queue_idx].val = (_v); \ - update_debug_queue[mmu_update_queue_idx].line = __LINE__; \ - update_debug_queue[mmu_update_queue_idx].file = __FILE__; \ - queue_l2_entry_update((_p),(_v)); \ -}) -#endif - -#if MMU_UPDATE_DEBUG > 1 -#undef queue_l1_entry_update -#undef queue_l2_entry_update -#define queue_l1_entry_update(_p,_v) ({ \ - update_debug_queue[mmu_update_queue_idx].ptr = (_p); \ - update_debug_queue[mmu_update_queue_idx].val = (_v); \ - update_debug_queue[mmu_update_queue_idx].line = __LINE__; \ - update_debug_queue[mmu_update_queue_idx].file = __FILE__; \ - printk("L1 %s %d: %08lx (%08lx -> %08lx)\n", __FILE__, __LINE__, \ - (_p), pte_val(_p), \ - (unsigned long)(_v)); \ - queue_l1_entry_update((_p),(_v)); \ -}) -#define queue_l2_entry_update(_p,_v) ({ \ - update_debug_queue[mmu_update_queue_idx].ptr = (_p); \ - update_debug_queue[mmu_update_queue_idx].val = (_v); \ - update_debug_queue[mmu_update_queue_idx].line = __LINE__; \ - update_debug_queue[mmu_update_queue_idx].file = __FILE__; \ - printk("L2 %s %d: %08lx (%08lx -> %08lx)\n", __FILE__, __LINE__, \ - (_p), pmd_val(_p), \ - (unsigned long)(_v)); \ - queue_l2_entry_update((_p),(_v)); \ -}) -#define queue_pt_switch(_p) ({ \ - printk("PTSWITCH %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \ - queue_pt_switch(_p); \ -}) -#define queue_tlb_flush() ({ \ - printk("TLB FLUSH %s %d\n", __FILE__, __LINE__); \ - queue_tlb_flush(); \ -}) -#define queue_invlpg(_p) ({ \ - printk("INVLPG %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \ - queue_invlpg(_p); \ -}) -#define queue_pgd_pin(_p) ({ \ - printk("PGD PIN %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \ - queue_pgd_pin(_p); \ -}) -#define queue_pgd_unpin(_p) ({ \ - printk("PGD UNPIN %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \ - queue_pgd_unpin(_p); \ -}) -#define queue_pte_pin(_p) ({ \ - printk("PTE PIN %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \ - queue_pte_pin(_p); \ -}) -#define queue_pte_unpin(_p) ({ \ - printk("PTE UNPIN %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \ - queue_pte_unpin(_p); \ -}) -#define queue_set_ldt(_p,_l) ({ \ - printk("SETL LDT %s %d: %08lx %d\n", __FILE__, __LINE__, (_p), (_l)); \ - queue_set_ldt((_p), (_l)); \ -}) -#endif - -void _flush_page_update_queue(void); -static inline int flush_page_update_queue(void) -{ - unsigned int idx = mmu_update_queue_idx; - if ( idx != 0 ) _flush_page_update_queue(); - return idx; -} -#define XENO_flush_page_update_queue() (_flush_page_update_queue()) -void MULTICALL_flush_page_update_queue(void); - - -/* - * Assembler stubs for hyper-calls. - */ - -static inline int HYPERVISOR_set_trap_table(trap_info_t *table) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_set_trap_table), - "b" (table) : "memory" ); - - return ret; -} - -static inline int HYPERVISOR_mmu_update(mmu_update_t *req, int count) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_mmu_update), - "b" (req), "c" (count) : "memory" ); - - if ( unlikely(ret < 0) ) - { - extern void show_trace(unsigned long *); - show_trace(NULL); - panic("Failed mmu update: %p, %d", req, count); - } - - return ret; -} - -static inline int HYPERVISOR_set_gdt(unsigned long *frame_list, int entries) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_set_gdt), - "b" (frame_list), "c" (entries) : "memory" ); - - - return ret; -} - -static inline int HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_stack_switch), - "b" (ss), "c" (esp) : "memory" ); - - return ret; -} - -static inline int HYPERVISOR_set_callbacks( - unsigned long event_selector, unsigned long event_address, - unsigned long failsafe_selector, unsigned long failsafe_address) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_set_callbacks), - "b" (event_selector), "c" (event_address), - "d" (failsafe_selector), "S" (failsafe_address) : "memory" ); - - return ret; -} - -static inline int HYPERVISOR_net_io_op(netop_t *op) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_net_io_op), - "b" (op) : "memory" ); - - return ret; -} - -static inline int HYPERVISOR_fpu_taskswitch(void) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_fpu_taskswitch) : "memory" ); - - return ret; -} - -static inline int HYPERVISOR_yield(void) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_sched_op), - "b" (SCHEDOP_yield) : "memory" ); - - return ret; -} - -static inline int HYPERVISOR_block(void) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_sched_op), - "b" (SCHEDOP_block) : "memory" ); - - return ret; -} - -static inline int HYPERVISOR_exit(void) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_sched_op), - "b" (SCHEDOP_exit) : "memory" ); - - return ret; -} - -static inline int HYPERVISOR_stop(unsigned long srec) -{ - int ret; - /* NB. On suspend, control software expects a suspend record in %esi. */ - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_sched_op), - "b" (SCHEDOP_stop), "S" (srec) : "memory" ); - - return ret; -} - -static inline long HYPERVISOR_set_timer_op(u64 timeout) -{ - int ret; - unsigned long timeout_hi = (unsigned long)(timeout>>32); - unsigned long timeout_lo = (unsigned long)timeout; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_set_timer_op), - "b" (timeout_hi), "c" (timeout_lo) : "memory" ); - - return ret; -} - -static inline int HYPERVISOR_dom0_op(dom0_op_t *dom0_op) -{ - int ret; - dom0_op->interface_version = DOM0_INTERFACE_VERSION; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_dom0_op), - "b" (dom0_op) : "memory" ); - - return ret; -} - -static inline int HYPERVISOR_network_op(void *network_op) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_network_op), - "b" (network_op) : "memory" ); - - return ret; -} - -static inline int HYPERVISOR_block_io_op(void *block_io_op) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_block_io_op), - "b" (block_io_op) : "memory" ); - - return ret; -} - -static inline int HYPERVISOR_set_debugreg(int reg, unsigned long value) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_set_debugreg), - "b" (reg), "c" (value) : "memory" ); - - return ret; -} - -static inline unsigned long HYPERVISOR_get_debugreg(int reg) -{ - unsigned long ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_get_debugreg), - "b" (reg) : "memory" ); - - return ret; -} - -static inline int HYPERVISOR_update_descriptor( - unsigned long pa, unsigned long word1, unsigned long word2) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_update_descriptor), - "b" (pa), "c" (word1), "d" (word2) : "memory" ); - - return ret; -} - -static inline int HYPERVISOR_set_fast_trap(int idx) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_set_fast_trap), - "b" (idx) : "memory" ); - - return ret; -} - -static inline int HYPERVISOR_dom_mem_op(void *dom_mem_op) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_dom_mem_op), - "b" (dom_mem_op) : "memory" ); - - return ret; -} - -static inline int HYPERVISOR_multicall(void *call_list, int nr_calls) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_multicall), - "b" (call_list), "c" (nr_calls) : "memory" ); - - return ret; -} - -static inline long HYPERVISOR_kbd_op(unsigned char op, unsigned char val) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_kbd_op), - "b" (op), "c" (val) : "memory" ); - - return ret; -} - -static inline int HYPERVISOR_update_va_mapping( - unsigned long page_nr, pte_t new_val, unsigned long flags) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_update_va_mapping), - "b" (page_nr), "c" ((new_val).pte_low), "d" (flags) : "memory" ); - - if ( unlikely(ret < 0) ) - panic("Failed update VA mapping: %08lx, %08lx, %08lx", - page_nr, (new_val).pte_low, flags); - - return ret; -} - -static inline int HYPERVISOR_event_channel_op(void *op) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_event_channel_op), - "b" (op) : "memory" ); - - return ret; -} - -static inline int HYPERVISOR_xen_version(int cmd) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_xen_version), - "b" (cmd) : "memory" ); - - return ret; -} - -static inline int HYPERVISOR_console_io(int cmd, int count, char *str) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_console_io), - "b" (cmd), "c" (count), "d" (str) : "memory" ); - - return ret; -} - -static inline int HYPERVISOR_physdev_op(void *physdev_op) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_physdev_op), - "b" (physdev_op) : "memory" ); - - return ret; -} - -#endif /* __HYPERVISOR_H__ */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/io.h b/xenolinux-2.4.25-sparse/include/asm-xeno/io.h deleted file mode 100644 index 3d78e20950..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/io.h +++ /dev/null @@ -1,430 +0,0 @@ -#ifndef _ASM_IO_H -#define _ASM_IO_H - -#include - -/* - * This file contains the definitions for the x86 IO instructions - * inb/inw/inl/outb/outw/outl and the "string versions" of the same - * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing" - * versions of the single-IO instructions (inb_p/inw_p/..). - * - * This file is not meant to be obfuscating: it's just complicated - * to (a) handle it all in a way that makes gcc able to optimize it - * as well as possible and (b) trying to avoid writing the same thing - * over and over again with slight variations and possibly making a - * mistake somewhere. - */ - -/* - * Thanks to James van Artsdalen for a better timing-fix than - * the two short jumps: using outb's to a nonexistent port seems - * to guarantee better timings even on fast machines. - * - * On the other hand, I'd like to be sure of a non-existent port: - * I feel a bit unsafe about using 0x80 (should be safe, though) - * - * Linus - */ - - /* - * Bit simplified and optimized by Jan Hubicka - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. - * - * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added, - * isa_read[wl] and isa_write[wl] fixed - * - Arnaldo Carvalho de Melo - */ - -#define IO_SPACE_LIMIT 0xffff - -#define XQUAD_PORTIO_BASE 0xfe400000 -#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ -#define XQUAD_PORTIO_LEN 0x80000 /* Only remapping first 2 quads */ - -#ifdef __KERNEL__ - -#include - -/* - * Temporary debugging check to catch old code using - * unmapped ISA addresses. Will be removed in 2.4. - */ -#if CONFIG_DEBUG_IOVIRT - extern void *__io_virt_debug(unsigned long x, const char *file, int line); - extern unsigned long __io_phys_debug(unsigned long x, const char *file, int line); - #define __io_virt(x) __io_virt_debug((unsigned long)(x), __FILE__, __LINE__) -//#define __io_phys(x) __io_phys_debug((unsigned long)(x), __FILE__, __LINE__) -#else - #define __io_virt(x) ((void *)(x)) -//#define __io_phys(x) __pa(x) -#endif - -/** - * virt_to_phys - map virtual addresses to physical - * @address: address to remap - * - * The returned physical address is the physical (CPU) mapping for - * the memory address given. It is only valid to use this function on - * addresses directly mapped or allocated via kmalloc. - * - * This function does not give bus mappings for DMA transfers. In - * almost all conceivable cases a device driver should not be using - * this function - */ - -static inline unsigned long virt_to_phys(volatile void * address) -{ - return __pa(address); -} - -/** - * phys_to_virt - map physical address to virtual - * @address: address to remap - * - * The returned virtual address is a current CPU mapping for - * the memory address given. It is only valid to use this function on - * addresses that have a kernel mapping - * - * This function does not handle bus mappings for DMA transfers. In - * almost all conceivable cases a device driver should not be using - * this function - */ - -static inline void * phys_to_virt(unsigned long address) -{ - return __va(address); -} - -/* - * Change "struct page" to physical address. - */ -#ifdef CONFIG_HIGHMEM64G -#define page_to_phys(page) ((u64)(page - mem_map) << PAGE_SHIFT) -#else -#define page_to_phys(page) ((page - mem_map) << PAGE_SHIFT) -#endif - -extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags); - -/** - * ioremap - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - */ - -static inline void * ioremap (unsigned long offset, unsigned long size) -{ - return __ioremap(offset, size, 0); -} - -/** - * ioremap_nocache - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap_nocache performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - * - * This version of ioremap ensures that the memory is marked uncachable - * on the CPU as well as honouring existing caching rules from things like - * the PCI bus. Note that there are other caches and buffers on many - * busses. In paticular driver authors should read up on PCI writes - * - * It's useful if some control registers are in such an area and - * write combining or read caching is not desirable: - */ - -static inline void * ioremap_nocache (unsigned long offset, unsigned long size) -{ - return __ioremap(offset, size, _PAGE_PCD); -} - -extern void iounmap(void *addr); - -/* - * bt_ioremap() and bt_iounmap() are for temporary early boot-time - * mappings, before the real ioremap() is functional. - * A boot-time mapping is currently limited to at most 16 pages. - */ -extern void *bt_ioremap(unsigned long offset, unsigned long size); -extern void bt_iounmap(void *addr, unsigned long size); - -/* - * IO bus memory addresses are also 1:1 with the physical address - */ -#define virt_to_bus(_x) phys_to_machine(virt_to_phys(_x)) -#define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x)) -#define page_to_bus(_x) phys_to_machine(page_to_phys(_x)) - -/* - * readX/writeX() are used to access memory mapped devices. On some - * architectures the memory mapped IO stuff needs to be accessed - * differently. On the x86 architecture, we just read/write the - * memory location directly. - */ - -#define readb(addr) (*(volatile unsigned char *) __io_virt(addr)) -#define readw(addr) (*(volatile unsigned short *) __io_virt(addr)) -#define readl(addr) (*(volatile unsigned int *) __io_virt(addr)) -#define __raw_readb readb -#define __raw_readw readw -#define __raw_readl readl - -#define writeb(b,addr) (*(volatile unsigned char *) __io_virt(addr) = (b)) -#define writew(b,addr) (*(volatile unsigned short *) __io_virt(addr) = (b)) -#define writel(b,addr) (*(volatile unsigned int *) __io_virt(addr) = (b)) -#define __raw_writeb writeb -#define __raw_writew writew -#define __raw_writel writel - -#define memset_io(a,b,c) __memset(__io_virt(a),(b),(c)) -#define memcpy_fromio(a,b,c) __memcpy((a),__io_virt(b),(c)) -#define memcpy_toio(a,b,c) __memcpy(__io_virt(a),(b),(c)) - -/* - * ISA space is 'always mapped' on a typical x86 system, no need to - * explicitly ioremap() it. The fact that the ISA IO space is mapped - * to PAGE_OFFSET is pure coincidence - it does not mean ISA values - * are physical addresses. The following constant pointer can be - * used as the IO-area pointer (it can be iounmapped as well, so the - * analogy with PCI is quite large): - */ -#define __ISA_IO_base ((char *)(PAGE_OFFSET)) - -#define isa_readb(a) readb(__ISA_IO_base + (a)) -#define isa_readw(a) readw(__ISA_IO_base + (a)) -#define isa_readl(a) readl(__ISA_IO_base + (a)) -#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a)) -#define isa_writew(w,a) writew(w,__ISA_IO_base + (a)) -#define isa_writel(l,a) writel(l,__ISA_IO_base + (a)) -#define isa_memset_io(a,b,c) memset_io(__ISA_IO_base + (a),(b),(c)) -#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),__ISA_IO_base + (b),(c)) -#define isa_memcpy_toio(a,b,c) memcpy_toio(__ISA_IO_base + (a),(b),(c)) - - -/* - * Again, i386 does not require mem IO specific function. - */ - -#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(b),(c),(d)) -#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(__ISA_IO_base + (b)),(c),(d)) - -/** - * check_signature - find BIOS signatures - * @io_addr: mmio address to check - * @signature: signature block - * @length: length of signature - * - * Perform a signature comparison with the mmio address io_addr. This - * address should have been obtained by ioremap. - * Returns 1 on a match. - */ - -static inline int check_signature(unsigned long io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - -/** - * isa_check_signature - find BIOS signatures - * @io_addr: mmio address to check - * @signature: signature block - * @length: length of signature - * - * Perform a signature comparison with the ISA mmio address io_addr. - * Returns 1 on a match. - * - * This function is deprecated. New drivers should use ioremap and - * check_signature. - */ - - -static inline int isa_check_signature(unsigned long io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (isa_readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - -/* - * Cache management - * - * This needed for two cases - * 1. Out of order aware processors - * 2. Accidentally out of order processors (PPro errata #51) - */ - -#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) - -static inline void flush_write_buffers(void) -{ - __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory"); -} - -#define dma_cache_inv(_start,_size) flush_write_buffers() -#define dma_cache_wback(_start,_size) flush_write_buffers() -#define dma_cache_wback_inv(_start,_size) flush_write_buffers() - -#else - -/* Nothing to do */ - -#define dma_cache_inv(_start,_size) do { } while (0) -#define dma_cache_wback(_start,_size) do { } while (0) -#define dma_cache_wback_inv(_start,_size) do { } while (0) -#define flush_write_buffers() - -#endif - -#endif /* __KERNEL__ */ - -#ifdef SLOW_IO_BY_JUMPING -#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:" -#else -#define __SLOW_DOWN_IO "\noutb %%al,$0x80" -#endif - -#ifdef REALLY_SLOW_IO -#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO -#else -#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO -#endif - -#ifdef CONFIG_MULTIQUAD -extern void *xquad_portio; /* Where the IO area was mapped */ -#endif /* CONFIG_MULTIQUAD */ - -/* - * Talk about misusing macros.. - */ -#define __OUT1(s,x) \ -static inline void out##s(unsigned x value, unsigned short port) { - -#define __OUT2(s,s1,s2) \ -__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" - -#if defined (CONFIG_MULTIQUAD) && !defined(STANDALONE) -#define __OUTQ(s,ss,x) /* Do the equivalent of the portio op on quads */ \ -static inline void out##ss(unsigned x value, unsigned short port) { \ - if (xquad_portio) \ - write##s(value, (unsigned long) xquad_portio + port); \ - else /* We're still in early boot, running on quad 0 */ \ - out##ss##_local(value, port); \ -} \ -static inline void out##ss##_quad(unsigned x value, unsigned short port, int quad) { \ - if (xquad_portio) \ - write##s(value, (unsigned long) xquad_portio + (XQUAD_PORTIO_QUAD*quad)\ - + port); \ -} - -#define __INQ(s,ss) /* Do the equivalent of the portio op on quads */ \ -static inline RETURN_TYPE in##ss(unsigned short port) { \ - if (xquad_portio) \ - return read##s((unsigned long) xquad_portio + port); \ - else /* We're still in early boot, running on quad 0 */ \ - return in##ss##_local(port); \ -} \ -static inline RETURN_TYPE in##ss##_quad(unsigned short port, int quad) { \ - if (xquad_portio) \ - return read##s((unsigned long) xquad_portio + (XQUAD_PORTIO_QUAD*quad)\ - + port); \ - else\ - return 0;\ -} -#endif /* CONFIG_MULTIQUAD && !STANDALONE */ - -#if !defined(CONFIG_MULTIQUAD) || defined(STANDALONE) -#define __OUT(s,s1,x) \ -__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ -__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} -#else -/* Make the default portio routines operate on quad 0 */ -#define __OUT(s,s1,x) \ -__OUT1(s##_local,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ -__OUT1(s##_p_local,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \ -__OUTQ(s,s,x) \ -__OUTQ(s,s##_p,x) -#endif /* !CONFIG_MULTIQUAD || STANDALONE */ - -#define __IN1(s) \ -static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v; - -#define __IN2(s,s1,s2) \ -__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" - -#if !defined(CONFIG_MULTIQUAD) || defined(STANDALONE) -#define __IN(s,s1,i...) \ -__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ -__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } -#else -/* Make the default portio routines operate on quad 0 */ -#define __IN(s,s1,i...) \ -__IN1(s##_local) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ -__IN1(s##_p_local) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ -__INQ(s,s) \ -__INQ(s,s##_p) -#endif /* !CONFIG_MULTIQUAD || STANDALONE */ - -#define __INS(s) \ -static inline void ins##s(unsigned short port, void * addr, unsigned long count) \ -{ __asm__ __volatile__ ("rep ; ins" #s \ -: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } - -#define __OUTS(s) \ -static inline void outs##s(unsigned short port, const void * addr, unsigned long count) \ -{ __asm__ __volatile__ ("rep ; outs" #s \ -: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } - -#define RETURN_TYPE unsigned char -__IN(b,"") -#undef RETURN_TYPE -#define RETURN_TYPE unsigned short -__IN(w,"") -#undef RETURN_TYPE -#define RETURN_TYPE unsigned int -__IN(l,"") -#undef RETURN_TYPE - -__OUT(b,"b",char) -__OUT(w,"w",short) -__OUT(l,,int) - -__INS(b) -__INS(w) -__INS(l) - -__OUTS(b) -__OUTS(w) -__OUTS(l) - -#endif diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/irq.h b/xenolinux-2.4.25-sparse/include/asm-xeno/irq.h deleted file mode 100644 index 917a05334d..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/irq.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef _ASM_IRQ_H -#define _ASM_IRQ_H - -/* - * linux/include/asm/irq.h - * - * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar - * - * IRQ/IPI changes taken from work by Thomas Radke - * - */ - -#include -#include -#include - -#define NR_IRQS 256 - -#define PHYS_IRQ_BASE 0 -#define NR_PHYS_IRQS 128 - -#define HYPEREVENT_IRQ_BASE 128 -#define NR_HYPEREVENT_IRQS 128 - -#define HYPEREVENT_IRQ(_ev) ((_ev) + HYPEREVENT_IRQ_BASE) -#define HYPEREVENT_FROM_IRQ(_irq) ((_irq) - HYPEREVENT_IRQ_BASE) - -extern void physirq_init(void); - -#define irq_cannonicalize(_irq) (_irq) - -extern void disable_irq(unsigned int); -extern void disable_irq_nosync(unsigned int); -extern void enable_irq(unsigned int); - -#ifdef CONFIG_X86_LOCAL_APIC -#define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */ -#endif - -#endif /* _ASM_IRQ_H */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/keyboard.h b/xenolinux-2.4.25-sparse/include/asm-xeno/keyboard.h deleted file mode 100644 index 82e10faab2..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/keyboard.h +++ /dev/null @@ -1,98 +0,0 @@ -/* xenolinux/include/asm-xeno/keyboard.h */ -/* Portions copyright (c) 2003 James Scott, Intel Research Cambridge */ -/* - * Talks to hypervisor to get PS/2 keyboard and mouse events, and send keyboard - * and mouse commands - */ - -/* Based on: - * linux/include/asm-i386/keyboard.h - * - * Created 3 Nov 1996 by Geert Uytterhoeven - */ - -#ifndef _XENO_KEYBOARD_H -#define _XENO_KEYBOARD_H - -#ifdef __KERNEL__ - -#include -#include -#include -#include -#include - -extern int pckbd_setkeycode(unsigned int scancode, unsigned int keycode); -extern int pckbd_getkeycode(unsigned int scancode); -extern int pckbd_translate(unsigned char scancode, unsigned char *keycode, - char raw_mode); -extern char pckbd_unexpected_up(unsigned char keycode); -extern void pckbd_leds(unsigned char leds); -extern void pckbd_init_hw(void); -extern int pckbd_pm_resume(struct pm_dev *, pm_request_t, void *); - -extern pm_callback pm_kbd_request_override; -extern unsigned char pckbd_sysrq_xlate[128]; - -#define kbd_setkeycode pckbd_setkeycode -#define kbd_getkeycode pckbd_getkeycode -#define kbd_translate pckbd_translate -#define kbd_unexpected_up pckbd_unexpected_up -#define kbd_leds pckbd_leds -#define kbd_init_hw pckbd_init_hw -#define kbd_sysrq_xlate pckbd_sysrq_xlate - -#define SYSRQ_KEY 0x54 - - -/* THIS SECTION TALKS TO XEN TO DO PS2 SUPPORT */ -#include -#include - -#define kbd_controller_present xen_kbd_controller_present - -static inline int xen_kbd_controller_present () -{ - return start_info.flags & SIF_INITDOMAIN; -} - -/* resource allocation */ -#define kbd_request_region() \ - do { } while (0) -#define kbd_request_irq(handler) \ - request_irq(HYPEREVENT_IRQ(_EVENT_PS2), handler, 0, "ps/2", NULL) - -/* could implement these with command to xen to filter mouse stuff... */ -#define aux_request_irq(hand, dev_id) 0 -#define aux_free_irq(dev_id) do { } while(0) - -/* Some stoneage hardware needs delays after some operations. */ -#define kbd_pause() do { } while(0) - -static unsigned char kbd_current_scancode = 0; - -static unsigned char kbd_read_input(void) -{ - return kbd_current_scancode; -} - -static unsigned char kbd_read_status(void) -{ - long res; - res = HYPERVISOR_kbd_op(KBD_OP_READ,0); - if ( res<0 ) - { - kbd_current_scancode = 0; - return 0; /* error with our request - wrong domain? */ - } - kbd_current_scancode = KBD_CODE_SCANCODE(res); - return KBD_CODE_STATUS(res); -} - - -#define kbd_write_output(val) HYPERVISOR_kbd_op(KBD_OP_WRITEOUTPUT, val); -#define kbd_write_command(val) HYPERVISOR_kbd_op(KBD_OP_WRITECOMMAND, val); - - -#endif /* __KERNEL__ */ -#endif /* _XENO_KEYBOARD_H */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/mmu_context.h b/xenolinux-2.4.25-sparse/include/asm-xeno/mmu_context.h deleted file mode 100644 index 7972ce7d74..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/mmu_context.h +++ /dev/null @@ -1,74 +0,0 @@ -#ifndef __I386_MMU_CONTEXT_H -#define __I386_MMU_CONTEXT_H - -#include -#include -#include -#include - -/* - * hooks to add arch specific data into the mm struct. - * Note that destroy_context is called even if init_new_context - * fails. - */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm); -void destroy_context(struct mm_struct *mm); - -#ifdef CONFIG_SMP - -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) -{ - if(cpu_tlbstate[cpu].state == TLBSTATE_OK) - cpu_tlbstate[cpu].state = TLBSTATE_LAZY; -} -#else -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) -{ -} -#endif - -extern pgd_t *cur_pgd; - -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu) -{ - if (prev != next) { - /* stop flush ipis for the previous mm */ - clear_bit(cpu, &prev->cpu_vm_mask); -#ifdef CONFIG_SMP - cpu_tlbstate[cpu].state = TLBSTATE_OK; - cpu_tlbstate[cpu].active_mm = next; -#endif - - /* Re-load page tables */ - cur_pgd = next->pgd; - queue_pt_switch(__pa(cur_pgd)); - /* load_LDT, if either the previous or next thread - * has a non-default LDT. - */ - if (next->context.size+prev->context.size) - load_LDT(&next->context); - } -#ifdef CONFIG_SMP - else { - cpu_tlbstate[cpu].state = TLBSTATE_OK; - if(cpu_tlbstate[cpu].active_mm != next) - out_of_line_bug(); - if(!test_and_set_bit(cpu, &next->cpu_vm_mask)) { - /* We were in lazy tlb mode and leave_mm disabled - * tlb flush IPI delivery. We must reload %cr3. - */ - cur_pgd = next->pgd; - queue_pt_switch(__pa(cur_pgd)); - load_LDT(next); - } - } -#endif -} - -#define activate_mm(prev, next) \ -do { \ - switch_mm((prev),(next),NULL,smp_processor_id()); \ - flush_page_update_queue(); \ -} while ( 0 ) - -#endif diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/msr.h b/xenolinux-2.4.25-sparse/include/asm-xeno/msr.h deleted file mode 100644 index 1a2c8765a8..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/msr.h +++ /dev/null @@ -1,138 +0,0 @@ -#ifndef __ASM_MSR_H -#define __ASM_MSR_H - -/* - * Access to machine-specific registers (available on 586 and better only) - * Note: the rd* operations modify the parameters directly (without using - * pointer indirection), this allows gcc to optimize better - */ - -#define rdmsr(msr,val1,val2) \ -{ \ - dom0_op_t op; \ - op.cmd = DOM0_MSR; \ - op.u.msr.write = 0; \ - op.u.msr.msr = msr; \ - op.u.msr.cpu_mask = (1 << current->processor); \ - HYPERVISOR_dom0_op(&op); \ - val1 = op.u.msr.out1; \ - val2 = op.u.msr.out2; \ -} - -#define wrmsr(msr,val1,val2) \ -{ \ - dom0_op_t op; \ - op.cmd = DOM0_MSR; \ - op.u.msr.write = 1; \ - op.u.msr.cpu_mask = (1 << current->processor); \ - op.u.msr.msr = msr; \ - op.u.msr.in1 = val1; \ - op.u.msr.in2 = val2; \ - HYPERVISOR_dom0_op(&op); \ -} - -#define rdtsc(low,high) \ - __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) - -#define rdtscl(low) \ - __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx") - -#define rdtscll(val) \ - __asm__ __volatile__("rdtsc" : "=A" (val)) - -#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) - -#define rdpmc(counter,low,high) \ - __asm__ __volatile__("rdpmc" \ - : "=a" (low), "=d" (high) \ - : "c" (counter)) - -/* symbolic names for some interesting MSRs */ -/* Intel defined MSRs. */ -#define MSR_IA32_P5_MC_ADDR 0 -#define MSR_IA32_P5_MC_TYPE 1 -#define MSR_IA32_PLATFORM_ID 0x17 -#define MSR_IA32_EBL_CR_POWERON 0x2a - -#define MSR_IA32_APICBASE 0x1b -#define MSR_IA32_APICBASE_BSP (1<<8) -#define MSR_IA32_APICBASE_ENABLE (1<<11) -#define MSR_IA32_APICBASE_BASE (0xfffff<<12) - -#define MSR_IA32_UCODE_WRITE 0x79 -#define MSR_IA32_UCODE_REV 0x8b - -#define MSR_IA32_BBL_CR_CTL 0x119 - -#define MSR_IA32_MCG_CAP 0x179 -#define MSR_IA32_MCG_STATUS 0x17a -#define MSR_IA32_MCG_CTL 0x17b - -#define MSR_IA32_THERM_CONTROL 0x19a -#define MSR_IA32_THERM_INTERRUPT 0x19b -#define MSR_IA32_THERM_STATUS 0x19c -#define MSR_IA32_MISC_ENABLE 0x1a0 - -#define MSR_IA32_DEBUGCTLMSR 0x1d9 -#define MSR_IA32_LASTBRANCHFROMIP 0x1db -#define MSR_IA32_LASTBRANCHTOIP 0x1dc -#define MSR_IA32_LASTINTFROMIP 0x1dd -#define MSR_IA32_LASTINTTOIP 0x1de - -#define MSR_IA32_MC0_CTL 0x400 -#define MSR_IA32_MC0_STATUS 0x401 -#define MSR_IA32_MC0_ADDR 0x402 -#define MSR_IA32_MC0_MISC 0x403 - -#define MSR_P6_PERFCTR0 0xc1 -#define MSR_P6_PERFCTR1 0xc2 -#define MSR_P6_EVNTSEL0 0x186 -#define MSR_P6_EVNTSEL1 0x187 - -#define MSR_IA32_PERF_STATUS 0x198 -#define MSR_IA32_PERF_CTL 0x199 - -/* AMD Defined MSRs */ -#define MSR_K6_EFER 0xC0000080 -#define MSR_K6_STAR 0xC0000081 -#define MSR_K6_WHCR 0xC0000082 -#define MSR_K6_UWCCR 0xC0000085 -#define MSR_K6_EPMR 0xC0000086 -#define MSR_K6_PSOR 0xC0000087 -#define MSR_K6_PFIR 0xC0000088 - -#define MSR_K7_EVNTSEL0 0xC0010000 -#define MSR_K7_PERFCTR0 0xC0010004 -#define MSR_K7_HWCR 0xC0010015 -#define MSR_K7_CLK_CTL 0xC001001b -#define MSR_K7_FID_VID_CTL 0xC0010041 -#define MSR_K7_VID_STATUS 0xC0010042 - -/* Centaur-Hauls/IDT defined MSRs. */ -#define MSR_IDT_FCR1 0x107 -#define MSR_IDT_FCR2 0x108 -#define MSR_IDT_FCR3 0x109 -#define MSR_IDT_FCR4 0x10a - -#define MSR_IDT_MCR0 0x110 -#define MSR_IDT_MCR1 0x111 -#define MSR_IDT_MCR2 0x112 -#define MSR_IDT_MCR3 0x113 -#define MSR_IDT_MCR4 0x114 -#define MSR_IDT_MCR5 0x115 -#define MSR_IDT_MCR6 0x116 -#define MSR_IDT_MCR7 0x117 -#define MSR_IDT_MCR_CTRL 0x120 - -/* VIA Cyrix defined MSRs*/ -#define MSR_VIA_FCR 0x1107 -#define MSR_VIA_LONGHAUL 0x110a -#define MSR_VIA_BCR2 0x1147 - -/* Transmeta defined MSRs */ -#define MSR_TMTA_LONGRUN_CTRL 0x80868010 -#define MSR_TMTA_LONGRUN_FLAGS 0x80868011 -#define MSR_TMTA_LRTI_READOUT 0x80868018 -#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a - -#endif /* __ASM_MSR_H */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/multicall.h b/xenolinux-2.4.25-sparse/include/asm-xeno/multicall.h deleted file mode 100644 index f0ea5c3a66..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/multicall.h +++ /dev/null @@ -1,84 +0,0 @@ -/****************************************************************************** - * multicall.h - */ - -#ifndef __MULTICALL_H__ -#define __MULTICALL_H__ - -#include - -extern multicall_entry_t multicall_list[]; -extern int nr_multicall_ents; - -static inline void queue_multicall0(unsigned long op) -{ - int i = nr_multicall_ents; - multicall_list[i].op = op; - nr_multicall_ents = i+1; -} - -static inline void queue_multicall1(unsigned long op, unsigned long arg1) -{ - int i = nr_multicall_ents; - multicall_list[i].op = op; - multicall_list[i].args[0] = arg1; - nr_multicall_ents = i+1; -} - -static inline void queue_multicall2( - unsigned long op, unsigned long arg1, unsigned long arg2) -{ - int i = nr_multicall_ents; - multicall_list[i].op = op; - multicall_list[i].args[0] = arg1; - multicall_list[i].args[1] = arg2; - nr_multicall_ents = i+1; -} - -static inline void queue_multicall3( - unsigned long op, unsigned long arg1, unsigned long arg2, - unsigned long arg3) -{ - int i = nr_multicall_ents; - multicall_list[i].op = op; - multicall_list[i].args[0] = arg1; - multicall_list[i].args[1] = arg2; - multicall_list[i].args[2] = arg3; - nr_multicall_ents = i+1; -} - -static inline void queue_multicall4( - unsigned long op, unsigned long arg1, unsigned long arg2, - unsigned long arg3, unsigned long arg4) -{ - int i = nr_multicall_ents; - multicall_list[i].op = op; - multicall_list[i].args[0] = arg1; - multicall_list[i].args[1] = arg2; - multicall_list[i].args[2] = arg3; - multicall_list[i].args[3] = arg4; - nr_multicall_ents = i+1; -} - -static inline void queue_multicall5( - unsigned long op, unsigned long arg1, unsigned long arg2, - unsigned long arg3, unsigned long arg4, unsigned long arg5) -{ - int i = nr_multicall_ents; - multicall_list[i].op = op; - multicall_list[i].args[0] = arg1; - multicall_list[i].args[1] = arg2; - multicall_list[i].args[2] = arg3; - multicall_list[i].args[3] = arg4; - multicall_list[i].args[4] = arg5; - nr_multicall_ents = i+1; -} - -static inline void execute_multicall_list(void) -{ - if ( unlikely(nr_multicall_ents == 0) ) return; - (void)HYPERVISOR_multicall(multicall_list, nr_multicall_ents); - nr_multicall_ents = 0; -} - -#endif /* __MULTICALL_H__ */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/page.h b/xenolinux-2.4.25-sparse/include/asm-xeno/page.h deleted file mode 100644 index b7640a7d78..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/page.h +++ /dev/null @@ -1,173 +0,0 @@ -#ifndef _I386_PAGE_H -#define _I386_PAGE_H - -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 12 -#define PAGE_SIZE (1UL << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) - -#ifdef __KERNEL__ -#ifndef __ASSEMBLY__ - -#include -#include - -#ifdef CONFIG_X86_USE_3DNOW - -#include - -#define clear_page(page) mmx_clear_page((void *)(page)) -#define copy_page(to,from) mmx_copy_page(to,from) - -#else - -/* - * On older X86 processors its not a win to use MMX here it seems. - * Maybe the K6-III ? - */ - -#define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) -#define copy_page(to,from) memcpy((void *)(to), (void *)(from), PAGE_SIZE) - -#endif - -#define clear_user_page(page, vaddr) clear_page(page) -#define copy_user_page(to, from, vaddr) copy_page(to, from) - -/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ -extern unsigned long *phys_to_machine_mapping; -#define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)]) -#define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)]) -static inline unsigned long phys_to_machine(unsigned long phys) -{ - unsigned long machine = pfn_to_mfn(phys >> PAGE_SHIFT); - machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK); - return machine; -} -static inline unsigned long machine_to_phys(unsigned long machine) -{ - unsigned long phys = mfn_to_pfn(machine >> PAGE_SHIFT); - phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK); - return phys; -} - -/* - * These are used to make use of C type-checking.. - */ -#if CONFIG_X86_PAE -typedef struct { unsigned long pte_low, pte_high; } pte_t; -typedef struct { unsigned long long pmd; } pmd_t; -typedef struct { unsigned long long pgd; } pgd_t; -#define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) -#else -typedef struct { unsigned long pte_low; } pte_t; -typedef struct { unsigned long pmd; } pmd_t; -typedef struct { unsigned long pgd; } pgd_t; -static inline unsigned long pte_val(pte_t x) -{ - unsigned long ret = x.pte_low; - if ( (ret & 1) ) ret = machine_to_phys(ret); - return ret; -} -#endif -#define PTE_MASK PAGE_MASK - -typedef struct { unsigned long pgprot; } pgprot_t; - -static inline unsigned long pmd_val(pmd_t x) -{ - unsigned long ret = x.pmd; - if ( (ret & 1) ) ret = machine_to_phys(ret); - return ret; -} -#define pgd_val(x) ({ BUG(); (unsigned long)0; }) -#define pgprot_val(x) ((x).pgprot) - -static inline pte_t __pte(unsigned long x) -{ - if ( (x & 1) ) x = phys_to_machine(x); - return ((pte_t) { (x) }); -} -static inline pmd_t __pmd(unsigned long x) -{ - if ( (x & 1) ) x = phys_to_machine(x); - return ((pmd_t) { (x) }); -} -#define __pgd(x) ({ BUG(); (pgprot_t) { 0 }; }) -#define __pgprot(x) ((pgprot_t) { (x) } ) - -#endif /* !__ASSEMBLY__ */ - -/* to align the pointer to the (next) page boundary */ -#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) - -/* - * This handles the memory map.. We could make this a config - * option, but too many people screw it up, and too few need - * it. - * - * A __PAGE_OFFSET of 0xC0000000 means that the kernel has - * a virtual address space of one gigabyte, which limits the - * amount of physical memory you can use to about 950MB. - * - * If you want more physical memory than this then see the CONFIG_HIGHMEM4G - * and CONFIG_HIGHMEM64G options in the kernel configuration. - */ - -#define __PAGE_OFFSET (0xC0000000) - -#ifndef __ASSEMBLY__ - -/* - * Tell the user there is some problem. Beep too, so we can - * see^H^H^Hhear bugs in early bootup as well! - * The offending file and line are encoded after the "officially - * undefined" opcode for parsing in the trap handler. - */ - -#if 1 /* Set to zero for a slightly smaller kernel */ -#define BUG() \ - __asm__ __volatile__( "ud2\n" \ - "\t.word %c0\n" \ - "\t.long %c1\n" \ - : : "i" (__LINE__), "i" (__FILE__)) -#else -#define BUG() __asm__ __volatile__("ud2\n") -#endif - -#define PAGE_BUG(page) do { \ - BUG(); \ -} while (0) - -/* Pure 2^n version of get_order */ -static __inline__ int get_order(unsigned long size) -{ - int order; - - size = (size-1) >> (PAGE_SHIFT-1); - order = -1; - do { - size >>= 1; - order++; - } while (size); - return order; -} - -#endif /* __ASSEMBLY__ */ - -#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) -#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) -#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) -#define virt_to_page(kaddr) (mem_map + (__pa(kaddr) >> PAGE_SHIFT)) -#define VALID_PAGE(page) ((page - mem_map) < max_mapnr) - -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -/* VIRT <-> MACHINE conversion */ -#define virt_to_machine(_a) (phys_to_machine(__pa(_a))) -#define machine_to_virt(_m) (__va(machine_to_phys(_m))) - -#endif /* __KERNEL__ */ - -#endif /* _I386_PAGE_H */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/pgalloc.h b/xenolinux-2.4.25-sparse/include/asm-xeno/pgalloc.h deleted file mode 100644 index 9a90cb1b1d..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/pgalloc.h +++ /dev/null @@ -1,274 +0,0 @@ -#ifndef _I386_PGALLOC_H -#define _I386_PGALLOC_H - -#include -#include -#include -#include -#include - -/* - * Quick lists are aligned so that least significant bits of array pointer - * are all zero when list is empty, and all one when list is full. - */ -#define QUICKLIST_ENTRIES 256 -#define QUICKLIST_EMPTY(_l) !((unsigned long)(_l) & ((QUICKLIST_ENTRIES*4)-1)) -#define QUICKLIST_FULL(_l) QUICKLIST_EMPTY((_l)+1) -#define pgd_quicklist (current_cpu_data.pgd_quick) -#define pmd_quicklist (current_cpu_data.pmd_quick) -#define pte_quicklist (current_cpu_data.pte_quick) -#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz) - -#define pmd_populate(mm, pmd, pte) \ - do { \ - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \ - XENO_flush_page_update_queue(); \ - } while ( 0 ) - -/* - * Allocate and free page tables. - */ - -#if defined (CONFIG_X86_PAE) - -#error "no PAE support as yet" - -/* - * We can't include here, thus these uglinesses. - */ -struct kmem_cache_s; - -extern struct kmem_cache_s *pae_pgd_cachep; -extern void *kmem_cache_alloc(struct kmem_cache_s *, int); -extern void kmem_cache_free(struct kmem_cache_s *, void *); - - -static inline pgd_t *get_pgd_slow(void) -{ - int i; - pgd_t *pgd = kmem_cache_alloc(pae_pgd_cachep, GFP_KERNEL); - - if (pgd) { - for (i = 0; i < USER_PTRS_PER_PGD; i++) { - unsigned long pmd = __get_free_page(GFP_KERNEL); - if (!pmd) - goto out_oom; - clear_page(pmd); - set_pgd(pgd + i, __pgd(1 + __pa(pmd))); - } - memcpy(pgd + USER_PTRS_PER_PGD, - swapper_pg_dir + USER_PTRS_PER_PGD, - (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); - } - return pgd; -out_oom: - for (i--; i >= 0; i--) - free_page((unsigned long)__va(pgd_val(pgd[i])-1)); - kmem_cache_free(pae_pgd_cachep, pgd); - return NULL; -} - -#else - -static inline pgd_t *get_pgd_slow(void) -{ - pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL); - - if (pgd) { - memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); - memcpy(pgd + USER_PTRS_PER_PGD, - init_mm.pgd + USER_PTRS_PER_PGD, - (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); - __make_page_readonly(pgd); - queue_pgd_pin(__pa(pgd)); - - } - return pgd; -} - -#endif /* CONFIG_X86_PAE */ - -static inline pgd_t *get_pgd_fast(void) -{ - unsigned long ret; - - if ( !QUICKLIST_EMPTY(pgd_quicklist) ) { - ret = *(--pgd_quicklist); - pgtable_cache_size--; - - } else - ret = (unsigned long)get_pgd_slow(); - return (pgd_t *)ret; -} - -static inline void free_pgd_slow(pgd_t *pgd) -{ -#if defined(CONFIG_X86_PAE) -#error - int i; - - for (i = 0; i < USER_PTRS_PER_PGD; i++) - free_page((unsigned long)__va(pgd_val(pgd[i])-1)); - kmem_cache_free(pae_pgd_cachep, pgd); -#else - queue_pgd_unpin(__pa(pgd)); - __make_page_writeable(pgd); - free_page((unsigned long)pgd); -#endif -} - -static inline void free_pgd_fast(pgd_t *pgd) -{ - if ( !QUICKLIST_FULL(pgd_quicklist) ) { - *(pgd_quicklist++) = (unsigned long)pgd; - pgtable_cache_size++; - } else - free_pgd_slow(pgd); -} - -static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) -{ - pte_t *pte; - - pte = (pte_t *) __get_free_page(GFP_KERNEL); - if (pte) - { - clear_page(pte); - __make_page_readonly(pte); - queue_pte_pin(__pa(pte)); - } - return pte; - -} - -static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, - unsigned long address) -{ - unsigned long ret = 0; - if ( !QUICKLIST_EMPTY(pte_quicklist) ) { - ret = *(--pte_quicklist); - pgtable_cache_size--; - } - return (pte_t *)ret; -} - -static __inline__ void pte_free_slow(pte_t *pte) -{ - queue_pte_unpin(__pa(pte)); - __make_page_writeable(pte); - free_page((unsigned long)pte); -} - -static inline void pte_free_fast(pte_t *pte) -{ - if ( !QUICKLIST_FULL(pte_quicklist) ) { - *(pte_quicklist++) = (unsigned long)pte; - pgtable_cache_size++; - } else - pte_free_slow(pte); -} - -#define pte_free(pte) pte_free_fast(pte) -#define pgd_free(pgd) free_pgd_fast(pgd) -#define pgd_alloc(mm) get_pgd_fast() - -/* - * allocating and freeing a pmd is trivial: the 1-entry pmd is - * inside the pgd, so has no extra memory associated with it. - * (In the PAE case we free the pmds as part of the pgd.) - */ - -#define pmd_alloc_one_fast(mm, addr) ({ BUG(); ((pmd_t *)1); }) -#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) -#define pmd_free_slow(x) do { } while (0) -#define pmd_free_fast(x) do { } while (0) -#define pmd_free(x) do { } while (0) -#define pgd_populate(mm, pmd, pte) BUG() - -extern int do_check_pgt_cache(int, int); - -/* - * TLB flushing: - * - * - flush_tlb() flushes the current mm struct TLBs - * - flush_tlb_all() flushes all processes TLBs - * - flush_tlb_mm(mm) flushes the specified mm context TLB's - * - flush_tlb_page(vma, vmaddr) flushes one page - * - flush_tlb_range(mm, start, end) flushes a range of pages - * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables - * - * ..but the i386 has somewhat limited tlb flushing capabilities, - * and page-granular flushes are available only on i486 and up. - */ - -#ifndef CONFIG_SMP - -#define flush_tlb() __flush_tlb() -#define flush_tlb_all() __flush_tlb_all() -#define local_flush_tlb() __flush_tlb() - -static inline void flush_tlb_mm(struct mm_struct *mm) -{ - if (mm == current->active_mm) queue_tlb_flush(); - XENO_flush_page_update_queue(); -} - -static inline void flush_tlb_page(struct vm_area_struct *vma, - unsigned long addr) -{ - if (vma->vm_mm == current->active_mm) queue_invlpg(addr); - XENO_flush_page_update_queue(); -} - -static inline void flush_tlb_range(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - if (mm == current->active_mm) queue_tlb_flush(); - XENO_flush_page_update_queue(); -} - -#else -#error no guestos SMP support yet... -#include - -#define local_flush_tlb() \ - __flush_tlb() - -extern void flush_tlb_all(void); -extern void flush_tlb_current_task(void); -extern void flush_tlb_mm(struct mm_struct *); -extern void flush_tlb_page(struct vm_area_struct *, unsigned long); - -#define flush_tlb() flush_tlb_current_task() - -static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, unsigned long end) -{ - flush_tlb_mm(mm); -} - -#define TLBSTATE_OK 1 -#define TLBSTATE_LAZY 2 - -struct tlb_state -{ - struct mm_struct *active_mm; - int state; -} ____cacheline_aligned; -extern struct tlb_state cpu_tlbstate[NR_CPUS]; - -#endif /* CONFIG_SMP */ - -static inline void flush_tlb_pgtables(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - /* i386 does not keep any page table caches in TLB */ - XENO_flush_page_update_queue(); -} - -extern int direct_remap_area_pages(struct mm_struct *mm, - unsigned long address, - unsigned long machine_addr, - unsigned long size, - pgprot_t prot); - -#endif /* _I386_PGALLOC_H */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/pgtable-2level.h b/xenolinux-2.4.25-sparse/include/asm-xeno/pgtable-2level.h deleted file mode 100644 index c780f644c0..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/pgtable-2level.h +++ /dev/null @@ -1,71 +0,0 @@ -#ifndef _I386_PGTABLE_2LEVEL_H -#define _I386_PGTABLE_2LEVEL_H - -/* - * traditional i386 two-level paging structure: - */ - -#define PGDIR_SHIFT 22 -#define PTRS_PER_PGD 1024 - -/* - * the i386 is two-level, so we don't really have any - * PMD directory physically. - */ -#define PMD_SHIFT 22 -#define PTRS_PER_PMD 1 - -#define PTRS_PER_PTE 1024 - -#define pte_ERROR(e) \ - printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low) -#define pmd_ERROR(e) \ - printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) -#define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) - -/* - * The "pgd_xxx()" functions here are trivial for a folded two-level - * setup: the pgd is never bad, and a pmd always exists (as it's folded - * into the pgd entry) - */ -static inline int pgd_none(pgd_t pgd) { return 0; } -static inline int pgd_bad(pgd_t pgd) { return 0; } -static inline int pgd_present(pgd_t pgd) { return 1; } -#define pgd_clear(xp) do { } while (0) - -#define set_pte(pteptr, pteval) queue_l1_entry_update(pteptr, (pteval).pte_low) -#define set_pte_atomic(pteptr, pteval) queue_l1_entry_update(pteptr, (pteval).pte_low) -#define set_pmd(pmdptr, pmdval) queue_l2_entry_update((pmdptr), (pmdval).pmd) -#define set_pgd(pgdptr, pgdval) ((void)0) - -#define pgd_page(pgd) \ -((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) - -static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) -{ - return (pmd_t *) dir; -} - -/* - * A note on implementation of this atomic 'get-and-clear' operation. - * This is actually very simple because XenoLinux can only run on a single - * processor. Therefore, we cannot race other processors setting the 'accessed' - * or 'dirty' bits on a page-table entry. - * Even if pages are shared between domains, that is not a problem because - * each domain will have separate page tables, with their own versions of - * accessed & dirty state. - */ -static inline pte_t ptep_get_and_clear(pte_t *xp) -{ - pte_t pte = *xp; - queue_l1_entry_update(xp, 0); - return pte; -} - -#define pte_same(a, b) ((a).pte_low == (b).pte_low) -#define pte_page(x) (mem_map+((unsigned long)((pte_val(x) >> PAGE_SHIFT)))) -#define pte_none(x) (!(x).pte_low) -#define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) - -#endif /* _I386_PGTABLE_2LEVEL_H */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/pgtable.h b/xenolinux-2.4.25-sparse/include/asm-xeno/pgtable.h deleted file mode 100644 index 07087bdf39..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/pgtable.h +++ /dev/null @@ -1,370 +0,0 @@ -#ifndef _I386_PGTABLE_H -#define _I386_PGTABLE_H - -#include - -/* - * The Linux memory management assumes a three-level page table setup. On - * the i386, we use that, but "fold" the mid level into the top-level page - * table, so that we physically have the same two-level page table as the - * i386 mmu expects. - * - * This file contains the functions and defines necessary to modify and use - * the i386 page table tree. - */ -#ifndef __ASSEMBLY__ -#include -#include -#include -#include - -#ifndef _I386_BITOPS_H -#include -#endif - -#define swapper_pg_dir 0 -extern void paging_init(void); - -/* Caches aren't brain-dead on the intel. */ -#define flush_cache_all() do { } while (0) -#define flush_cache_mm(mm) do { } while (0) -#define flush_cache_range(mm, start, end) do { } while (0) -#define flush_cache_page(vma, vmaddr) do { } while (0) -#define flush_page_to_ram(page) do { } while (0) -#define flush_dcache_page(page) do { } while (0) -#define flush_icache_range(start, end) do { } while (0) -#define flush_icache_page(vma,pg) do { } while (0) -#define flush_icache_user_range(vma,pg,adr,len) do { } while (0) - -extern unsigned long pgkern_mask; - -#define __flush_tlb() ({ queue_tlb_flush(); XENO_flush_page_update_queue(); }) -#define __flush_tlb_global() __flush_tlb() -#define __flush_tlb_all() __flush_tlb_global() -#define __flush_tlb_one(addr) ({ queue_invlpg(addr); XENO_flush_page_update_queue(); }) -#define __flush_tlb_single(addr) ({ queue_invlpg(addr); XENO_flush_page_update_queue(); }) - -/* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -extern unsigned long empty_zero_page[1024]; -#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) - -#endif /* !__ASSEMBLY__ */ - -/* - * The Linux x86 paging architecture is 'compile-time dual-mode', it - * implements both the traditional 2-level x86 page tables and the - * newer 3-level PAE-mode page tables. - */ -#ifndef __ASSEMBLY__ -#if CONFIG_X86_PAE -# include - -/* - * Need to initialise the X86 PAE caches - */ -extern void pgtable_cache_init(void); - -#else -# include - -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) - -#endif -#endif - -#define PMD_SIZE (1UL << PMD_SHIFT) -#define PMD_MASK (~(PMD_SIZE-1)) -#define PGDIR_SIZE (1UL << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE-1)) - -#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) -#define FIRST_USER_PGD_NR 0 - -#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) -#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) - -#define TWOLEVEL_PGDIR_SHIFT 22 -#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT) -#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS) - - -#ifndef __ASSEMBLY__ -/* 4MB is just a nice "safety zone". Also, we align to a fresh pde. */ -#define VMALLOC_OFFSET (4*1024*1024) -extern void * high_memory; -#define VMALLOC_START (((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \ - ~(VMALLOC_OFFSET-1)) -#define VMALLOC_VMADDR(x) ((unsigned long)(x)) -#define VMALLOC_END (FIXADDR_START - 2*PAGE_SIZE) - -#define _PAGE_BIT_PRESENT 0 -#define _PAGE_BIT_RW 1 -#define _PAGE_BIT_USER 2 -#define _PAGE_BIT_PWT 3 -#define _PAGE_BIT_PCD 4 -#define _PAGE_BIT_ACCESSED 5 -#define _PAGE_BIT_DIRTY 6 -#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page, Pentium+, if present.. */ -#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ -#define _PAGE_BIT_IO 9 - -#define _PAGE_PRESENT 0x001 -#define _PAGE_RW 0x002 -#define _PAGE_USER 0x004 -#define _PAGE_PWT 0x008 -#define _PAGE_PCD 0x010 -#define _PAGE_ACCESSED 0x020 -#define _PAGE_DIRTY 0x040 -#define _PAGE_PSE 0x080 /* 4 MB (or 2MB) page, Pentium+, if present.. */ -#define _PAGE_GLOBAL 0x100 /* Global TLB entry PPro+ */ -#define _PAGE_IO 0x200 - -#define _PAGE_PROTNONE 0x080 /* If not present */ - -#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) -#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) -#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) - -#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) - -#define __PAGE_KERNEL \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) -#define __PAGE_KERNEL_NOCACHE \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED) -#define __PAGE_KERNEL_RO \ - (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED) - -#if 0 -#define MAKE_GLOBAL(x) __pgprot((x) | _PAGE_GLOBAL) -#else -#define MAKE_GLOBAL(x) __pgprot(x) -#endif - -#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL) -#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO) -#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE) - -/* - * The i386 can't do page protection for execute, and considers that - * the same are read. Also, write permissions imply read permissions. - * This is the closest we can get.. - */ -#define __P000 PAGE_NONE -#define __P001 PAGE_READONLY -#define __P010 PAGE_COPY -#define __P011 PAGE_COPY -#define __P100 PAGE_READONLY -#define __P101 PAGE_READONLY -#define __P110 PAGE_COPY -#define __P111 PAGE_COPY - -#define __S000 PAGE_NONE -#define __S001 PAGE_READONLY -#define __S010 PAGE_SHARED -#define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY -#define __S101 PAGE_READONLY -#define __S110 PAGE_SHARED -#define __S111 PAGE_SHARED - -#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE)) -#define pte_clear(xp) queue_l1_entry_update(xp, 0) - -#define pmd_none(x) (!(x).pmd) -#define pmd_present(x) ((x).pmd & _PAGE_PRESENT) -#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) -#define pmd_bad(x) (((x).pmd & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) - - -#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) - -/* - * The following only work if pte_present() is true. - * Undefined behaviour if not.. - */ -static inline int pte_read(pte_t pte) { return (pte).pte_low & _PAGE_USER; } -static inline int pte_exec(pte_t pte) { return (pte).pte_low & _PAGE_USER; } -static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; } -static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; } -static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; } -static inline int pte_io(pte_t pte) { return (pte).pte_low & _PAGE_IO; } - -static inline pte_t pte_rdprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; } -static inline pte_t pte_exprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; } -static inline pte_t pte_mkclean(pte_t pte) { (pte).pte_low &= ~_PAGE_DIRTY; return pte; } -static inline pte_t pte_mkold(pte_t pte) { (pte).pte_low &= ~_PAGE_ACCESSED; return pte; } -static inline pte_t pte_wrprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_RW; return pte; } -static inline pte_t pte_mkread(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; } -static inline pte_t pte_mkexec(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; } -static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; } -static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; } -static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } -static inline pte_t pte_mkio(pte_t pte) { (pte).pte_low |= _PAGE_IO; return pte; } - -static inline int ptep_test_and_clear_dirty(pte_t *ptep) -{ - unsigned long pteval = *(unsigned long *)ptep; - int ret = pteval & _PAGE_DIRTY; - if ( ret ) queue_l1_entry_update(ptep, pteval & ~_PAGE_DIRTY); - return ret; -} -static inline int ptep_test_and_clear_young(pte_t *ptep) -{ - unsigned long pteval = *(unsigned long *)ptep; - int ret = pteval & _PAGE_ACCESSED; - if ( ret ) queue_l1_entry_update(ptep, pteval & ~_PAGE_ACCESSED); - return ret; -} -static inline void ptep_set_wrprotect(pte_t *ptep) -{ - unsigned long pteval = *(unsigned long *)ptep; - if ( (pteval & _PAGE_RW) ) - queue_l1_entry_update(ptep, pteval & ~_PAGE_RW); -} -static inline void ptep_mkdirty(pte_t *ptep) -{ - unsigned long pteval = *(unsigned long *)ptep; - if ( !(pteval & _PAGE_DIRTY) ) - queue_l1_entry_update(ptep, pteval | _PAGE_DIRTY); -} - -/* - * Conversion functions: convert a page and protection to a page entry, - * and a page entry and page directory to the page they refer to. - */ - -#define mk_pte(page, pgprot) __mk_pte((page) - mem_map, (pgprot)) - -/* This takes a physical page address that is used by the remapping functions */ -#define mk_pte_phys(physpage, pgprot) __mk_pte((physpage) >> PAGE_SHIFT, pgprot) - -static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) -{ - pte.pte_low &= _PAGE_CHG_MASK; - pte.pte_low |= pgprot_val(newprot); - return pte; -} - -#define page_pte(page) page_pte_prot(page, __pgprot(0)) - -#define pmd_page(pmd) \ -((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) - -/* to find an entry in a page-table-directory. */ -#define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) - -#define __pgd_offset(address) pgd_index(address) - -#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address)) - -/* to find an entry in a kernel page-table-directory */ -#define pgd_offset_k(address) pgd_offset(&init_mm, address) - -#define __pmd_offset(address) \ - (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) - -/* Find an entry in the third-level page table.. */ -#define __pte_offset(address) \ - ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) -#define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \ - __pte_offset(address)) - -/* - * The i386 doesn't have any external MMU info: the kernel page - * tables contain all the necessary information. - */ -#define update_mmu_cache(vma,address,pte) do { } while (0) - -/* Encode and de-code a swap entry */ -#define SWP_TYPE(x) (((x).val >> 1) & 0x3f) -#define SWP_OFFSET(x) ((x).val >> 8) -#define SWP_ENTRY(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) -#define pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) -#define swp_entry_to_pte(x) ((pte_t) { (x).val }) - -struct page; -int change_page_attr(struct page *, int, pgprot_t prot); - -static inline void __make_page_readonly(void *va) -{ - pgd_t *pgd = pgd_offset_k((unsigned long)va); - pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); - pte_t *pte = pte_offset(pmd, (unsigned long)va); - queue_l1_entry_update(pte, (*(unsigned long *)pte)&~_PAGE_RW); -} - -static inline void __make_page_writeable(void *va) -{ - pgd_t *pgd = pgd_offset_k((unsigned long)va); - pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); - pte_t *pte = pte_offset(pmd, (unsigned long)va); - queue_l1_entry_update(pte, (*(unsigned long *)pte)|_PAGE_RW); -} - -static inline void make_page_readonly(void *va) -{ - pgd_t *pgd = pgd_offset_k((unsigned long)va); - pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); - pte_t *pte = pte_offset(pmd, (unsigned long)va); - queue_l1_entry_update(pte, (*(unsigned long *)pte)&~_PAGE_RW); - if ( (unsigned long)va >= VMALLOC_START ) - __make_page_readonly(machine_to_virt( - *(unsigned long *)pte&PAGE_MASK)); -} - -static inline void make_page_writeable(void *va) -{ - pgd_t *pgd = pgd_offset_k((unsigned long)va); - pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); - pte_t *pte = pte_offset(pmd, (unsigned long)va); - queue_l1_entry_update(pte, (*(unsigned long *)pte)|_PAGE_RW); - if ( (unsigned long)va >= VMALLOC_START ) - __make_page_writeable(machine_to_virt( - *(unsigned long *)pte&PAGE_MASK)); -} - -static inline void make_pages_readonly(void *va, unsigned int nr) -{ - while ( nr-- != 0 ) - { - make_page_readonly(va); - va = (void *)((unsigned long)va + PAGE_SIZE); - } -} - -static inline void make_pages_writeable(void *va, unsigned int nr) -{ - while ( nr-- != 0 ) - { - make_page_writeable(va); - va = (void *)((unsigned long)va + PAGE_SIZE); - } -} - -static inline unsigned long arbitrary_virt_to_phys(void *va) -{ - pgd_t *pgd = pgd_offset_k((unsigned long)va); - pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); - pte_t *pte = pte_offset(pmd, (unsigned long)va); - unsigned long pa = (*(unsigned long *)pte) & PAGE_MASK; - return pa | ((unsigned long)va & (PAGE_SIZE-1)); -} - -#endif /* !__ASSEMBLY__ */ - -/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ -#define PageSkip(page) (0) -#define kern_addr_valid(addr) (1) - -#define io_remap_page_range remap_page_range - -#endif /* _I386_PGTABLE_H */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/proc_cmd.h b/xenolinux-2.4.25-sparse/include/asm-xeno/proc_cmd.h deleted file mode 100644 index 2fddd2c243..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/proc_cmd.h +++ /dev/null @@ -1,28 +0,0 @@ -/****************************************************************************** - * proc_cmd.h - * - * Interface to /proc/cmd and /proc/xeno/privcmd. - */ - -#ifndef __PROC_CMD_H__ -#define __PROC_CMD_H__ - -typedef struct privcmd_hypercall -{ - unsigned long op; - unsigned long arg[5]; -} privcmd_hypercall_t; - -typedef struct privcmd_blkmsg -{ - unsigned long op; - void *buf; - int buf_size; -} privcmd_blkmsg_t; - -#define IOCTL_PRIVCMD_HYPERCALL \ - _IOC(_IOC_NONE, 'P', 0, sizeof(privcmd_hypercall_t)) -#define IOCTL_PRIVCMD_BLKMSG \ - _IOC(_IOC_NONE, 'P', 1, sizeof(privcmd_blkmsg_t)) - -#endif /* __PROC_CMD_H__ */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/processor.h b/xenolinux-2.4.25-sparse/include/asm-xeno/processor.h deleted file mode 100644 index 2b290252be..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/processor.h +++ /dev/null @@ -1,484 +0,0 @@ -/* - * include/asm-i386/processor.h - * - * Copyright (C) 1994 Linus Torvalds - */ - -#ifndef __ASM_I386_PROCESSOR_H -#define __ASM_I386_PROCESSOR_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Default implementation of macro that returns current - * instruction pointer ("program counter"). - */ -#define current_text_addr() ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; }) - -/* - * CPU type and hardware bug flags. Kept separately for each CPU. - * Members of this structure are referenced in head.S, so think twice - * before touching them. [mj] - */ - -struct cpuinfo_x86 { - __u8 x86; /* CPU family */ - __u8 x86_vendor; /* CPU vendor */ - __u8 x86_model; - __u8 x86_mask; - char wp_works_ok; /* It doesn't on 386's */ - char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */ - char hard_math; - char rfu; - int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ - __u32 x86_capability[NCAPINTS]; - char x86_vendor_id[16]; - char x86_model_id[64]; - int x86_cache_size; /* in KB - valid for CPUS which support this - call */ - int fdiv_bug; - int f00f_bug; - int coma_bug; - unsigned long loops_per_jiffy; - unsigned long *pgd_quick; - unsigned long *pmd_quick; - unsigned long *pte_quick; - unsigned long pgtable_cache_sz; -} __attribute__((__aligned__(SMP_CACHE_BYTES))); - -#define X86_VENDOR_INTEL 0 -#define X86_VENDOR_CYRIX 1 -#define X86_VENDOR_AMD 2 -#define X86_VENDOR_UMC 3 -#define X86_VENDOR_NEXGEN 4 -#define X86_VENDOR_CENTAUR 5 -#define X86_VENDOR_RISE 6 -#define X86_VENDOR_TRANSMETA 7 -#define X86_VENDOR_NSC 8 -#define X86_VENDOR_SIS 9 -#define X86_VENDOR_UNKNOWN 0xff - -/* - * capabilities of CPUs - */ - -extern struct cpuinfo_x86 boot_cpu_data; -extern struct tss_struct init_tss[NR_CPUS]; - -#ifdef CONFIG_SMP -extern struct cpuinfo_x86 cpu_data[]; -#define current_cpu_data cpu_data[smp_processor_id()] -#else -#define cpu_data (&boot_cpu_data) -#define current_cpu_data boot_cpu_data -#endif - -extern char ignore_irq13; - -extern void identify_cpu(struct cpuinfo_x86 *); -extern void print_cpu_info(struct cpuinfo_x86 *); - -/* - * EFLAGS bits - */ -#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ -#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ -#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ -#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ -#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ -#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ -#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ -#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ -#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ -#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ -#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ -#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ -#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ -#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ -#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ -#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ -#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ - -/* - * Generic CPUID function - */ -static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) -{ - __asm__("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (op)); -} - -/* - * CPUID functions returning a single datum - */ -static inline unsigned int cpuid_eax(unsigned int op) -{ - unsigned int eax; - - __asm__("cpuid" - : "=a" (eax) - : "0" (op) - : "bx", "cx", "dx"); - return eax; -} -static inline unsigned int cpuid_ebx(unsigned int op) -{ - unsigned int eax, ebx; - - __asm__("cpuid" - : "=a" (eax), "=b" (ebx) - : "0" (op) - : "cx", "dx" ); - return ebx; -} -static inline unsigned int cpuid_ecx(unsigned int op) -{ - unsigned int eax, ecx; - - __asm__("cpuid" - : "=a" (eax), "=c" (ecx) - : "0" (op) - : "bx", "dx" ); - return ecx; -} -static inline unsigned int cpuid_edx(unsigned int op) -{ - unsigned int eax, edx; - - __asm__("cpuid" - : "=a" (eax), "=d" (edx) - : "0" (op) - : "bx", "cx"); - return edx; -} - -/* - * Intel CPU features in CR4 - */ -#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ -#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ -#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ -#define X86_CR4_DE 0x0008 /* enable debugging extensions */ -#define X86_CR4_PSE 0x0010 /* enable page size extensions */ -#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ -#define X86_CR4_MCE 0x0040 /* Machine check enable */ -#define X86_CR4_PGE 0x0080 /* enable global pages */ -#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ -#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ -#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ - -#define load_cr3(pgdir) \ - asm volatile("movl %0,%%cr3": :"r" (__pa(pgdir))); - -extern unsigned long mmu_cr4_features; - -#include - -static inline void set_in_cr4 (unsigned long mask) -{ - BUG(); -} - -static inline void clear_in_cr4 (unsigned long mask) -{ - BUG(); -} - -/* - * Cyrix CPU configuration register indexes - */ -#define CX86_CCR0 0xc0 -#define CX86_CCR1 0xc1 -#define CX86_CCR2 0xc2 -#define CX86_CCR3 0xc3 -#define CX86_CCR4 0xe8 -#define CX86_CCR5 0xe9 -#define CX86_CCR6 0xea -#define CX86_CCR7 0xeb -#define CX86_DIR0 0xfe -#define CX86_DIR1 0xff -#define CX86_ARR_BASE 0xc4 -#define CX86_RCR_BASE 0xdc - -/* - * Cyrix CPU indexed register access macros - */ - -#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); }) - -#define setCx86(reg, data) do { \ - outb((reg), 0x22); \ - outb((data), 0x23); \ -} while (0) - -/* - * Bus types (default is ISA, but people can check others with these..) - */ -#ifdef CONFIG_EISA -extern int EISA_bus; -#else -#define EISA_bus (0) -#endif -extern int MCA_bus; - -/* from system description table in BIOS. Mostly for MCA use, but -others may find it useful. */ -extern unsigned int machine_id; -extern unsigned int machine_submodel_id; -extern unsigned int BIOS_revision; -extern unsigned int mca_pentium_flag; - -/* - * User space process size: 3GB (default). - */ -#define TASK_SIZE (PAGE_OFFSET) - -/* This decides where the kernel will search for a free chunk of vm - * space during mmap's. - */ -#define TASK_UNMAPPED_BASE (TASK_SIZE / 3) - -/* - * Size of io_bitmap in longwords: 32 is ports 0-0x3ff. - */ -#define IO_BITMAP_SIZE 32 -#define IO_BITMAP_BYTES (IO_BITMAP_SIZE * 4) -#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) -#define INVALID_IO_BITMAP_OFFSET 0x8000 - -struct i387_fsave_struct { - long cwd; - long swd; - long twd; - long fip; - long fcs; - long foo; - long fos; - long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ - long status; /* software status information */ -}; - -struct i387_fxsave_struct { - unsigned short cwd; - unsigned short swd; - unsigned short twd; - unsigned short fop; - long fip; - long fcs; - long foo; - long fos; - long mxcsr; - long reserved; - long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ - long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ - long padding[56]; -} __attribute__ ((aligned (16))); - -struct i387_soft_struct { - long cwd; - long swd; - long twd; - long fip; - long fcs; - long foo; - long fos; - long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ - unsigned char ftop, changed, lookahead, no_update, rm, alimit; - struct info *info; - unsigned long entry_eip; -}; - -union i387_union { - struct i387_fsave_struct fsave; - struct i387_fxsave_struct fxsave; - struct i387_soft_struct soft; -}; - -typedef struct { - unsigned long seg; -} mm_segment_t; - -struct tss_struct { - unsigned short back_link,__blh; - unsigned long esp0; - unsigned short ss0,__ss0h; - unsigned long esp1; - unsigned short ss1,__ss1h; - unsigned long esp2; - unsigned short ss2,__ss2h; - unsigned long __cr3; - unsigned long eip; - unsigned long eflags; - unsigned long eax,ecx,edx,ebx; - unsigned long esp; - unsigned long ebp; - unsigned long esi; - unsigned long edi; - unsigned short es, __esh; - unsigned short cs, __csh; - unsigned short ss, __ssh; - unsigned short ds, __dsh; - unsigned short fs, __fsh; - unsigned short gs, __gsh; - unsigned short ldt, __ldth; - unsigned short trace, bitmap; - unsigned long io_bitmap[IO_BITMAP_SIZE+1]; - /* - * pads the TSS to be cacheline-aligned (size is 0x100) - */ - unsigned long __cacheline_filler[5]; -}; - -struct thread_struct { - unsigned long esp0; - unsigned long eip; - unsigned long esp; - unsigned long fs; - unsigned long gs; - unsigned int io_pl; -/* Hardware debugging registers */ - unsigned long debugreg[8]; /* %%db0-7 debug registers */ -/* fault info */ - unsigned long cr2, trap_no, error_code; -/* floating point info */ - union i387_union i387; -/* virtual 86 mode info */ - struct vm86_struct * vm86_info; - unsigned long screen_bitmap; - unsigned long v86flags, v86mask, saved_esp0; -}; - -#define INIT_THREAD { sizeof(init_stack) + (long) &init_stack, \ - 0, 0, 0, 0, 0, 0, {0}, 0, 0, 0, {{0}}, 0, 0, 0, 0, 0 } - -#define INIT_TSS { \ - 0,0, /* back_link, __blh */ \ - sizeof(init_stack) + (long) &init_stack, /* esp0 */ \ - __KERNEL_DS, 0, /* ss0 */ \ - 0,0,0,0,0,0, /* stack1, stack2 */ \ - 0, /* cr3 */ \ - 0,0, /* eip,eflags */ \ - 0,0,0,0, /* eax,ecx,edx,ebx */ \ - 0,0,0,0, /* esp,ebp,esi,edi */ \ - 0,0,0,0,0,0, /* es,cs,ss */ \ - 0,0,0,0,0,0, /* ds,fs,gs */ \ - 0,0, /* ldt */ \ - 0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \ - {~0, } /* ioperm */ \ -} - -#define start_thread(regs, new_eip, new_esp) do { \ - __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \ - set_fs(USER_DS); \ - regs->xds = __USER_DS; \ - regs->xes = __USER_DS; \ - regs->xss = __USER_DS; \ - regs->xcs = __USER_CS; \ - regs->eip = new_eip; \ - regs->esp = new_esp; \ -} while (0) - -/* Forward declaration, a strange C thing */ -struct task_struct; -struct mm_struct; - -/* Free all resources held by a thread. */ -extern void release_thread(struct task_struct *); -/* - * create a kernel thread without removing it from tasklists - */ -extern int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); - -/* Copy and release all segment info associated with a VM - * Unusable due to lack of error handling, use {init_new,destroy}_context - * instead. - */ -static inline void copy_segments(struct task_struct *p, struct mm_struct * mm) { } -static inline void release_segments(struct mm_struct * mm) { } - -/* - * Return saved PC of a blocked thread. - */ -static inline unsigned long thread_saved_pc(struct thread_struct *t) -{ - return ((unsigned long *)t->esp)[3]; -} - -unsigned long get_wchan(struct task_struct *p); -#define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019]) -#define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022]) - -#define THREAD_SIZE (2*PAGE_SIZE) -#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1)) -#define free_task_struct(p) free_pages((unsigned long) (p), 1) -#define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count) - -#define init_task (init_task_union.task) -#define init_stack (init_task_union.stack) - -struct microcode { - unsigned int hdrver; - unsigned int rev; - unsigned int date; - unsigned int sig; - unsigned int cksum; - unsigned int ldrver; - unsigned int pf; - unsigned int reserved[5]; - unsigned int bits[500]; -}; - -/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */ -#define MICROCODE_IOCFREE _IO('6',0) - -/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ -static inline void rep_nop(void) -{ - __asm__ __volatile__("rep;nop" ::: "memory"); -} - -#define cpu_relax() rep_nop() - -/* Prefetch instructions for Pentium III and AMD Athlon */ -#if defined(CONFIG_MPENTIUMIII) || defined (CONFIG_MPENTIUM4) - -#define ARCH_HAS_PREFETCH -extern inline void prefetch(const void *x) -{ - __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x)); -} - -#elif CONFIG_X86_USE_3DNOW - -#define ARCH_HAS_PREFETCH -#define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH - -extern inline void prefetch(const void *x) -{ - __asm__ __volatile__ ("prefetch (%0)" : : "r"(x)); -} - -extern inline void prefetchw(const void *x) -{ - __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x)); -} -#define spin_lock_prefetch(x) prefetchw(x) - -#endif - -#define TF_MASK 0x100 - -#endif /* __ASM_I386_PROCESSOR_H */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/ptrace.h b/xenolinux-2.4.25-sparse/include/asm-xeno/ptrace.h deleted file mode 100644 index 4457ac0b17..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/ptrace.h +++ /dev/null @@ -1,63 +0,0 @@ -#ifndef _I386_PTRACE_H -#define _I386_PTRACE_H - -#define EBX 0 -#define ECX 1 -#define EDX 2 -#define ESI 3 -#define EDI 4 -#define EBP 5 -#define EAX 6 -#define DS 7 -#define ES 8 -#define FS 9 -#define GS 10 -#define ORIG_EAX 11 -#define EIP 12 -#define CS 13 -#define EFL 14 -#define UESP 15 -#define SS 16 -#define FRAME_SIZE 17 - -/* this struct defines the way the registers are stored on the - stack during a system call. */ - -struct pt_regs { - long ebx; - long ecx; - long edx; - long esi; - long edi; - long ebp; - long eax; - int xds; - int xes; - long orig_eax; - long eip; - int xcs; - long eflags; - long esp; - int xss; -}; - -/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */ -#define PTRACE_GETREGS 12 -#define PTRACE_SETREGS 13 -#define PTRACE_GETFPREGS 14 -#define PTRACE_SETFPREGS 15 -#define PTRACE_GETFPXREGS 18 -#define PTRACE_SETFPXREGS 19 - -#define PTRACE_SETOPTIONS 21 - -/* options set using PTRACE_SETOPTIONS */ -#define PTRACE_O_TRACESYSGOOD 0x00000001 - -#ifdef __KERNEL__ -#define user_mode(regs) ((regs) && (2 & (regs)->xcs)) -#define instruction_pointer(regs) ((regs) ? (regs)->eip : NULL) -extern void show_regs(struct pt_regs *); -#endif - -#endif diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/segment.h b/xenolinux-2.4.25-sparse/include/asm-xeno/segment.h deleted file mode 100644 index ca13028ce0..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/segment.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _ASM_SEGMENT_H -#define _ASM_SEGMENT_H - -#ifndef __ASSEMBLY__ -#include -#endif -#include - -#define __KERNEL_CS FLAT_RING1_CS -#define __KERNEL_DS FLAT_RING1_DS - -#define __USER_CS FLAT_RING3_CS -#define __USER_DS FLAT_RING3_DS - -#endif diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/smp.h b/xenolinux-2.4.25-sparse/include/asm-xeno/smp.h deleted file mode 100644 index 804b93c332..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/smp.h +++ /dev/null @@ -1,102 +0,0 @@ -#ifndef __ASM_SMP_H -#define __ASM_SMP_H - -/* - * We need the APIC definitions automatically as part of 'smp.h' - */ -#ifndef __ASSEMBLY__ -#include -#include -#include -#endif - -#ifdef CONFIG_X86_LOCAL_APIC -#ifndef __ASSEMBLY__ -#include -#include -#ifdef CONFIG_X86_IO_APIC -#include -#endif -#include -#endif -#endif - -#ifdef CONFIG_SMP -#ifndef __ASSEMBLY__ - -/* - * Private routines/data - */ - -extern void smp_alloc_memory(void); -extern unsigned long phys_cpu_present_map; -extern unsigned long cpu_online_map; -extern volatile unsigned long smp_invalidate_needed; -extern int pic_mode; -extern int smp_num_siblings; -extern int cpu_sibling_map[]; - -extern void smp_flush_tlb(void); -extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); -extern void smp_send_reschedule(int cpu); -extern void smp_invalidate_rcv(void); /* Process an NMI */ -extern void (*mtrr_hook) (void); -extern void zap_low_mappings (void); - -/* - * On x86 all CPUs are mapped 1:1 to the APIC space. - * This simplifies scheduling and IPI sending and - * compresses data structures. - */ -static inline int cpu_logical_map(int cpu) -{ - return cpu; -} -static inline int cpu_number_map(int cpu) -{ - return cpu; -} - -/* - * Some lowlevel functions might want to know about - * the real APIC ID <-> CPU # mapping. - */ -#define MAX_APICID 256 -extern volatile int cpu_to_physical_apicid[NR_CPUS]; -extern volatile int physical_apicid_to_cpu[MAX_APICID]; -extern volatile int cpu_to_logical_apicid[NR_CPUS]; -extern volatile int logical_apicid_to_cpu[MAX_APICID]; - -/* - * General functions that each host system must provide. - */ - -extern void smp_boot_cpus(void); -extern void smp_store_cpu_info(int id); /* Store per CPU info (like the initial udelay numbers */ - -/* - * This function is needed by all SMP systems. It must _always_ be valid - * from the initial startup. We map APIC_BASE very early in page_setup(), - * so this is correct in the x86 case. - */ - -#define smp_processor_id() (current->processor) - -#endif /* !__ASSEMBLY__ */ - -#define NO_PROC_ID 0xFF /* No processor magic marker */ - -/* - * This magic constant controls our willingness to transfer - * a process across CPUs. Such a transfer incurs misses on the L1 - * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My - * gut feeling is this will vary by board in value. For a board - * with separate L2 cache it probably depends also on the RSS, and - * for a board with shared L2 cache it ought to decay fast as other - * processes are run. - */ - -#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */ - -#endif -#endif diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/suspend.h b/xenolinux-2.4.25-sparse/include/asm-xeno/suspend.h deleted file mode 100644 index 337290dc95..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/suspend.h +++ /dev/null @@ -1,25 +0,0 @@ -/****************************************************************************** - * suspend.h - * - * NB. This file is part of the Xenolinux interface with Xenoserver control - * software. It can be included in such software without invoking the GPL. - * - * Copyright (c) 2003, K A Fraser - */ - -#ifndef __ASM_XENO_SUSPEND_H__ -#define __ASM_XENO_SUSPEND_H__ - -typedef struct suspend_record_st { - /* To be filled in before resume. */ - start_info_t resume_info; - /* - * The number of a machine frame containing, in sequence, the number of - * each machine frame that contains PFN -> MFN translation table data. - */ - unsigned long pfn_to_mfn_frame_list; - /* Number of entries in the PFN -> MFN translation table. */ - unsigned long nr_pfns; -} suspend_record_t; - -#endif /* __ASM_XENO_SUSPEND_H__ */ diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/system.h b/xenolinux-2.4.25-sparse/include/asm-xeno/system.h deleted file mode 100644 index 3b59252ca3..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/system.h +++ /dev/null @@ -1,400 +0,0 @@ -#ifndef __ASM_SYSTEM_H -#define __ASM_SYSTEM_H - -#include -#include -#include -#include -#include -#include /* for LOCK_PREFIX */ - -#ifdef __KERNEL__ - -struct task_struct; -extern void FASTCALL(__switch_to(struct task_struct *prev, - struct task_struct *next)); - -#define prepare_to_switch() \ -do { \ - struct thread_struct *__t = ¤t->thread; \ - __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (*(int *)&__t->fs) ); \ - __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (*(int *)&__t->gs) ); \ -} while (0) -#define switch_to(prev,next,last) do { \ - asm volatile("pushl %%esi\n\t" \ - "pushl %%edi\n\t" \ - "pushl %%ebp\n\t" \ - "movl %%esp,%0\n\t" /* save ESP */ \ - "movl %3,%%esp\n\t" /* restore ESP */ \ - "movl $1f,%1\n\t" /* save EIP */ \ - "pushl %4\n\t" /* restore EIP */ \ - "jmp __switch_to\n" \ - "1:\t" \ - "popl %%ebp\n\t" \ - "popl %%edi\n\t" \ - "popl %%esi\n\t" \ - :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \ - "=b" (last) \ - :"m" (next->thread.esp),"m" (next->thread.eip), \ - "a" (prev), "d" (next), \ - "b" (prev)); \ -} while (0) - -#define _set_base(addr,base) do { unsigned long __pr; \ -__asm__ __volatile__ ("movw %%dx,%1\n\t" \ - "rorl $16,%%edx\n\t" \ - "movb %%dl,%2\n\t" \ - "movb %%dh,%3" \ - :"=&d" (__pr) \ - :"m" (*((addr)+2)), \ - "m" (*((addr)+4)), \ - "m" (*((addr)+7)), \ - "0" (base) \ - ); } while(0) - -#define _set_limit(addr,limit) do { unsigned long __lr; \ -__asm__ __volatile__ ("movw %%dx,%1\n\t" \ - "rorl $16,%%edx\n\t" \ - "movb %2,%%dh\n\t" \ - "andb $0xf0,%%dh\n\t" \ - "orb %%dh,%%dl\n\t" \ - "movb %%dl,%2" \ - :"=&d" (__lr) \ - :"m" (*(addr)), \ - "m" (*((addr)+6)), \ - "0" (limit) \ - ); } while(0) - -#define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) ) -#define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1)>>12 ) - -static inline unsigned long _get_base(char * addr) -{ - unsigned long __base; - __asm__("movb %3,%%dh\n\t" - "movb %2,%%dl\n\t" - "shll $16,%%edx\n\t" - "movw %1,%%dx" - :"=&d" (__base) - :"m" (*((addr)+2)), - "m" (*((addr)+4)), - "m" (*((addr)+7))); - return __base; -} - -#define get_base(ldt) _get_base( ((char *)&(ldt)) ) - -/* - * Load a segment. Fall back on loading the zero - * segment if something goes wrong.. - */ -#define loadsegment(seg,value) \ - asm volatile("\n" \ - "1:\t" \ - "movl %0,%%" #seg "\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3:\t" \ - "pushl $0\n\t" \ - "popl %%" #seg "\n\t" \ - "jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n\t" \ - ".align 4\n\t" \ - ".long 1b,3b\n" \ - ".previous" \ - : :"m" (*(unsigned int *)&(value))) - -#define clts() ((void)0) -#define read_cr0() ({ \ - unsigned int __dummy; \ - __asm__( \ - "movl %%cr0,%0\n\t" \ - :"=r" (__dummy)); \ - __dummy; \ -}) -#define write_cr0(x) \ - __asm__("movl %0,%%cr0": :"r" (x)); - -#define read_cr4() ({ \ - unsigned int __dummy; \ - __asm__( \ - "movl %%cr4,%0\n\t" \ - :"=r" (__dummy)); \ - __dummy; \ -}) -#define write_cr4(x) \ - __asm__("movl %0,%%cr4": :"r" (x)); -#define stts() (HYPERVISOR_fpu_taskswitch()) - -#endif /* __KERNEL__ */ - -#define wbinvd() \ - __asm__ __volatile__ ("wbinvd": : :"memory"); - -static inline unsigned long get_limit(unsigned long segment) -{ - unsigned long __limit; - __asm__("lsll %1,%0" - :"=r" (__limit):"r" (segment)); - return __limit+1; -} - -#define nop() __asm__ __volatile__ ("nop") - -#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) - -#define tas(ptr) (xchg((ptr),1)) - -struct __xchg_dummy { unsigned long a[100]; }; -#define __xg(x) ((struct __xchg_dummy *)(x)) - - -/* - * The semantics of XCHGCMP8B are a bit strange, this is why - * there is a loop and the loading of %%eax and %%edx has to - * be inside. This inlines well in most cases, the cached - * cost is around ~38 cycles. (in the future we might want - * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that - * might have an implicit FPU-save as a cost, so it's not - * clear which path to go.) - * - * chmxchg8b must be used with the lock prefix here to allow - * the instruction to be executed atomically, see page 3-102 - * of the instruction set reference 24319102.pdf. We need - * the reader side to see the coherent 64bit value. - */ -static inline void __set_64bit (unsigned long long * ptr, - unsigned int low, unsigned int high) -{ - __asm__ __volatile__ ( - "\n1:\t" - "movl (%0), %%eax\n\t" - "movl 4(%0), %%edx\n\t" - "lock cmpxchg8b (%0)\n\t" - "jnz 1b" - : /* no outputs */ - : "D"(ptr), - "b"(low), - "c"(high) - : "ax","dx","memory"); -} - -static inline void __set_64bit_constant (unsigned long long *ptr, - unsigned long long value) -{ - __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL)); -} -#define ll_low(x) *(((unsigned int*)&(x))+0) -#define ll_high(x) *(((unsigned int*)&(x))+1) - -static inline void __set_64bit_var (unsigned long long *ptr, - unsigned long long value) -{ - __set_64bit(ptr,ll_low(value), ll_high(value)); -} - -#define set_64bit(ptr,value) \ -(__builtin_constant_p(value) ? \ - __set_64bit_constant(ptr, value) : \ - __set_64bit_var(ptr, value) ) - -#define _set_64bit(ptr,value) \ -(__builtin_constant_p(value) ? \ - __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \ - __set_64bit(ptr, ll_low(value), ll_high(value)) ) - -/* - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway - * Note 2: xchg has side effect, so that attribute volatile is necessary, - * but generally the primitive is invalid, *ptr is output argument. --ANK - */ -static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) -{ - switch (size) { - case 1: - __asm__ __volatile__("xchgb %b0,%1" - :"=q" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - case 2: - __asm__ __volatile__("xchgw %w0,%1" - :"=r" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - case 4: - __asm__ __volatile__("xchgl %0,%1" - :"=r" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - } - return x; -} - -/* - * Atomic compare and exchange. Compare OLD with MEM, if identical, - * store NEW in MEM. Return the initial value in MEM. Success is - * indicated by comparing RETURN with OLD. - */ - -#ifdef CONFIG_X86_CMPXCHG -#define __HAVE_ARCH_CMPXCHG 1 - -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - -#define cmpxchg(ptr,o,n)\ - ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ - (unsigned long)(n),sizeof(*(ptr)))) - -#else -/* Compiling for a 386 proper. Is it worth implementing via cli/sti? */ -#endif - -/* - * Force strict CPU ordering. - * And yes, this is required on UP too when we're talking - * to devices. - * - * For now, "wmb()" doesn't actually do anything, as all - * Intel CPU's follow what Intel calls a *Processor Order*, - * in which all writes are seen in the program order even - * outside the CPU. - * - * I expect future Intel CPU's to have a weaker ordering, - * but I'd also expect them to finally get their act together - * and add some real memory barriers if so. - * - * Some non intel clones support out of order store. wmb() ceases to be a - * nop for these. - */ - -#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") -#define rmb() mb() - -#ifdef CONFIG_X86_OOSTORE -#define wmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") -#else -#define wmb() __asm__ __volatile__ ("": : :"memory") -#endif - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() -#define set_mb(var, value) do { xchg(&var, value); } while (0) -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define set_mb(var, value) do { var = value; barrier(); } while (0) -#endif - -#define set_wmb(var, value) do { var = value; wmb(); } while (0) - - -#define __cli() \ -do { \ - clear_bit(EVENTS_MASTER_ENABLE_BIT, &HYPERVISOR_shared_info->events_mask);\ - barrier(); \ -} while (0) - -#define __sti() \ -do { \ - shared_info_t *_shared = HYPERVISOR_shared_info; \ - set_bit(EVENTS_MASTER_ENABLE_BIT, &_shared->events_mask); \ - barrier(); \ - if ( unlikely(_shared->events) ) do_hypervisor_callback(NULL); \ -} while (0) - -#define __save_flags(x) \ -do { \ - (x) = test_bit(EVENTS_MASTER_ENABLE_BIT, \ - &HYPERVISOR_shared_info->events_mask); \ - barrier(); \ -} while (0) - -#define __restore_flags(x) do { if (x) __sti(); } while (0) - -#define safe_halt() ((void)0) - -#define __save_and_cli(x) do { __save_flags(x); __cli(); } while(0); -#define __save_and_sti(x) do { __save_flags(x); __sti(); } while(0); - -#define local_irq_save(x) \ -do { \ - (x) = test_and_clear_bit(EVENTS_MASTER_ENABLE_BIT, \ - &HYPERVISOR_shared_info->events_mask); \ - barrier(); \ -} while (0) -#define local_irq_restore(x) __restore_flags(x) -#define local_irq_disable() __cli() -#define local_irq_enable() __sti() - - -#ifdef CONFIG_SMP -#error no SMP -extern void __global_cli(void); -extern void __global_sti(void); -extern unsigned long __global_save_flags(void); -extern void __global_restore_flags(unsigned long); -#define cli() __global_cli() -#define sti() __global_sti() -#define save_flags(x) ((x)=__global_save_flags()) -#define restore_flags(x) __global_restore_flags(x) -#define save_and_cli(x) do { save_flags(x); cli(); } while(0); -#define save_and_sti(x) do { save_flags(x); sti(); } while(0); - -#else - -#define cli() __cli() -#define sti() __sti() -#define save_flags(x) __save_flags(x) -#define restore_flags(x) __restore_flags(x) -#define save_and_cli(x) __save_and_cli(x) -#define save_and_sti(x) __save_and_sti(x) - -#endif - -/* - * disable hlt during certain critical i/o operations - */ -#define HAVE_DISABLE_HLT -void disable_hlt(void); -void enable_hlt(void); - -extern unsigned long dmi_broken; -extern int is_sony_vaio_laptop; - -#define BROKEN_ACPI_Sx 0x0001 -#define BROKEN_INIT_AFTER_S1 0x0002 -#define BROKEN_PNP_BIOS 0x0004 - -#endif diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/vga.h b/xenolinux-2.4.25-sparse/include/asm-xeno/vga.h deleted file mode 100644 index d0624cf480..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/vga.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Access to VGA videoram - * - * (c) 1998 Martin Mares - */ - -#ifndef _LINUX_ASM_VGA_H_ -#define _LINUX_ASM_VGA_H_ - -#include - -extern unsigned char *vgacon_mmap; - -static unsigned long VGA_MAP_MEM(unsigned long x) -{ - if( vgacon_mmap == NULL ) - { - /* This is our first time in this function. This whole thing - is a rather grim hack. We know we're going to get asked - to map a 32KB region between 0xb0000 and 0xb8000 because - that's what VGAs are. We used the boot time permanent - fixed map region, and map it to machine pages. - */ - if( x != 0xb8000 ) - panic("Argghh! VGA Console is weird. 1:%08lx\n",x); - - vgacon_mmap = (unsigned char*) bt_ioremap( 0xa0000, 128*1024 ); - return (unsigned long) (vgacon_mmap+x-0xa0000); - } - else - { - if( x != 0xc0000 && x != 0xa0000 ) /* vidmem_end or charmap fonts */ - panic("Argghh! VGA Console is weird. 2:%08lx\n",x); - return (unsigned long) (vgacon_mmap+x-0xa0000); - } - return 0; -} - -static inline unsigned char vga_readb(unsigned char * x) { return (*(x)); } -static inline void vga_writeb(unsigned char x, unsigned char *y) { *(y) = (x); } - -#endif diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/xeno_proc.h b/xenolinux-2.4.25-sparse/include/asm-xeno/xeno_proc.h deleted file mode 100644 index d794b733f5..0000000000 --- a/xenolinux-2.4.25-sparse/include/asm-xeno/xeno_proc.h +++ /dev/null @@ -1,13 +0,0 @@ - -#ifndef __ASM_XENO_PROC_H__ -#define __ASM_XENO_PROC_H__ - -#include -#include - -extern struct proc_dir_entry *create_xeno_proc_entry( - const char *name, mode_t mode); -extern void remove_xeno_proc_entry( - const char *name); - -#endif /* __ASM_XENO_PROC_H__ */ diff --git a/xenolinux-2.4.25-sparse/include/linux/blk.h b/xenolinux-2.4.25-sparse/include/linux/blk.h index 4618a527f1..e7c7575454 100644 --- a/xenolinux-2.4.25-sparse/include/linux/blk.h +++ b/xenolinux-2.4.25-sparse/include/linux/blk.h @@ -55,9 +55,9 @@ extern int xpram_init(void); extern int tapeblock_init(void); #endif /* CONFIG_ARCH_S390 */ -#if defined(CONFIG_XENOLINUX_BLOCK) +#if defined(CONFIG_XEN_VBD) extern int xlblk_init(void); -#endif /* CONFIG_ARCH_XENO */ +#endif /* CONFIG_XEN_VBD */ extern void set_device_ro(kdev_t dev,int flag); void add_blkdev_randomness(int major); diff --git a/xenolinux-2.4.25-sparse/init/do_mounts.c b/xenolinux-2.4.25-sparse/init/do_mounts.c index 8e2b89fbb3..6d09888fc8 100644 --- a/xenolinux-2.4.25-sparse/init/do_mounts.c +++ b/xenolinux-2.4.25-sparse/init/do_mounts.c @@ -255,7 +255,7 @@ static struct dev_name_struct { { "ftld", 0x2c18 }, { "mtdblock", 0x1f00 }, { "nb", 0x2b00 }, -#if defined(CONFIG_XENOLINUX_BLOCK) +#if defined(CONFIG_XEN_VBD) { "xvda", 0x7D00 }, { "xvdb", 0x7D10 }, { "xvdc", 0x7D20 }, { "xvdd", 0x7D30 }, { "xvde", 0x7D40 }, { "xvdf", 0x7D50 }, diff --git a/xenolinux-2.4.25-sparse/kernel/panic.c b/xenolinux-2.4.25-sparse/kernel/panic.c index 284bd434a3..9c96d92e40 100644 --- a/xenolinux-2.4.25-sparse/kernel/panic.c +++ b/xenolinux-2.4.25-sparse/kernel/panic.c @@ -109,7 +109,7 @@ NORET_TYPE void panic(const char * fmt, ...) panic_blink(); #endif CHECK_EMERGENCY_SYNC -#if defined(CONFIG_XENO) +#if defined(CONFIG_XEN) HYPERVISOR_exit(); #endif } diff --git a/xenolinux-2.4.25-sparse/kernel/time.c b/xenolinux-2.4.25-sparse/kernel/time.c index fe6ecde6d8..b4f8b55e8a 100644 --- a/xenolinux-2.4.25-sparse/kernel/time.c +++ b/xenolinux-2.4.25-sparse/kernel/time.c @@ -64,7 +64,7 @@ asmlinkage long sys_time(int * tloc) return i; } -#if !defined(CONFIG_XENO) +#if !defined(CONFIG_XEN) /* * sys_stime() can be implemented in user-level using diff --git a/xenolinux-2.4.25-sparse/mkbuildtree b/xenolinux-2.4.25-sparse/mkbuildtree index 64818b45da..08b6ae1e5d 100755 --- a/xenolinux-2.4.25-sparse/mkbuildtree +++ b/xenolinux-2.4.25-sparse/mkbuildtree @@ -103,16 +103,16 @@ relative_lndir ${RS} rm -f mkbuildtree ## There are a whole bunch of special symlinks, mostly for files -## which are identical in the i386 and xeno-i386 architecture-dependent +## which are identical in the i386 and xen-i386 architecture-dependent ## subdirectories. # This first symlink is special: it links to shared files in Xen's source tree -rm -rf ${AD}/include/asm-xeno/hypervisor-ifs -mkdir ${AD}/include/asm-xeno/hypervisor-ifs -cd ${AD}/include/asm-xeno/hypervisor-ifs +rm -rf ${AD}/include/asm-xen/hypervisor-ifs +mkdir ${AD}/include/asm-xen/hypervisor-ifs +cd ${AD}/include/asm-xen/hypervisor-ifs relative_lndir ../../../${RS}/../xen/include/hypervisor-ifs -# The remainder are the i386 -> xeno-i386 links +# The remainder are the i386 -> xen-i386 links cd .. ln -sf ../asm-i386/a.out.h ln -sf ../asm-i386/apicdef.h @@ -198,7 +198,7 @@ ln -sf ../asm-i386/unistd.h ln -sf ../asm-i386/user.h ln -sf ../asm-i386/xor.h -cd ../../arch/xeno/kernel +cd ../../arch/xen/kernel ln -sf ../../i386/kernel/i387.c ln -sf ../../i386/kernel/init_task.c ln -sf ../../i386/kernel/ptrace.c diff --git a/xenolinux-2.4.25-sparse/mm/memory.c b/xenolinux-2.4.25-sparse/mm/memory.c index 6e6bfac75c..312dbfa7c0 100644 --- a/xenolinux-2.4.25-sparse/mm/memory.c +++ b/xenolinux-2.4.25-sparse/mm/memory.c @@ -153,7 +153,7 @@ void clear_page_tables(struct mm_struct *mm, unsigned long first, int nr) free_one_pgd(page_dir); page_dir++; } while (--nr); - XENO_flush_page_update_queue(); + XEN_flush_page_update_queue(); spin_unlock(&mm->page_table_lock); /* keep the page table cache within bounds */ @@ -249,7 +249,7 @@ skip_copy_pte_range: address = (address + PMD_SIZE) & PMD_MASK; /* If it's a COW mapping, write protect it both in the parent and the child */ if (cow && pte_write(pte)) { - /* XENO modification: modified ordering here to avoid RaW hazard. */ + /* XEN modification: modified ordering here to avoid RaW hazard. */ pte = *src_pte; pte = pte_wrprotect(pte); ptep_set_wrprotect(src_pte); @@ -318,7 +318,7 @@ static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long ad continue; if (pte_present(pte)) { struct page *page = pte_page(pte); -#if defined(CONFIG_XENO_PRIV) +#if defined(CONFIG_XEN_PRIVILEGED_GUEST) if (pte_io(pte)) { queue_l1_entry_update(ptep, 0); continue; @@ -918,9 +918,9 @@ int remap_page_range(unsigned long from, unsigned long phys_addr, unsigned long */ static inline void establish_pte(struct vm_area_struct * vma, unsigned long address, pte_t *page_table, pte_t entry) { -#ifdef CONFIG_XENO +#ifdef CONFIG_XEN if ( likely(vma->vm_mm == current->mm) ) { - XENO_flush_page_update_queue(); + XEN_flush_page_update_queue(); HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, entry, UVMF_INVLPG); } else { set_pte(page_table, entry); @@ -1193,13 +1193,13 @@ static int do_swap_page(struct mm_struct * mm, flush_page_to_ram(page); flush_icache_page(vma, page); -#ifdef CONFIG_XENO +#ifdef CONFIG_XEN if ( likely(vma->vm_mm == current->mm) ) { - XENO_flush_page_update_queue(); + XEN_flush_page_update_queue(); HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, pte, 0); } else { set_pte(page_table, pte); - XENO_flush_page_update_queue(); + XEN_flush_page_update_queue(); } #else set_pte(page_table, pte); @@ -1248,13 +1248,13 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, mark_page_accessed(page); } -#ifdef CONFIG_XENO +#ifdef CONFIG_XEN if ( likely(vma->vm_mm == current->mm) ) { - XENO_flush_page_update_queue(); + XEN_flush_page_update_queue(); HYPERVISOR_update_va_mapping(addr>>PAGE_SHIFT, entry, 0); } else { set_pte(page_table, entry); - XENO_flush_page_update_queue(); + XEN_flush_page_update_queue(); } #else set_pte(page_table, entry); @@ -1333,13 +1333,13 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma, entry = mk_pte(new_page, vma->vm_page_prot); if (write_access) entry = pte_mkwrite(pte_mkdirty(entry)); -#ifdef CONFIG_XENO +#ifdef CONFIG_XEN if ( likely(vma->vm_mm == current->mm) ) { - XENO_flush_page_update_queue(); + XEN_flush_page_update_queue(); HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, entry, 0); } else { set_pte(page_table, entry); - XENO_flush_page_update_queue(); + XEN_flush_page_update_queue(); } #else set_pte(page_table, entry); @@ -1487,7 +1487,7 @@ pte_t *pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) /* "fast" allocation can happen without dropping the lock.. */ new = pte_alloc_one_fast(mm, address); if (!new) { - XENO_flush_page_update_queue(); + XEN_flush_page_update_queue(); spin_unlock(&mm->page_table_lock); new = pte_alloc_one(mm, address); spin_lock(&mm->page_table_lock); diff --git a/xenolinux-2.4.25-sparse/mm/mprotect.c b/xenolinux-2.4.25-sparse/mm/mprotect.c index e618a3b74d..9d2ed3c24c 100644 --- a/xenolinux-2.4.25-sparse/mm/mprotect.c +++ b/xenolinux-2.4.25-sparse/mm/mprotect.c @@ -288,7 +288,7 @@ asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot if (!vma || vma->vm_start > start) goto out; -#if defined(CONFIG_XENO_PRIV) +#if defined(CONFIG_XEN_PRIVILEGED_GUEST) /* mprotect() unsupported for I/O mappings in Xenolinux. */ error = -EINVAL; if (vma->vm_flags & VM_IO) diff --git a/xenolinux-2.4.25-sparse/mm/mremap.c b/xenolinux-2.4.25-sparse/mm/mremap.c index 5af0b100f9..517854c86c 100644 --- a/xenolinux-2.4.25-sparse/mm/mremap.c +++ b/xenolinux-2.4.25-sparse/mm/mremap.c @@ -115,11 +115,11 @@ static int move_page_tables(struct mm_struct * mm, * the old page tables) */ oops_we_failed: - XENO_flush_page_update_queue(); + XEN_flush_page_update_queue(); flush_cache_range(mm, new_addr, new_addr + len); while ((offset += PAGE_SIZE) < len) move_one_page(mm, new_addr + offset, old_addr + offset); - XENO_flush_page_update_queue(); + XEN_flush_page_update_queue(); zap_page_range(mm, new_addr, len); return -1; } @@ -309,7 +309,7 @@ unsigned long do_mremap(unsigned long addr, !vm_enough_memory((new_len - old_len) >> PAGE_SHIFT)) goto out; -#if defined(CONFIG_XENO_PRIV) +#if defined(CONFIG_XEN_PRIVILEGED_GUEST) /* mremap() unsupported for I/O mappings in Xenolinux. */ ret = -EINVAL; if (vma->vm_flags & VM_IO) diff --git a/xenolinux-2.4.25-sparse/mm/swapfile.c b/xenolinux-2.4.25-sparse/mm/swapfile.c index de04a376d2..8345e27146 100644 --- a/xenolinux-2.4.25-sparse/mm/swapfile.c +++ b/xenolinux-2.4.25-sparse/mm/swapfile.c @@ -464,7 +464,7 @@ static void unuse_process(struct mm_struct * mm, pgd_t * pgd = pgd_offset(mm, vma->vm_start); unuse_vma(vma, pgd, entry, page); } - XENO_flush_page_update_queue(); + XEN_flush_page_update_queue(); spin_unlock(&mm->page_table_lock); return; } diff --git a/xenolinux-2.4.25-sparse/mm/vmalloc.c b/xenolinux-2.4.25-sparse/mm/vmalloc.c index b5b4da9161..dc1c7fe550 100644 --- a/xenolinux-2.4.25-sparse/mm/vmalloc.c +++ b/xenolinux-2.4.25-sparse/mm/vmalloc.c @@ -248,7 +248,7 @@ void vfree(void * addr) for (p = &vmlist ; (tmp = *p) ; p = &tmp->next) { if (tmp->addr == addr) { *p = tmp->next; -#ifdef CONFIG_XENO_PRIV +#ifdef CONFIG_XEN_PRIVILEGED_GUEST if (tmp->flags & VM_IOREMAP) zap_page_range(&init_mm, VMALLOC_VMADDR(tmp->addr), tmp->size); else