3f72f1bdJPsV3JCnBqs9ddL9tr6D2g xen/COPYING
3ddb79bcbOVHh38VJzc97-JEGD4dJQ xen/Makefile
3ddb79bcWnTwYsQRWl_PaneJfa6p0w xen/Rules.mk
-3ddb79bcZbRBzT3elFWSX7u6NtMagQ xen/arch/i386/Makefile
-3ddb79bcBQF85CfLS4i1WGZ4oLLaCA xen/arch/i386/Rules.mk
-3e5636e5FAYZ5_vQnmgwFJfSdmO5Mw xen/arch/i386/acpi.c
-3ddb79bcsjinG9k1KcvbVBuas1R2dA xen/arch/i386/apic.c
-3ddb79bcSC_LvnmFlX-T5iTgaR0SKg xen/arch/i386/boot/boot.S
-3ddb79bcUrk2EIaM5VsT6wUudH1kkg xen/arch/i386/delay.c
-3e32af9aRnYGl4GMOaDKp7JdfhOGhg xen/arch/i386/domain_page.c
-3ddb79bcecupHj56ZbTa3B0FxDowMg xen/arch/i386/entry.S
-3ddb79bcY5zW7KhvI9gvfuPi3ZumEg xen/arch/i386/extable.c
-3fe443fdDDb0Sw6NQBCk4GQapayfTA xen/arch/i386/flushtlb.c
-3ddb79bcesE5E-lS4QhRhlqXxqj9cA xen/arch/i386/i387.c
-3ddb79bcCAq6IpdkHueChoVTfXqEQQ xen/arch/i386/i8259.c
-3ddb79bcBit4xJXbwtX0kb1hh2uO1Q xen/arch/i386/idle0_task.c
-3ddb79bcKIkRR0kqWaJhe5VUDkMdxg xen/arch/i386/io_apic.c
-3ddb79bc1uNlAtc-84Ioq4qfcnI_CQ xen/arch/i386/ioremap.c
-3ddb79bdqfIcjkz_h9Hvtp8Tk_19Zw xen/arch/i386/irq.c
-3ddb79bcHwuCQDjBICDTSis52hWguw xen/arch/i386/mm.c
-3ddb79bdS4UeWWXDH-FaBKqcpMFcnw xen/arch/i386/mpparse.c
-3f12cff65EV3qOG2j37Qm0ShgvXGRw xen/arch/i386/nmi.c
-3ddb79bcnL-_Dtsbtjgxl7vJU3vBiQ xen/arch/i386/pci-dma.c
-3ddb79bdeJ7_86z03yTAPIeeywOg3Q xen/arch/i386/pci-i386.c
-3ddb79bdIKgipvGoqExEQ7jawfVowA xen/arch/i386/pci-i386.h
-3ddb79bdHe6_Uij4-glW91vInNtBYQ xen/arch/i386/pci-irq.c
-3ddb79bcZ_2FxINljqNSkqa17ISyJw xen/arch/i386/pci-pc.c
-40a4dfced2dnSzbKgJFlD3chKHexjQ xen/arch/i386/pdb-linux.c
-4022a73czgX7d-2zfF_cb33oVemApQ xen/arch/i386/pdb-stub.c
-3ddb79bc1_2bAt67x9MFCP4AZrQnvQ xen/arch/i386/process.c
-3ddb79bc7KxGCEJsgBnkDX7XjD_ZEQ xen/arch/i386/rwlock.c
-3ddb79bcrD6Z_rUvSDgrvjyb4846Eg xen/arch/i386/setup.c
-3ddb79bcSx2e8JSR3pdSGa8x1ScYzA xen/arch/i386/smp.c
-3ddb79bcfUN3-UBCPzX26IU8bq-3aw xen/arch/i386/smpboot.c
-3ddb79bc-Udq7ol-NX4q9XsYnN7A2Q xen/arch/i386/time.c
-3ddb79bccYVzXZJyVaxuv5T42Z1Fsw xen/arch/i386/trampoline.S
-3ddb79bcOftONV9h4QCxXOfiT0h91w xen/arch/i386/traps.c
-3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen/arch/i386/usercopy.c
-3ddb79bcOMCu9-5mKpjIh5d0qqBDPg xen/arch/i386/xen.lds
-404f1b91uzXgPOtIhs8UZPGbZvlHfg xen/arch/x86_64/Rules.mk
+3ddb79bcZbRBzT3elFWSX7u6NtMagQ xen/arch/x86/Makefile
+3ddb79bcBQF85CfLS4i1WGZ4oLLaCA xen/arch/x86/Rules.mk
+3e5636e5FAYZ5_vQnmgwFJfSdmO5Mw xen/arch/x86/acpi.c
+3ddb79bcsjinG9k1KcvbVBuas1R2dA xen/arch/x86/apic.c
+3ddb79bcSC_LvnmFlX-T5iTgaR0SKg xen/arch/x86/boot/boot.S
+3ddb79bcUrk2EIaM5VsT6wUudH1kkg xen/arch/x86/delay.c
+3e32af9aRnYGl4GMOaDKp7JdfhOGhg xen/arch/x86/domain_page.c
+3ddb79bcecupHj56ZbTa3B0FxDowMg xen/arch/x86/entry.S
+3ddb79bcY5zW7KhvI9gvfuPi3ZumEg xen/arch/x86/extable.c
+3fe443fdDDb0Sw6NQBCk4GQapayfTA xen/arch/x86/flushtlb.c
+3ddb79bcesE5E-lS4QhRhlqXxqj9cA xen/arch/x86/i387.c
+3ddb79bcCAq6IpdkHueChoVTfXqEQQ xen/arch/x86/i8259.c
+3ddb79bcBit4xJXbwtX0kb1hh2uO1Q xen/arch/x86/idle0_task.c
+3ddb79bcKIkRR0kqWaJhe5VUDkMdxg xen/arch/x86/io_apic.c
+3ddb79bdqfIcjkz_h9Hvtp8Tk_19Zw xen/arch/x86/irq.c
+3ddb79bcHwuCQDjBICDTSis52hWguw xen/arch/x86/mm.c
+3ddb79bdS4UeWWXDH-FaBKqcpMFcnw xen/arch/x86/mpparse.c
+3f12cff65EV3qOG2j37Qm0ShgvXGRw xen/arch/x86/nmi.c
+3ddb79bdHe6_Uij4-glW91vInNtBYQ xen/arch/x86/pci-irq.c
+3ddb79bcZ_2FxINljqNSkqa17ISyJw xen/arch/x86/pci-pc.c
+3ddb79bdeJ7_86z03yTAPIeeywOg3Q xen/arch/x86/pci-x86.c
+3ddb79bdIKgipvGoqExEQ7jawfVowA xen/arch/x86/pci-x86.h
+40a4dfced2dnSzbKgJFlD3chKHexjQ xen/arch/x86/pdb-linux.c
+4022a73czgX7d-2zfF_cb33oVemApQ xen/arch/x86/pdb-stub.c
+3ddb79bc1_2bAt67x9MFCP4AZrQnvQ xen/arch/x86/process.c
+3ddb79bc7KxGCEJsgBnkDX7XjD_ZEQ xen/arch/x86/rwlock.c
+3ddb79bcrD6Z_rUvSDgrvjyb4846Eg xen/arch/x86/setup.c
+3ddb79bcSx2e8JSR3pdSGa8x1ScYzA xen/arch/x86/smp.c
+3ddb79bcfUN3-UBCPzX26IU8bq-3aw xen/arch/x86/smpboot.c
+3ddb79bc-Udq7ol-NX4q9XsYnN7A2Q xen/arch/x86/time.c
+3ddb79bccYVzXZJyVaxuv5T42Z1Fsw xen/arch/x86/trampoline.S
+3ddb79bcOftONV9h4QCxXOfiT0h91w xen/arch/x86/traps.c
+3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen/arch/x86/usercopy.c
+3ddb79bcOMCu9-5mKpjIh5d0qqBDPg xen/arch/x86/xen.lds
3ddb79bdff-gj-jFGKjOejeHLqL8Lg xen/common/Makefile
3e397e66AyyD5fYraAySWuwi9uqSXg xen/common/ac_timer.c
4022a73c_BbDFd2YJ_NQYVvKX5Oz7w xen/common/debug-linux.c
40715b2dKRW7A71SNaeV6zfrEzYxPw xen/include/acpi/platform/acenv.h
40715b2d8fYydJMcODFrV1ocLklGDg xen/include/acpi/platform/acgcc.h
40715b2d1yZkqyAt0kgx2xEwsatuuA xen/include/acpi/platform/aclinux.h
-40715b2dWe0tDhx9LkLXzTQkvD49RA xen/include/asm-i386/acpi.h
-3ddb79c3l4IiQtf6MS2jIzcd-hJS8g xen/include/asm-i386/apic.h
-3ddb79c3QJYWr8LLGdonLbWmNb9pQQ xen/include/asm-i386/apicdef.h
-3ddb79c3OiG9eTsi9Dy3F_OkuRAzKA xen/include/asm-i386/atomic.h
-3ddb79c3rM-Ote0Xn6Ytg8Y6YqAG-A xen/include/asm-i386/bitops.h
-3ddb79c3KhTI0F_Iw_hRL9QEyOVK-g xen/include/asm-i386/cache.h
-404f1b920OQVnrbnXnySS-WxrH9Wzw xen/include/asm-i386/config.h
-3ddb79c2LLt11EQHjrd6sB7FUqvFfA xen/include/asm-i386/cpufeature.h
-3ddb79c2ADvRmdexd9y3AYK9_NTx-Q xen/include/asm-i386/current.h
-3ddb79c2jFkPAZTDmU35L6IUssYMgQ xen/include/asm-i386/debugreg.h
-3ddb79c3r9-31dIsewPV3P3i8HALsQ xen/include/asm-i386/delay.h
-3ddb79c34BFiXjBJ_cCKB0aCsV1IDw xen/include/asm-i386/desc.h
-40715b2dTokMLYGSuD58BnxOqyWVew xen/include/asm-i386/div64.h
-3e564149UkU91RX7onzpCAmbj_IFjw xen/include/asm-i386/dma.h
-3e20b82fl1jmQiKdLy7fxMcutfpjWA xen/include/asm-i386/domain_page.h
-3ddb79c3NU8Zy40OTrq3D-i30Y3t4A xen/include/asm-i386/fixmap.h
-3e2d29944GI24gf7vOP_7x8EyuqxeA xen/include/asm-i386/flushtlb.h
-3ddb79c39o75zPP0T1aQQ4mNrCAN2w xen/include/asm-i386/hardirq.h
-3ddb79c3BFEIwXR4IsWbwp4BoL4DkA xen/include/asm-i386/hdreg.h
-3ddb79c3TMDjkxVndKFKnGiwY0HzDg xen/include/asm-i386/i387.h
-3ddb79c3otbjpnqFDSzSeD0J-0xcwg xen/include/asm-i386/ide.h
-3ddb79c3fQ_O3o5NHK2N8AJdk0Ea4Q xen/include/asm-i386/io.h
-3ddb79c2TKeScYHQZreTdHqYNLbehQ xen/include/asm-i386/io_apic.h
-3ddb79c2L7rTlFzazOLW1XuSZefpFw xen/include/asm-i386/irq.h
-404f1b93OjLO4bFfBXYNaJdIqlNz-Q xen/include/asm-i386/ldt.h
-3ddb79c3I98vWcQR8xEo34JMJ4Ahyw xen/include/asm-i386/mc146818rtc.h
-3ddb79c3n_UbPuxlkNxvvLycClIkxA xen/include/asm-i386/mpspec.h
-3ddb79c2wa0dA_LGigxOelSGbJ284Q xen/include/asm-i386/msr.h
-3ddb79c3xjYnrv5t3VqYlR4tNEOl4Q xen/include/asm-i386/page.h
-3e450943kzme29HPCtq5HNOVQkddfw xen/include/asm-i386/param.h
-3ddb79c3ysKUbxZuwKBRK3WXU2TlEg xen/include/asm-i386/pci.h
-4022a73diKn2Ax4-R4gzk59lm1YdDg xen/include/asm-i386/pdb.h
-3ddb79c3nm2zdzeO6Mj8g7ex3txgGw xen/include/asm-i386/pgalloc.h
-3ddb79c2QF5-pZGzuX4QukPCDAl59A xen/include/asm-i386/processor.h
-3ddb79c3mbqEM7QQr3zVq7NiBNhouA xen/include/asm-i386/ptrace.h
-3ddb79c2plf7ciNgoNjU-RsbUzawsw xen/include/asm-i386/rwlock.h
-3ddb79c2mJI9YuGMScjofPlD8EdtgA xen/include/asm-i386/scatterlist.h
-3ddb79c3Hgbb2g8CyWLMCK-6_ZVQSQ xen/include/asm-i386/smp.h
-3ddb79c3jn8ALV_S9W5aeTYUQRKBpg xen/include/asm-i386/smpboot.h
-3ddb79c3e9DCEoR-WzNxcOQDzLu7BQ xen/include/asm-i386/softirq.h
-3ddb79c3NiyQE2vQnyGiaBnNjBO1rA xen/include/asm-i386/spinlock.h
-3e7f358aG11EvMI9VJ4_9hD4LUO7rQ xen/include/asm-i386/string.h
-3ddb79c3ezddh34MdelJpa5tNR00Dw xen/include/asm-i386/system.h
-3e397e66xPNc8eaSqC9pPbyAtRGzHA xen/include/asm-i386/time.h
-3e450943TfE-iovQIY_tMO_VdGsPhA xen/include/asm-i386/timex.h
-3ddb79c4HugMq7IYGxcQKFBpKwKhzA xen/include/asm-i386/types.h
-3ddb79c3M2n1ROZH6xk3HbyN4CPDqg xen/include/asm-i386/uaccess.h
-3ddb79c3uPGcP_l_2xyGgBSWd5aC-Q xen/include/asm-i386/unaligned.h
-404f1b95z0B0jb2IfvZJ7uvmYqsqpg xen/include/asm-x86_64/apic.h
-404f1b95_OZH-rw_durHSa_Kgdo95A xen/include/asm-x86_64/apicdef.h
-404f1b967UWSPkB0cwT9v-rilNzkHw xen/include/asm-x86_64/atomic.h
-404f1b97UDomt73PizniyrCaxVRkXQ xen/include/asm-x86_64/bitops.h
-404f1b99W-dMUlFpsvt--tVpQvNgEQ xen/include/asm-x86_64/cache.h
-404f1b9b_phpQlRnyiWqP6RodfZDpg xen/include/asm-x86_64/config.h
-404f1b9cz7UV611DK6CTY1ZAiwGtTw xen/include/asm-x86_64/cpufeature.h
-404f1b9ceJeGVaPNIENm2FkK0AgEOQ xen/include/asm-x86_64/current.h
-404f1b9d854xae6HKv-9W8lLSgROdQ xen/include/asm-x86_64/debugreg.h
-404f1b9eRm9rtcM29P5O2nrPFOGSow xen/include/asm-x86_64/delay.h
-404f1b9fl6AQ_a-T1TDK3fuwTPXmHw xen/include/asm-x86_64/desc.h
-404f1ba05mjpUREtosjzz3PPL5cTJA xen/include/asm-x86_64/dma.h
-404f1ba13mnjeZT2ytPm0DB63703nA xen/include/asm-x86_64/domain_page.h
-404f1ba31i0gS-cdqvd0RZX1HVnxsA xen/include/asm-x86_64/fixmap.h
-404f1ba4KXQ_V7HOkenF04KRU7Tl7w xen/include/asm-x86_64/flushtlb.h
-404f1ba5Sqzc22eXORShvCF9-rpMbA xen/include/asm-x86_64/hardirq.h
-404f1ba6_nDjomU9HJVvUugj63LvEg xen/include/asm-x86_64/hdreg.h
-404f1ba7Q-lF892SDZLWjJ62wmauSA xen/include/asm-x86_64/i387.h
-404f1ba8yxfnHH0NWC1B-wmd6bK2wg xen/include/asm-x86_64/ide.h
-404f1ba9_7NIylhSRmokesN8TNIiNg xen/include/asm-x86_64/io.h
-404f1baaiXXy7vChbzKmluSyJ5LWIw xen/include/asm-x86_64/io_apic.h
-404f1baceMqjaYFs7oZoNsPkaZJ0WQ xen/include/asm-x86_64/irq.h
-404f1badfXZJZ2sU8sh9PS2EZvd19Q xen/include/asm-x86_64/ldt.h
-404f1bae_yI5vMg-_k4EySMERbbz2Q xen/include/asm-x86_64/mc146818rtc.h
-404f1bafYfNwntXQGIggyj7D6YruJQ xen/include/asm-x86_64/mpspec.h
-404f1bb0asrts1dyLQhyARCgzhL0NA xen/include/asm-x86_64/msr.h
-404f1bb1LSCqrMDSfRAti5NdMQPJBQ xen/include/asm-x86_64/page.h
-404f1bb2IUaGWD82SrQFaacyBixVFw xen/include/asm-x86_64/param.h
-404f1bb3zSQfhMuQ24xNtq9Ed09jGw xen/include/asm-x86_64/pci.h
-404f1bb41Yl-5ZjIWnG66HDCj6OIWA xen/include/asm-x86_64/pda.h
-404f1bb5toGAnZVAlJ2fWWMv28DFJQ xen/include/asm-x86_64/pdb.h
-404f1bb6pz982jtehZacFKhFUac0ug xen/include/asm-x86_64/pgalloc.h
-404f1bb756fZfxk5HDx7J7BW3R-1jQ xen/include/asm-x86_64/processor.h
-404f1bb86rAXB3aLS1vYdcqpJiEcyg xen/include/asm-x86_64/ptrace.h
-404f1bb9K0pcyDrV4Ctva1HUczoueQ xen/include/asm-x86_64/rwlock.h
-404f1bbaIdS7vc3sE032fQG6EnY8AQ xen/include/asm-x86_64/scatterlist.h
-404f1bbbR5n83SiPof3joEPv9xWPPA xen/include/asm-x86_64/smp.h
-404f1bbc67CEECfR8ATd7dPD1ajLng xen/include/asm-x86_64/smpboot.h
-404f1bbdXaaPrIp5AUIjC8Hsp2H0Aw xen/include/asm-x86_64/softirq.h
-404f1bbeomkO5YarnkIRWxVhlB5EJA xen/include/asm-x86_64/spinlock.h
-404f1bbf82VK-kyDVBmR7CTvtTBKaw xen/include/asm-x86_64/string.h
-404f1bc0laOnGpDxFpgdiuZpEyOOKw xen/include/asm-x86_64/system.h
-404f1bc1FnfxOhmgWYHP97TPqA40Pw xen/include/asm-x86_64/time.h
-404f1bc2mx9ZbazcdFh-AN70ZvNMJQ xen/include/asm-x86_64/timex.h
-404f1bc3R2o0PIpQme8bDWeHcqHNGw xen/include/asm-x86_64/types.h
-404f1bc4tWkB9Qr8RkKtZGW5eMQzhw xen/include/asm-x86_64/uaccess.h
-404f1bc5idyWKKROGo_hvHVx58Gmkw xen/include/asm-x86_64/unaligned.h
+40715b2dWe0tDhx9LkLXzTQkvD49RA xen/include/asm-x86/acpi.h
+3ddb79c3l4IiQtf6MS2jIzcd-hJS8g xen/include/asm-x86/apic.h
+3ddb79c3QJYWr8LLGdonLbWmNb9pQQ xen/include/asm-x86/apicdef.h
+3ddb79c3OiG9eTsi9Dy3F_OkuRAzKA xen/include/asm-x86/atomic.h
+3ddb79c3rM-Ote0Xn6Ytg8Y6YqAG-A xen/include/asm-x86/bitops.h
+3ddb79c3KhTI0F_Iw_hRL9QEyOVK-g xen/include/asm-x86/cache.h
+404f1b920OQVnrbnXnySS-WxrH9Wzw xen/include/asm-x86/config.h
+3ddb79c2LLt11EQHjrd6sB7FUqvFfA xen/include/asm-x86/cpufeature.h
+3ddb79c2ADvRmdexd9y3AYK9_NTx-Q xen/include/asm-x86/current.h
+3ddb79c2jFkPAZTDmU35L6IUssYMgQ xen/include/asm-x86/debugreg.h
+3ddb79c3r9-31dIsewPV3P3i8HALsQ xen/include/asm-x86/delay.h
+3ddb79c34BFiXjBJ_cCKB0aCsV1IDw xen/include/asm-x86/desc.h
+40715b2dTokMLYGSuD58BnxOqyWVew xen/include/asm-x86/div64.h
+3e20b82fl1jmQiKdLy7fxMcutfpjWA xen/include/asm-x86/domain_page.h
+3ddb79c3NU8Zy40OTrq3D-i30Y3t4A xen/include/asm-x86/fixmap.h
+3e2d29944GI24gf7vOP_7x8EyuqxeA xen/include/asm-x86/flushtlb.h
+3ddb79c39o75zPP0T1aQQ4mNrCAN2w xen/include/asm-x86/hardirq.h
+3ddb79c3TMDjkxVndKFKnGiwY0HzDg xen/include/asm-x86/i387.h
+3ddb79c3fQ_O3o5NHK2N8AJdk0Ea4Q xen/include/asm-x86/io.h
+3ddb79c2TKeScYHQZreTdHqYNLbehQ xen/include/asm-x86/io_apic.h
+3ddb79c2L7rTlFzazOLW1XuSZefpFw xen/include/asm-x86/irq.h
+404f1b93OjLO4bFfBXYNaJdIqlNz-Q xen/include/asm-x86/ldt.h
+3ddb79c3I98vWcQR8xEo34JMJ4Ahyw xen/include/asm-x86/mc146818rtc.h
+3ddb79c3n_UbPuxlkNxvvLycClIkxA xen/include/asm-x86/mpspec.h
+3ddb79c2wa0dA_LGigxOelSGbJ284Q xen/include/asm-x86/msr.h
+3ddb79c3xjYnrv5t3VqYlR4tNEOl4Q xen/include/asm-x86/page.h
+3e450943kzme29HPCtq5HNOVQkddfw xen/include/asm-x86/param.h
+3ddb79c3ysKUbxZuwKBRK3WXU2TlEg xen/include/asm-x86/pci.h
+4022a73diKn2Ax4-R4gzk59lm1YdDg xen/include/asm-x86/pdb.h
+3ddb79c2QF5-pZGzuX4QukPCDAl59A xen/include/asm-x86/processor.h
+3ddb79c3mbqEM7QQr3zVq7NiBNhouA xen/include/asm-x86/ptrace.h
+3ddb79c2plf7ciNgoNjU-RsbUzawsw xen/include/asm-x86/rwlock.h
+3ddb79c3Hgbb2g8CyWLMCK-6_ZVQSQ xen/include/asm-x86/smp.h
+3ddb79c3jn8ALV_S9W5aeTYUQRKBpg xen/include/asm-x86/smpboot.h
+3ddb79c3e9DCEoR-WzNxcOQDzLu7BQ xen/include/asm-x86/softirq.h
+3ddb79c3NiyQE2vQnyGiaBnNjBO1rA xen/include/asm-x86/spinlock.h
+3e7f358aG11EvMI9VJ4_9hD4LUO7rQ xen/include/asm-x86/string.h
+3ddb79c3ezddh34MdelJpa5tNR00Dw xen/include/asm-x86/system.h
+3e397e66xPNc8eaSqC9pPbyAtRGzHA xen/include/asm-x86/time.h
+3e450943TfE-iovQIY_tMO_VdGsPhA xen/include/asm-x86/timex.h
+3ddb79c4HugMq7IYGxcQKFBpKwKhzA xen/include/asm-x86/types.h
+3ddb79c3M2n1ROZH6xk3HbyN4CPDqg xen/include/asm-x86/uaccess.h
+3ddb79c3uPGcP_l_2xyGgBSWd5aC-Q xen/include/asm-x86/unaligned.h
+404f1b9b_phpQlRnyiWqP6RodfZDpg xen/include/asm-x86/x86_64/config.h
+404f1b9ceJeGVaPNIENm2FkK0AgEOQ xen/include/asm-x86/x86_64/current.h
+404f1b9fl6AQ_a-T1TDK3fuwTPXmHw xen/include/asm-x86/x86_64/desc.h
+404f1badfXZJZ2sU8sh9PS2EZvd19Q xen/include/asm-x86/x86_64/ldt.h
+404f1bb1LSCqrMDSfRAti5NdMQPJBQ xen/include/asm-x86/x86_64/page.h
+404f1bb41Yl-5ZjIWnG66HDCj6OIWA xen/include/asm-x86/x86_64/pda.h
+404f1bb756fZfxk5HDx7J7BW3R-1jQ xen/include/asm-x86/x86_64/processor.h
+404f1bb86rAXB3aLS1vYdcqpJiEcyg xen/include/asm-x86/x86_64/ptrace.h
+404f1bc4tWkB9Qr8RkKtZGW5eMQzhw xen/include/asm-x86/x86_64/uaccess.h
400304fcmRQmDdFYEzDh0wcBba9alg xen/include/hypervisor-ifs/COPYING
-404f1bc68SXxmv0zQpXBWGrCzSyp8w xen/include/hypervisor-ifs/arch-i386/hypervisor-if.h
+404f1bc68SXxmv0zQpXBWGrCzSyp8w xen/include/hypervisor-ifs/arch-x86/hypervisor-if.h
404f1bc7IwU-qnH8mJeVu0YsNGMrcw xen/include/hypervisor-ifs/arch-x86_64/hypervisor-if.h
3ddb79c2PMeWTK86y4C3F4MzHw4A1g xen/include/hypervisor-ifs/dom0_ops.h
403cd194j2pyLqXD8FJ-ukvZzkPenw xen/include/hypervisor-ifs/event_channel.h
perfc ?= n
trace ?= n
-COMPILE_ARCH := $(shell uname -m | sed -e s/i.86/i386/)
-TARGET_ARCH ?= $(COMPILE_ARCH)
+# Currently supported architectures:
+# {COMPILE,TARGET}_ARCH := x86
+# {COMPILE,TARGET}_SUBARCH := x86_32 | x86_64
+COMPILE_ARCH := x86
+COMPILE_SUBARCH := $(shell uname -m | sed -e s/i.86/x86_32/)
+
+TARGET_ARCH ?= $(COMPILE_ARCH)
+TARGET_SUBARCH ?= $(COMPILE_SUBARCH)
TARGET := $(BASEDIR)/xen
HDRS := $(wildcard $(BASEDIR)/include/xen/*.h)
+++ /dev/null
-
-include $(BASEDIR)/Rules.mk
-
-ifneq ($(debugger),y)
-OBJS := $(subst pdb-linux.o,,$(OBJS))
-OBJS := $(subst pdb-stub.o,,$(OBJS))
-endif
-
-# What happens here? We link monitor object files together, starting
-# at MONITOR_BASE (a very high address). But the bootloader cannot put
-# things there, so we initially load at LOAD_BASE. A hacky little
-# tool called `elf-reloc' is used to modify segment offsets from
-# MONITOR_BASE-relative to LOAD_BASE-relative.
-# (NB. Linux gets round this by turning its image into raw binary, then
-# wrapping that with a low-memory bootstrapper.)
-default: boot/boot.o $(OBJS)
- $(LD) -r -o arch.o $(OBJS)
- $(LD) $(LDFLAGS) boot/boot.o $(ALL_OBJS) -o $(TARGET).dbg
- objcopy -R .note -R .comment -S $(TARGET).dbg $(TARGET)
- $(BASEDIR)/tools/elf-reloc $(MONITOR_BASE) $(LOAD_BASE) $(TARGET)
-
-clean:
- rm -f *.o *~ core boot/*.o boot/*~ boot/core
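
The `elf-reloc' step described in the comment above rewrites load addresses inside the linked ELF image. As a rough sketch of the idea only, under the assumption that the tool rebases each program header's physical address (the real xen/tools/elf-reloc may differ in usage and in which fields it patches):

/* Hypothetical sketch of the elf-reloc idea; NOT the actual tool. */
#include <elf.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    unsigned long old_base, new_base;
    Elf32_Ehdr ehdr;
    Elf32_Phdr phdr;
    FILE *fp;
    int i;

    if (argc != 4) {
        fprintf(stderr, "usage: elf-reloc <old_base> <new_base> <image>\n");
        return 1;
    }
    old_base = strtoul(argv[1], NULL, 16);  /* e.g. MONITOR_BASE */
    new_base = strtoul(argv[2], NULL, 16);  /* e.g. LOAD_BASE */

    if ((fp = fopen(argv[3], "r+b")) == NULL)
        return 1;
    if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1)
        return 1;

    for (i = 0; i < ehdr.e_phnum; i++) {
        long off = ehdr.e_phoff + i * ehdr.e_phentsize;
        fseek(fp, off, SEEK_SET);
        if (fread(&phdr, sizeof(phdr), 1, fp) != 1)
            return 1;
        /* Rebase this segment's physical load address (assumed). */
        phdr.p_paddr = phdr.p_paddr - old_base + new_base;
        fseek(fp, off, SEEK_SET);
        fwrite(&phdr, sizeof(phdr), 1, fp);
    }
    fclose(fp);
    return 0;
}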
+++ /dev/null
-########################################
-# x86-specific definitions
-
-CC := gcc
-LD := ld
-# Linker should relocate monitor to this address
-MONITOR_BASE := 0xFC500000
-# Bootloader should load monitor to this real address
-LOAD_BASE := 0x00100000
-CFLAGS := -nostdinc -fno-builtin -fno-common -fno-strict-aliasing -O3
-CFLAGS += -iwithprefix include -Wall -Werror -DMONITOR_BASE=$(MONITOR_BASE)
-CFLAGS += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__
-CFLAGS += -Wno-pointer-arith -Wredundant-decls -m32
-TARGET_CPU := i686
-CFLAGS += -march=$(TARGET_CPU)
-LDARCHFLAGS := --oformat elf32-i386
-LDFLAGS := -T xen.lds -N
-
-
+++ /dev/null
-/*
- * acpi.c - Architecture-Specific Low-Level ACPI Support
- *
- * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
- * Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
- * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#include <xen/config.h>
-#include <xen/kernel.h>
-#include <xen/init.h>
-#include <xen/types.h>
-/*#include <xen/stddef.h>*/
-#include <xen/slab.h>
-#include <xen/pci.h>
-/*#include <xen/bootmem.h>*/
-#include <xen/irq.h>
-#include <xen/acpi.h>
-#include <asm/mpspec.h>
-#include <asm/io.h>
-#include <asm/apic.h>
-#include <asm/apicdef.h>
-#include <asm/page.h>
-/*#include <asm/pgtable.h>*/
-#include <asm/pgalloc.h>
-#include <asm/io_apic.h>
-#include <asm/acpi.h>
-/*#include <asm/save_state.h>*/
-#include <asm/smpboot.h>
-
-
-#define PREFIX "ACPI: "
-
-int acpi_lapic = 0;
-int acpi_ioapic = 0;
-
-/* --------------------------------------------------------------------------
- Boot-time Configuration
- -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI_BOOT
-int acpi_noirq __initdata = 0; /* skip ACPI IRQ initialization */
-int acpi_ht __initdata = 1; /* enable HT */
-
-enum acpi_irq_model_id acpi_irq_model;
-
-
-/*
- * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
- * to map the target physical address. The problem is that set_fixmap()
- * provides a single page, and it is possible that the page is not
- * sufficient.
- * By using this area, we can map up to MAX_IO_APICS pages temporarily,
- * i.e. until the next __va_range() call.
- *
- * Important Safety Note: The fixed I/O APIC page numbers are *subtracted*
- * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and
- * count idx down while incrementing the phys address.
- */
-char *__acpi_map_table(unsigned long phys, unsigned long size)
-{
- unsigned long base, offset, mapped_size;
- int idx;
-
- if (phys + size < 8*1024*1024)
- return __va(phys);
-
- offset = phys & (PAGE_SIZE - 1);
- mapped_size = PAGE_SIZE - offset;
- set_fixmap(FIX_ACPI_END, phys);
- base = fix_to_virt(FIX_ACPI_END);
-
- /*
- * Most cases can be covered by the below.
- */
- idx = FIX_ACPI_END;
- while (mapped_size < size) {
- if (--idx < FIX_ACPI_BEGIN)
- return 0; /* cannot handle this */
- phys += PAGE_SIZE;
- set_fixmap(idx, phys);
- mapped_size += PAGE_SIZE;
- }
-
- return ((unsigned char *) base + offset);
-}
-
-
-#ifdef CONFIG_X86_LOCAL_APIC
-
-static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
-
-
-static int __init
-acpi_parse_madt (
- unsigned long phys_addr,
- unsigned long size)
-{
- struct acpi_table_madt *madt = NULL;
-
- if (!phys_addr || !size)
- return -EINVAL;
-
- madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size);
- if (!madt) {
- printk(KERN_WARNING PREFIX "Unable to map MADT\n");
- return -ENODEV;
- }
-
- if (madt->lapic_address)
- acpi_lapic_addr = (u64) madt->lapic_address;
-
- printk(KERN_INFO PREFIX "Local APIC address 0x%08x\n",
- madt->lapic_address);
-
- detect_clustered_apic(madt->header.oem_id, madt->header.oem_table_id);
-
- return 0;
-}
-
-
-static int __init
-acpi_parse_lapic (
- acpi_table_entry_header *header)
-{
- struct acpi_table_lapic *processor = NULL;
-
- processor = (struct acpi_table_lapic*) header;
- if (!processor)
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
- mp_register_lapic (
- processor->id, /* APIC ID */
- processor->flags.enabled); /* Enabled? */
-
- return 0;
-}
-
-
-static int __init
-acpi_parse_lapic_addr_ovr (
- acpi_table_entry_header *header)
-{
- struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL;
-
- lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header;
- if (!lapic_addr_ovr)
- return -EINVAL;
-
- acpi_lapic_addr = lapic_addr_ovr->address;
-
- return 0;
-}
-
-static int __init
-acpi_parse_lapic_nmi (
- acpi_table_entry_header *header)
-{
- struct acpi_table_lapic_nmi *lapic_nmi = NULL;
-
- lapic_nmi = (struct acpi_table_lapic_nmi*) header;
- if (!lapic_nmi)
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
- if (lapic_nmi->lint != 1)
- printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
-
- return 0;
-}
-
-#endif /*CONFIG_X86_LOCAL_APIC*/
-
-#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
-
-static int __init
-acpi_parse_ioapic (
- acpi_table_entry_header *header)
-{
- struct acpi_table_ioapic *ioapic = NULL;
-
- ioapic = (struct acpi_table_ioapic*) header;
- if (!ioapic)
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
- mp_register_ioapic (
- ioapic->id,
- ioapic->address,
- ioapic->global_irq_base);
-
- return 0;
-}
-
-
-static int __init
-acpi_parse_int_src_ovr (
- acpi_table_entry_header *header)
-{
- struct acpi_table_int_src_ovr *intsrc = NULL;
-
- intsrc = (struct acpi_table_int_src_ovr*) header;
- if (!intsrc)
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
- mp_override_legacy_irq (
- intsrc->bus_irq,
- intsrc->flags.polarity,
- intsrc->flags.trigger,
- intsrc->global_irq);
-
- return 0;
-}
-
-
-static int __init
-acpi_parse_nmi_src (
- acpi_table_entry_header *header)
-{
- struct acpi_table_nmi_src *nmi_src = NULL;
-
- nmi_src = (struct acpi_table_nmi_src*) header;
- if (!nmi_src)
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
-	/* TBD: Support nmi_src entries? */
-
- return 0;
-}
-
-#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/
-
-
-static unsigned long __init
-acpi_scan_rsdp (
- unsigned long start,
- unsigned long length)
-{
- unsigned long offset = 0;
- unsigned long sig_len = sizeof("RSD PTR ") - 1;
-
- /*
- * Scan all 16-byte boundaries of the physical memory region for the
- * RSDP signature.
- */
- for (offset = 0; offset < length; offset += 16) {
- if (strncmp((char *) (start + offset), "RSD PTR ", sig_len))
- continue;
- return (start + offset);
- }
-
- return 0;
-}
-
-
-unsigned long __init
-acpi_find_rsdp (void)
-{
- unsigned long rsdp_phys = 0;
-
- /*
- * Scan memory looking for the RSDP signature. First search EBDA (low
- * memory) paragraphs and then search upper memory (E0000-FFFFF).
- */
- rsdp_phys = acpi_scan_rsdp (0, 0x400);
- if (!rsdp_phys)
-		rsdp_phys = acpi_scan_rsdp (0xE0000, 0x20000); /* 0xE0000-0xFFFFF */
-
- return rsdp_phys;
-}
-
-
-/*
- * acpi_boot_init()
- * called from setup_arch(), always.
- * 1. maps ACPI tables for later use
- * 2. enumerates lapics
- * 3. enumerates io-apics
- *
- * side effects:
- * acpi_lapic = 1 if LAPIC found
- * acpi_ioapic = 1 if IOAPIC found
- * if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
- * if acpi_blacklisted() acpi_disabled = 1;
- * acpi_irq_model=...
- * ...
- *
- * return value: (currently ignored)
- * 0: success
- * !0: failure
- */
-int __init
-acpi_boot_init (void)
-{
- int result = 0;
-
- if (acpi_disabled && !acpi_ht)
- return(1);
-
- /*
- * The default interrupt routing model is PIC (8259). This gets
- * overridden if IOAPICs are enumerated (below).
- */
- acpi_irq_model = ACPI_IRQ_MODEL_PIC;
-
- /*
- * Initialize the ACPI boot-time table parser.
- */
- result = acpi_table_init();
- if (result) {
- acpi_disabled = 1;
- return result;
- }
-
- result = acpi_blacklisted();
- if (result) {
- printk(KERN_NOTICE PREFIX "BIOS listed in blacklist, disabling ACPI support\n");
- acpi_disabled = 1;
- return result;
- }
-
-#ifdef CONFIG_X86_LOCAL_APIC
-
- /*
- * MADT
- * ----
- * Parse the Multiple APIC Description Table (MADT), if it exists.
- * Note that this table provides platform SMP configuration
- * information -- the successor to MPS tables.
- */
-
- result = acpi_table_parse(ACPI_APIC, acpi_parse_madt);
- if (!result) {
- return 0;
- }
- else if (result < 0) {
- printk(KERN_ERR PREFIX "Error parsing MADT\n");
- return result;
- }
- else if (result > 1)
- printk(KERN_WARNING PREFIX "Multiple MADT tables exist\n");
-
- /*
- * Local APIC
- * ----------
- * Note that the LAPIC address is obtained from the MADT (32-bit value)
- * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value).
- */
-
- result = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr);
- if (result < 0) {
- printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
- return result;
- }
-
- mp_register_lapic_address(acpi_lapic_addr);
-
- result = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic);
- if (!result) {
- printk(KERN_ERR PREFIX "No LAPIC entries present\n");
- /* TBD: Cleanup to allow fallback to MPS */
- return -ENODEV;
- }
- else if (result < 0) {
- printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
- /* TBD: Cleanup to allow fallback to MPS */
- return result;
- }
-
- result = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi);
- if (result < 0) {
- printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
- /* TBD: Cleanup to allow fallback to MPS */
- return result;
- }
-
- acpi_lapic = 1;
-
-#endif /*CONFIG_X86_LOCAL_APIC*/
-
-#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
-
- /*
- * I/O APIC
- * --------
- */
-
- /*
- * ACPI interpreter is required to complete interrupt setup,
- * so if it is off, don't enumerate the io-apics with ACPI.
- * If MPS is present, it will handle them,
- * otherwise the system will stay in PIC mode
- */
- if (acpi_disabled || acpi_noirq) {
- return 1;
- }
-
- /*
- * if "noapic" boot option, don't look for IO-APICs
- */
- if (ioapic_setup_disabled()) {
- printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
- "due to 'noapic' option.\n");
- return 1;
- }
-
-
- result = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic);
- if (!result) {
- printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
- return -ENODEV;
- }
- else if (result < 0) {
- printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
- return result;
- }
-
- /* Build a default routing table for legacy (ISA) interrupts. */
- mp_config_acpi_legacy_irqs();
-
- result = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr);
- if (result < 0) {
- printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n");
- /* TBD: Cleanup to allow fallback to MPS */
- return result;
- }
-
- result = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src);
- if (result < 0) {
- printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
- /* TBD: Cleanup to allow fallback to MPS */
- return result;
- }
-
- acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
-
- acpi_irq_balance_set(NULL);
-
- acpi_ioapic = 1;
-
- if (acpi_lapic && acpi_ioapic)
- smp_found_config = 1;
-
-#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/
-
- return 0;
-}
-
-#endif /*CONFIG_ACPI_BOOT*/
-
-#ifdef CONFIG_ACPI_BUS
-/*
- * "acpi_pic_sci=level" (current default)
- * programs the PIC-mode SCI to Level Trigger.
- * (NO-OP if the BIOS set Level Trigger already)
- *
- * If a PIC-mode SCI is not recognized or gives spurious IRQ7s
- * it may require Edge Trigger -- use "acpi_pic_sci=edge"
- * (NO-OP if the BIOS set Edge Trigger already)
- *
- * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers
- * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge.
- * ECLR1 is IRQs 0-7 (IRQ 0, 1, 2 must be 0)
- * ECLR2 is IRQs 8-15 (IRQ 8, 13 must be 0)
- */
-
-static __initdata int acpi_pic_sci_trigger; /* 0: level, 1: edge */
-
-void __init
-acpi_pic_sci_set_trigger(unsigned int irq)
-{
- unsigned char mask = 1 << (irq & 7);
- unsigned int port = 0x4d0 + (irq >> 3);
- unsigned char val = inb(port);
-
-
- printk(PREFIX "IRQ%d SCI:", irq);
- if (!(val & mask)) {
- printk(" Edge");
-
- if (!acpi_pic_sci_trigger) {
- printk(" set to Level");
- outb(val | mask, port);
- }
- } else {
- printk(" Level");
-
- if (acpi_pic_sci_trigger) {
- printk(" set to Edge");
- outb(val | mask, port);
- }
- }
- printk(" Trigger.\n");
-}
-
-int __init
-acpi_pic_sci_setup(char *str)
-{
- while (str && *str) {
- if (strncmp(str, "level", 5) == 0)
- acpi_pic_sci_trigger = 0; /* force level trigger */
- if (strncmp(str, "edge", 4) == 0)
- acpi_pic_sci_trigger = 1; /* force edge trigger */
- str = strchr(str, ',');
- if (str)
- str += strspn(str, ", \t");
- }
- return 1;
-}
-
-__setup("acpi_pic_sci=", acpi_pic_sci_setup);
-
-#endif /* CONFIG_ACPI_BUS */
-
-
-
-/* --------------------------------------------------------------------------
- Low-Level Sleep Support
- -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI_SLEEP
-
-#define DEBUG
-
-#ifdef DEBUG
-#include <xen/serial.h>
-#endif
-
-/* address in low memory of the wakeup routine. */
-unsigned long acpi_wakeup_address = 0;
-
-/* new page directory that we will be using */
-static pmd_t *pmd;
-
-/* saved page directory */
-static pmd_t saved_pmd;
-
-/* page which we'll use for the new page directory */
-static pte_t *ptep;
-
-extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
-
-/*
- * acpi_create_identity_pmd
- *
- * Create a new, identity mapped pmd.
- *
- * Do this by creating a new page directory, and marking all the pages as R/W
- * Then set it as the new Page Middle Directory.
- * And, of course, flush the TLB so it takes effect.
- *
- * We save the address of the old one, for later restoration.
- */
-static void acpi_create_identity_pmd (void)
-{
- pgd_t *pgd;
- int i;
-
- ptep = (pte_t*)__get_free_page(GFP_KERNEL);
-
- /* fill page with low mapping */
- for (i = 0; i < PTRS_PER_PTE; i++)
- set_pte(ptep + i, mk_pte_phys(i << PAGE_SHIFT, PAGE_SHARED));
-
- pgd = pgd_offset(current->active_mm, 0);
- pmd = pmd_alloc(current->mm,pgd, 0);
-
- /* save the old pmd */
- saved_pmd = *pmd;
-
- /* set the new one */
- set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(ptep)));
-
- /* flush the TLB */
- local_flush_tlb();
-}
-
-/*
- * acpi_restore_pmd
- *
- * Restore the old pmd saved by acpi_create_identity_pmd and
- * free the page that said function alloc'd
- */
-static void acpi_restore_pmd (void)
-{
- set_pmd(pmd, saved_pmd);
- local_flush_tlb();
- free_page((unsigned long)ptep);
-}
-
-/**
- * acpi_save_state_mem - save kernel state
- *
- * Create an identity mapped page table and copy the wakeup routine to
- * low memory.
- */
-int acpi_save_state_mem (void)
-{
- acpi_create_identity_pmd();
- acpi_copy_wakeup_routine(acpi_wakeup_address);
-
- return 0;
-}
-
-/**
- * acpi_save_state_disk - save kernel state to disk
- *
- */
-int acpi_save_state_disk (void)
-{
- return 1;
-}
-
-/*
- * acpi_restore_state
- */
-void acpi_restore_state_mem (void)
-{
- acpi_restore_pmd();
-}
-
-/**
- * acpi_reserve_bootmem - do _very_ early ACPI initialisation
- *
- * We allocate a page in low memory for the wakeup
- * routine for when we come back from a sleep state. The
- * runtime allocator allows specification of <16M pages, but not
- * <1M pages.
- */
-void __init acpi_reserve_bootmem(void)
-{
- acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE);
- printk(KERN_DEBUG "ACPI: have wakeup address 0x%8.8lx\n", acpi_wakeup_address);
-}
-
-void do_suspend_lowlevel_s4bios(int resume)
-{
- if (!resume) {
- save_processor_context();
- acpi_save_register_state((unsigned long)&&acpi_sleep_done);
- acpi_enter_sleep_state_s4bios();
- return;
- }
-acpi_sleep_done:
- restore_processor_context();
-}
-
-
-#endif /*CONFIG_ACPI_SLEEP*/
-
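
The ECLR logic in acpi_pic_sci_set_trigger() above derives the register port and bit mask directly from the IRQ number. A small stand-alone check of that arithmetic (illustration only; it performs no real port I/O):

/* Illustrative check of the ECLR addressing used above:
 * IRQs 0-7 live in ECLR1 (port 0x4d0), IRQs 8-15 in ECLR2 (0x4d1),
 * and bit n of a register covers IRQ (8*reg + n). */
#include <stdio.h>

int main(void)
{
    for (int irq = 0; irq < 16; irq++) {
        unsigned port = 0x4d0 + (irq >> 3);
        unsigned char mask = 1 << (irq & 7);
        printf("IRQ%-2d -> port 0x%x mask 0x%02x\n", irq, port, mask);
    }
    return 0;
}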
+++ /dev/null
-/*
- * Local APIC handling, local APIC timers
- *
- * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- * Fixes
- * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
- * thanks to Eric Gilmore
- * and Rolf G. Tews
- * for testing these extensively.
- * Maciej W. Rozycki : Various updates and fixes.
- * Mikael Pettersson : Power Management for UP-APIC.
- */
-
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/sched.h>
-#include <xen/irq.h>
-#include <xen/delay.h>
-#include <asm/mc146818rtc.h>
-#include <asm/msr.h>
-#include <xen/errno.h>
-#include <asm/atomic.h>
-#include <xen/smp.h>
-#include <xen/interrupt.h>
-#include <asm/mpspec.h>
-#include <asm/pgalloc.h>
-#include <asm/hardirq.h>
-#include <asm/apic.h>
-#include <xen/mm.h>
-#include <asm/io_apic.h>
-#include <asm/timex.h>
-#include <xen/ac_timer.h>
-#include <xen/perfc.h>
-
-
-/* Using APIC to generate smp_local_timer_interrupt? */
-int using_apic_timer = 0;
-
-static int enabled_via_apicbase;
-
-int get_maxlvt(void)
-{
- unsigned int v, ver, maxlvt;
-
- v = apic_read(APIC_LVR);
- ver = GET_APIC_VERSION(v);
- /* 82489DXs do not report # of LVT entries. */
- maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
- return maxlvt;
-}
-
-void clear_local_APIC(void)
-{
- int maxlvt;
- unsigned long v;
-
- maxlvt = get_maxlvt();
-
- /*
- * Masking an LVT entry on a P6 can trigger a local APIC error
- * if the vector is zero. Mask LVTERR first to prevent this.
- */
- if (maxlvt >= 3) {
- v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
- apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED);
- }
- /*
- * Careful: we have to set masks only first to deassert
- * any level-triggered sources.
- */
- v = apic_read(APIC_LVTT);
- apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
- v = apic_read(APIC_LVT0);
- apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
- v = apic_read(APIC_LVT1);
- apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED);
- if (maxlvt >= 4) {
- v = apic_read(APIC_LVTPC);
- apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
- }
-
- /*
- * Clean APIC state for other OSs:
- */
- apic_write_around(APIC_LVTT, APIC_LVT_MASKED);
- apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
- apic_write_around(APIC_LVT1, APIC_LVT_MASKED);
- if (maxlvt >= 3)
- apic_write_around(APIC_LVTERR, APIC_LVT_MASKED);
- if (maxlvt >= 4)
- apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
- v = GET_APIC_VERSION(apic_read(APIC_LVR));
- if (APIC_INTEGRATED(v)) { /* !82489DX */
- if (maxlvt > 3)
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- }
-}
-
-void __init connect_bsp_APIC(void)
-{
- if (pic_mode) {
- /*
- * Do not trust the local APIC being empty at bootup.
- */
- clear_local_APIC();
- /*
- * PIC mode, enable APIC mode in the IMCR, i.e.
- * connect BSP's local APIC to INT and NMI lines.
- */
- printk("leaving PIC mode, enabling APIC mode.\n");
- outb(0x70, 0x22);
- outb(0x01, 0x23);
- }
-}
-
-void disconnect_bsp_APIC(void)
-{
- if (pic_mode) {
- /*
- * Put the board back into PIC mode (has an effect
- * only on certain older boards). Note that APIC
- * interrupts, including IPIs, won't work beyond
- * this point! The only exception are INIT IPIs.
- */
- printk("disabling APIC mode, entering PIC mode.\n");
- outb(0x70, 0x22);
- outb(0x00, 0x23);
- }
-}
-
-void disable_local_APIC(void)
-{
- unsigned long value;
-
- clear_local_APIC();
-
- /*
- * Disable APIC (implies clearing of registers
- * for 82489DX!).
- */
- value = apic_read(APIC_SPIV);
- value &= ~APIC_SPIV_APIC_ENABLED;
- apic_write_around(APIC_SPIV, value);
-
- if (enabled_via_apicbase) {
- unsigned int l, h;
- rdmsr(MSR_IA32_APICBASE, l, h);
- l &= ~MSR_IA32_APICBASE_ENABLE;
- wrmsr(MSR_IA32_APICBASE, l, h);
- }
-}
-
-/*
- * This is to verify that we're looking at a real local APIC.
- * Check these against your board if the CPUs aren't getting
- * started for no apparent reason.
- */
-int __init verify_local_APIC(void)
-{
- unsigned int reg0, reg1;
-
- /*
- * The version register is read-only in a real APIC.
- */
- reg0 = apic_read(APIC_LVR);
- Dprintk("Getting VERSION: %x\n", reg0);
- apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
- reg1 = apic_read(APIC_LVR);
- Dprintk("Getting VERSION: %x\n", reg1);
-
- /*
- * The two version reads above should print the same
- * numbers. If the second one is different, then we
- * poke at a non-APIC.
- */
- if (reg1 != reg0)
- return 0;
-
- /*
- * Check if the version looks reasonable.
- */
- reg1 = GET_APIC_VERSION(reg0);
- if (reg1 == 0x00 || reg1 == 0xff)
- return 0;
- reg1 = get_maxlvt();
- if (reg1 < 0x02 || reg1 == 0xff)
- return 0;
-
- /*
- * The ID register is read/write in a real APIC.
- */
- reg0 = apic_read(APIC_ID);
- Dprintk("Getting ID: %x\n", reg0);
- apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
- reg1 = apic_read(APIC_ID);
- Dprintk("Getting ID: %x\n", reg1);
- apic_write(APIC_ID, reg0);
- if (reg1 != (reg0 ^ APIC_ID_MASK))
- return 0;
-
- /*
- * The next two are just to see if we have sane values.
- * They're only really relevant if we're in Virtual Wire
- * compatibility mode, but most boxes are nowadays.
- */
- reg0 = apic_read(APIC_LVT0);
- Dprintk("Getting LVT0: %x\n", reg0);
- reg1 = apic_read(APIC_LVT1);
- Dprintk("Getting LVT1: %x\n", reg1);
-
- return 1;
-}
-
-void __init sync_Arb_IDs(void)
-{
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- Dprintk("Synchronizing Arb IDs.\n");
- apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
- | APIC_DM_INIT);
-}
-
-extern void __error_in_apic_c (void);
-
-/*
- * WAS: An initial setup of the virtual wire mode.
- * NOW: We don't bother doing anything. All we need at this point
- * is to receive timer ticks, so that 'jiffies' is incremented.
- * If we're SMP, then we can assume BIOS did setup for us.
- * If we're UP, then the APIC should be disabled (it is at reset).
- * If we're UP and APIC is enabled, then BIOS is clever and has
- * probably done initial interrupt routing for us.
- */
-void __init init_bsp_APIC(void)
-{
-}
-
-static unsigned long calculate_ldr(unsigned long old)
-{
- unsigned long id = 1UL << smp_processor_id();
- return (old & ~APIC_LDR_MASK)|SET_APIC_LOGICAL_ID(id);
-}
-
-void __init setup_local_APIC (void)
-{
- unsigned long value, ver, maxlvt;
-
- value = apic_read(APIC_LVR);
- ver = GET_APIC_VERSION(value);
-
- if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f)
- __error_in_apic_c();
-
-	/* Double-check whether this APIC is really registered. */
- if (!test_bit(GET_APIC_ID(apic_read(APIC_ID)), &phys_cpu_present_map))
- BUG();
-
- /*
-	 * Intel recommends setting DFR, LDR and TPR before enabling
- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116). So here it goes...
- */
-
- /*
- * In clustered apic mode, the firmware does this for us
- * Put the APIC into flat delivery mode.
- * Must be "all ones" explicitly for 82489DX.
- */
- apic_write_around(APIC_DFR, APIC_DFR_FLAT);
-
- /*
- * Set up the logical destination ID.
- */
- value = apic_read(APIC_LDR);
- apic_write_around(APIC_LDR, calculate_ldr(value));
-
- /*
- * Set Task Priority to 'accept all'. We never change this
- * later on.
- */
- value = apic_read(APIC_TASKPRI);
- value &= ~APIC_TPRI_MASK;
- apic_write_around(APIC_TASKPRI, value);
-
- /*
- * Now that we are all set up, enable the APIC
- */
- value = apic_read(APIC_SPIV);
- value &= ~APIC_VECTOR_MASK;
- /*
- * Enable APIC
- */
- value |= APIC_SPIV_APIC_ENABLED;
-
- /* Enable focus processor (bit==0) */
- value &= ~APIC_SPIV_FOCUS_DISABLED;
-
- /* Set spurious IRQ vector */
- value |= SPURIOUS_APIC_VECTOR;
- apic_write_around(APIC_SPIV, value);
-
- /*
- * Set up LVT0, LVT1:
- *
- * set up through-local-APIC on the BP's LINT0. This is not
-	 * strictly necessary in pure symmetric-IO mode, but sometimes
- * we delegate interrupts to the 8259A.
- */
- /*
- * TODO: set up through-local-APIC from through-I/O-APIC? --macro
- */
- value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
- if (!smp_processor_id()) {
- value = APIC_DM_EXTINT;
- printk("enabled ExtINT on CPU#%d\n", smp_processor_id());
- } else {
- value = APIC_DM_EXTINT | APIC_LVT_MASKED;
- printk("masked ExtINT on CPU#%d\n", smp_processor_id());
- }
- apic_write_around(APIC_LVT0, value);
-
- /*
- * only the BP should see the LINT1 NMI signal, obviously.
- */
- if (!smp_processor_id())
- value = APIC_DM_NMI;
- else
- value = APIC_DM_NMI | APIC_LVT_MASKED;
- if (!APIC_INTEGRATED(ver)) /* 82489DX */
- value |= APIC_LVT_LEVEL_TRIGGER;
- apic_write_around(APIC_LVT1, value);
-
- if (APIC_INTEGRATED(ver)) { /* !82489DX */
- maxlvt = get_maxlvt();
- if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
- apic_write(APIC_ESR, 0);
- value = apic_read(APIC_ESR);
- printk("ESR value before enabling vector: %08lx\n", value);
-
- value = ERROR_APIC_VECTOR; /* enables sending errors */
- apic_write_around(APIC_LVTERR, value);
- /* spec says clear errors after enabling vector. */
- if (maxlvt > 3)
- apic_write(APIC_ESR, 0);
- value = apic_read(APIC_ESR);
- printk("ESR value after enabling vector: %08lx\n", value);
- } else {
- printk("No ESR for 82489DX.\n");
- }
-
- if ( (smp_processor_id() == 0) && (nmi_watchdog == NMI_LOCAL_APIC) )
- setup_apic_nmi_watchdog();
-}
-
-
-static inline void apic_pm_init1(void) { }
-static inline void apic_pm_init2(void) { }
-
-
-/*
- * Detect and enable local APICs on non-SMP boards.
- * Original code written by Keir Fraser.
- */
-
-static int __init detect_init_APIC (void)
-{
- u32 h, l, features;
- extern void get_cpu_vendor(struct cpuinfo_x86*);
-
- /* Workaround for us being called before identify_cpu(). */
- get_cpu_vendor(&boot_cpu_data);
-
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1)
- break;
- if (boot_cpu_data.x86 == 15 && cpu_has_apic)
- break;
- goto no_apic;
- case X86_VENDOR_INTEL:
- if (boot_cpu_data.x86 == 6 ||
- (boot_cpu_data.x86 == 15 && cpu_has_apic) ||
- (boot_cpu_data.x86 == 5 && cpu_has_apic))
- break;
- goto no_apic;
- default:
- goto no_apic;
- }
-
- if (!cpu_has_apic) {
- /*
- * Some BIOSes disable the local APIC in the
- * APIC_BASE MSR. This can only be done in
- * software for Intel P6 and AMD K7 (Model > 1).
- */
- rdmsr(MSR_IA32_APICBASE, l, h);
- if (!(l & MSR_IA32_APICBASE_ENABLE)) {
- printk("Local APIC disabled by BIOS -- reenabling.\n");
- l &= ~MSR_IA32_APICBASE_BASE;
- l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
- wrmsr(MSR_IA32_APICBASE, l, h);
- enabled_via_apicbase = 1;
- }
- }
-
- /* The APIC feature bit should now be enabled in `cpuid' */
- features = cpuid_edx(1);
- if (!(features & (1 << X86_FEATURE_APIC))) {
- printk("Could not enable APIC!\n");
- return -1;
- }
-
- set_bit(X86_FEATURE_APIC, &boot_cpu_data.x86_capability);
- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
- boot_cpu_physical_apicid = 0;
-
- /* The BIOS may have set up the APIC at some other address */
- rdmsr(MSR_IA32_APICBASE, l, h);
- if (l & MSR_IA32_APICBASE_ENABLE)
- mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
-
- if (nmi_watchdog != NMI_NONE)
- nmi_watchdog = NMI_LOCAL_APIC;
-
- printk("Found and enabled local APIC!\n");
- apic_pm_init1();
- return 0;
-
- no_apic:
- printk("No local APIC present or hardware disabled\n");
- return -1;
-}
-
-void __init init_apic_mappings(void)
-{
- unsigned long apic_phys = 0;
-
- /*
- * If no local APIC can be found then set up a fake all zeroes page to
- * simulate the local APIC and another one for the IO-APIC.
- */
- if (!smp_found_config && detect_init_APIC()) {
- apic_phys = get_free_page(GFP_KERNEL);
- apic_phys = __pa(apic_phys);
- } else
- apic_phys = mp_lapic_addr;
-
- set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
- Dprintk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys);
-
- /*
- * Fetch the APIC ID of the BSP in case we have a
- * default configuration (or the MP table is broken).
- */
- if (boot_cpu_physical_apicid == -1U)
- boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
-
-#ifdef CONFIG_X86_IO_APIC
- {
- unsigned long ioapic_phys = 0, idx = FIX_IO_APIC_BASE_0;
- int i;
-
- for (i = 0; i < nr_ioapics; i++) {
- if (smp_found_config)
- ioapic_phys = mp_ioapics[i].mpc_apicaddr;
- set_fixmap_nocache(idx, ioapic_phys);
- Dprintk("mapped IOAPIC to %08lx (%08lx)\n",
- __fix_to_virt(idx), ioapic_phys);
- idx++;
- }
- }
-#endif
-}
-
-/*****************************************************************************
- * APIC calibration
- *
- * The APIC is programmed in bus cycles.
- * Timeout values should be specified in real-time units.
- * The "cheapest" time source is the cycle counter.
- *
- * Thus, we need a mapping from: bus cycles <- cycle counter <- system time
- *
- * The calibration is currently a bit shoddy since it requires the external
- * timer chip to generate periodic timer interrupts.
- *****************************************************************************/
-
-/* used for system time scaling */
-static unsigned int bus_freq;
-static u32 bus_cycle; /* length of one bus cycle in pico-seconds */
-static u32 bus_scale; /* scaling factor convert ns to bus cycles */
-
-/*
- * The timer chip is already set up at HZ interrupts per second here,
- * but we do not accept timer interrupts yet. We only allow the BP
- * to calibrate.
- */
-static unsigned int __init get_8254_timer_count(void)
-{
- /*extern spinlock_t i8253_lock;*/
- /*unsigned long flags;*/
- unsigned int count;
- /*spin_lock_irqsave(&i8253_lock, flags);*/
- outb_p(0x00, 0x43);
- count = inb_p(0x40);
- count |= inb_p(0x40) << 8;
- /*spin_unlock_irqrestore(&i8253_lock, flags);*/
- return count;
-}
-
-void __init wait_8254_wraparound(void)
-{
- unsigned int curr_count, prev_count=~0;
- int delta;
- curr_count = get_8254_timer_count();
- do {
- prev_count = curr_count;
- curr_count = get_8254_timer_count();
- delta = curr_count-prev_count;
- /*
- * This limit for delta seems arbitrary, but it isn't, it's slightly
- * above the level of error a buggy Mercury/Neptune chipset timer can
- * cause.
- */
- } while (delta < 300);
-}
-
-/*
- * This function sets up the local APIC timer, with a timeout of
- * 'clocks' APIC bus clock. During calibration we actually call
- * this function with a very large value and read the current time after
- * a well defined period of time as expired.
- *
- * Calibration is only performed once, for CPU0!
- *
- * We do reads before writes even if unnecessary, to get around the
- * P5 APIC double write bug.
- */
-#define APIC_DIVISOR 1
-static void __setup_APIC_LVTT(unsigned int clocks)
-{
- unsigned int lvtt1_value, tmp_value;
- lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV)|LOCAL_TIMER_VECTOR;
- apic_write_around(APIC_LVTT, lvtt1_value);
- tmp_value = apic_read(APIC_TDCR);
- apic_write_around(APIC_TDCR, (tmp_value | APIC_TDR_DIV_1));
- apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
-}
-
-/*
- * This is done for every CPU from setup_APIC_clocks() below.
- * We set up each local APIC with a zero timeout value for now.
- * Unlike Linux, we don't have to wait for slices etc.
- */
-void setup_APIC_timer(void * data)
-{
- unsigned long flags;
- __save_flags(flags);
- __sti();
- __setup_APIC_LVTT(0);
- __restore_flags(flags);
-}
-
-/*
- * In this function we calibrate APIC bus clocks to the external timer.
- *
- * As a result we have the bus speed and CPU speed in Hz.
- *
- * We want to do the calibration only once (for CPU0). CPUs connected by the
- * same APIC bus have the very same bus frequency.
- *
- * This bit is a bit shoddy since we use the very same periodic timer interrupt
- * we try to eliminate to calibrate the APIC.
- */
-
-int __init calibrate_APIC_clock(void)
-{
- unsigned long long t1 = 0, t2 = 0;
- long tt1, tt2;
- long result;
- int i;
- const int LOOPS = HZ/10;
-
- printk("Calibrating APIC timer for CPU%d...\n", smp_processor_id());
-
- /* Put whatever arbitrary (but long enough) timeout
- * value into the APIC clock, we just want to get the
- * counter running for calibration. */
- __setup_APIC_LVTT(1000000000);
-
- /* The timer chip counts down to zero. Let's wait
- * for a wraparound to start exact measurement:
- * (the current tick might have been already half done) */
- wait_8254_wraparound();
-
- /* We wrapped around just now. Let's start: */
- rdtscll(t1);
- tt1 = apic_read(APIC_TMCCT);
-
-	/* Let's wait LOOPS wraparounds: */
- for (i = 0; i < LOOPS; i++)
- wait_8254_wraparound();
-
- tt2 = apic_read(APIC_TMCCT);
- rdtscll(t2);
-
- /* The APIC bus clock counter is 32 bits only, it
- * might have overflowed, but note that we use signed
- * longs, thus no extra care needed.
- * underflowed to be exact, as the timer counts down ;) */
- result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
-
- printk("..... CPU speed is %ld.%04ld MHz.\n",
- ((long)(t2-t1)/LOOPS) / (1000000/HZ),
- ((long)(t2-t1)/LOOPS) % (1000000/HZ));
-
- printk("..... Bus speed is %ld.%04ld MHz.\n",
- result / (1000000/HZ),
- result % (1000000/HZ));
-
- /*
- * KAF: Moved this to time.c where it's calculated relative to the TSC.
- * Therefore works on machines with no local APIC.
- */
- /*cpu_freq = (u64)(((t2-t1)/LOOPS)*HZ);*/
-
- /* set up multipliers for accurate timer code */
- bus_freq = result*HZ;
- bus_cycle = (u32) (1000000000000LL/bus_freq); /* in pico seconds */
- bus_scale = (1000*262144)/bus_cycle;
-
- printk("..... bus_scale = 0x%08X\n", bus_scale);
- /* reset APIC to zero timeout value */
- __setup_APIC_LVTT(0);
- return result;
-}
-
-/*
- * initialise the APIC timers for all CPUs
- * we start with the first and find out processor frequency and bus speed
- */
-void __init setup_APIC_clocks (void)
-{
- printk("Using local APIC timer interrupts.\n");
- using_apic_timer = 1;
- __cli();
- /* calibrate CPU0 for CPU speed and BUS speed */
- bus_freq = calibrate_APIC_clock();
- /* Now set up the timer for real. */
- setup_APIC_timer((void *)bus_freq);
- __sti();
- /* and update all other cpus */
- smp_call_function(setup_APIC_timer, (void *)bus_freq, 1, 1);
-}
-
-#undef APIC_DIVISOR
-
-/*
- * reprogram the APIC timer. Timeout value is in ns from start of boot
- * returns 1 on success
- * returns 0 if the timeout value is too small or in the past.
- */
-int reprogram_ac_timer(s_time_t timeout)
-{
- s_time_t now;
- s_time_t expire;
- u64 apic_tmict;
-
- /*
- * We use this value because we don't trust zero (we think it may just
- * cause an immediate interrupt). At least this is guaranteed to hold it
- * off for ages (esp. since the clock ticks on bus clock, not cpu clock!).
- */
- if ( timeout == 0 )
- {
- apic_tmict = 0xffffffff;
- goto reprogram;
- }
-
- now = NOW();
- expire = timeout - now; /* value from now */
-
- if ( expire <= 0 )
- {
- Dprintk("APICT[%02d] Timeout in the past 0x%08X%08X > 0x%08X%08X\n",
- smp_processor_id(), (u32)(now>>32),
- (u32)now, (u32)(timeout>>32),(u32)timeout);
- return 0;
- }
-
- /*
- * If we don't have local APIC then we just poll the timer list off the
-     * PIT interrupt. Cheesy but good enough to work on e.g. VMware :-)
- */
- if ( !cpu_has_apic )
- return 1;
-
- /* conversion to bus units */
- apic_tmict = (((u64)bus_scale) * expire)>>18;
-
- if ( apic_tmict >= 0xffffffff )
- {
- Dprintk("APICT[%02d] Timeout value too large\n", smp_processor_id());
- apic_tmict = 0xffffffff;
- }
-
- if ( apic_tmict == 0 )
- {
- Dprintk("APICT[%02d] timeout value too small\n", smp_processor_id());
- return 0;
- }
-
- reprogram:
- /* Program the timer. */
- apic_write(APIC_TMICT, (unsigned long)apic_tmict);
-
- return 1;
-}
-
-unsigned int apic_timer_irqs [NR_CPUS];
-
-void smp_apic_timer_interrupt(struct pt_regs * regs)
-{
- int cpu = smp_processor_id();
-
- ack_APIC_irq();
-
- apic_timer_irqs[cpu]++;
- perfc_incrc(apic_timer);
-
- __cpu_raise_softirq(cpu, AC_TIMER_SOFTIRQ);
-}
-
-/*
- * This interrupt should _never_ happen with our APIC/SMP architecture
- */
-asmlinkage void smp_spurious_interrupt(void)
-{
- unsigned long v;
-
- /*
- * Check if this really is a spurious interrupt and ACK it
- * if it is a vectored one. Just in case...
- * Spurious interrupts should not be ACKed.
- */
- v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
- if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
- ack_APIC_irq();
-
- /* see sw-dev-man vol 3, chapter 7.4.13.5 */
- printk("spurious APIC interrupt on CPU#%d, should never happen.\n",
- smp_processor_id());
-}
-
-/*
- * This interrupt should never happen with our APIC/SMP architecture
- */
-
-asmlinkage void smp_error_interrupt(void)
-{
- unsigned long v, v1;
-
- /* First tickle the hardware, only then report what went on. -- REW */
- v = apic_read(APIC_ESR);
- apic_write(APIC_ESR, 0);
- v1 = apic_read(APIC_ESR);
- ack_APIC_irq();
- atomic_inc(&irq_err_count);
-
- /* Here is what the APIC error bits mean:
- 0: Send CS error
- 1: Receive CS error
- 2: Send accept error
- 3: Receive accept error
- 4: Reserved
- 5: Send illegal vector
- 6: Received illegal vector
- 7: Illegal register address
- */
- printk ("APIC error on CPU%d: %02lx(%02lx)\n",
- smp_processor_id(), v , v1);
-}
-
-/*
- * This initializes the IO-APIC and APIC hardware if this is
- * a UP kernel.
- */
-int __init APIC_init_uniprocessor (void)
-{
- if (!smp_found_config && !cpu_has_apic)
- return -1;
-
- /*
- * Complain if the BIOS pretends there is one.
- */
-	if (!cpu_has_apic &&
-	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]))
- {
- printk("BIOS bug, local APIC #%d not detected!...\n",
- boot_cpu_physical_apicid);
- return -1;
- }
-
- verify_local_APIC();
-
- connect_bsp_APIC();
-
-#ifdef CONFIG_SMP
- cpu_online_map = 1;
-#endif
- phys_cpu_present_map = 1;
- apic_write_around(APIC_ID, boot_cpu_physical_apicid);
-
- apic_pm_init2();
-
- setup_local_APIC();
-
-#ifdef CONFIG_X86_IO_APIC
- if (smp_found_config && nr_ioapics)
- setup_IO_APIC();
-#endif
- setup_APIC_clocks();
-
- return 0;
-}
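
A note on the fixed-point conversion used in calibrate_APIC_clock() and reprogram_ac_timer() above: bus_scale = (1000*262144)/bus_cycle is bus ticks per nanosecond scaled by 2^18, so a timeout converts to ticks with one multiply and an 18-bit shift. A stand-alone check, assuming an illustrative 100 MHz bus (figures are not from the source):

/* Illustrative check of the bus_scale fixed-point math above. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint32_t bus_freq = 100000000;                    /* assumed 100 MHz bus */
    uint32_t bus_cycle = (uint32_t)(1000000000000ULL / bus_freq);   /* ps */
    uint32_t bus_scale = (1000 * 262144) / bus_cycle; /* 2^18 * ticks/ns */
    uint64_t expire_ns = 1000000;                     /* 1 ms timeout */
    uint64_t ticks = ((uint64_t)bus_scale * expire_ns) >> 18;

    /* 1 ms at 100 MHz should come out near 100000 bus cycles. */
    printf("bus_scale=%u ticks=%llu\n", bus_scale,
           (unsigned long long)ticks);
    return 0;
}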
+++ /dev/null
-#include <xen/config.h>
-#include <hypervisor-ifs/hypervisor-if.h>
-#include <asm/page.h>
-
-#define SECONDARY_CPU_FLAG 0xA5A5A5A5
-
- .text
-
-ENTRY(start)
- jmp hal_entry
-
- .align 4
-
-/*** MULTIBOOT HEADER ****/
- /* Magic number indicating a Multiboot header. */
- .long 0x1BADB002
- /* Flags to bootloader (see Multiboot spec). */
- .long 0x00000002
- /* Checksum: must be the negated sum of the first two fields. */
- .long -0x1BADB004
-
-hal_entry:
- /* Set up a few descriptors: on entry only CS is guaranteed good. */
- lgdt %cs:nopaging_gdt_descr-__PAGE_OFFSET
- mov $(__HYPERVISOR_DS),%ecx
- mov %ecx,%ds
- mov %ecx,%es
- mov %ecx,%fs
- mov %ecx,%gs
- ljmp $(__HYPERVISOR_CS),$(1f)-__PAGE_OFFSET
-1: lss stack_start-__PAGE_OFFSET,%esp
-
- /* Reset EFLAGS (subsumes CLI and CLD). */
- pushl $0
- popf
-
- /* CPU type checks. We need P6+. */
- mov $0x200000,%edx
- pushfl
- pop %ecx
- and %edx,%ecx
- jne bad_cpu # ID bit should be clear
- pushl %edx
- popfl
- pushfl
- pop %ecx
- and %edx,%ecx
- je bad_cpu # ID bit should be set
-
- /* Set up CR0. */
- mov %cr0,%ecx
- and $0x00000011,%ecx # save ET and PE
- or $0x00050022,%ecx # set AM, WP, NE and MP
- mov %ecx,%cr0
-
- /* Set up FPU. */
- fninit
-
- /* Set up CR4, except global flag which Intel requires should be */
- /* left until after paging is enabled (IA32 Manual Vol. 3, Sec. 2.5) */
- mov %cr4,%ecx
- or mmu_cr4_features-__PAGE_OFFSET,%ecx
- mov %ecx,mmu_cr4_features-__PAGE_OFFSET
- and $0x7f,%ecx /* disable GLOBAL bit */
- mov %ecx,%cr4
-
-#ifdef CONFIG_SMP
- /* Is this a non-boot processor? */
- cmp $(SECONDARY_CPU_FLAG),%ebx
- jne continue_boot_cpu
-
- call start_paging
- lidt idt_descr
- jmp start_secondary
-#endif
-
-continue_boot_cpu:
- add $__PAGE_OFFSET,%ebx
- push %ebx /* Multiboot info struct */
- push %eax /* Multiboot magic value */
-
- /* Initialize BSS (no nasty surprises!) */
- mov $__bss_start-__PAGE_OFFSET,%edi
- mov $_end-__PAGE_OFFSET,%ecx
- sub %edi,%ecx
- xor %eax,%eax
- rep stosb
-
- /* Copy all modules (dom0 + initrd if present) out of the Xen heap */
- mov (%esp),%eax
- cmp $0x2BADB002,%eax
- jne skip_dom0_copy
- sub $__PAGE_OFFSET,%ebx /* turn back into a phys addr */
- mov 0x14(%ebx),%edi /* mbi->mods_count */
- dec %edi /* mbi->mods_count-- */
- jb skip_dom0_copy /* skip if no modules */
- mov 0x18(%ebx),%eax /* mbi->mods_addr */
- mov (%eax),%ebx /* %ebx = mod[0]->mod_start */
- shl $4,%edi
- add %edi,%eax
- mov 0x4(%eax),%eax /* %eax = mod[mod_count-1]->end */
- mov %eax,%ecx
- sub %ebx,%ecx /* %ecx = byte len of all mods */
- mov $(MAX_DIRECTMAP_ADDRESS), %edi
- add %ecx, %edi /* %edi = src + length */
- shr $2,%ecx /* %ecx = length/4 */
-1: sub $4,%eax /* %eax = src, %edi = dst */
- sub $4,%edi
- mov (%eax),%ebx
- mov %ebx,(%edi)
- loop 1b
-skip_dom0_copy:
-
- /* Initialize low and high mappings of all memory with 4MB pages */
- mov $idle_pg_table-__PAGE_OFFSET,%edi
- mov $0x1e3,%eax /* PRESENT+RW+A+D+4MB+GLOBAL */
-1: mov %eax,__PAGE_OFFSET>>20(%edi) /* high mapping */
- stosl /* low mapping */
- add $(1<<L2_PAGETABLE_SHIFT),%eax
- cmp $MAX_DIRECTMAP_ADDRESS+0x1e3,%eax
- jne 1b
-
- call start_paging
- call setup_idt
- lidt idt_descr
-
- /* Call into main C routine. This should never return.*/
- call cmain
- ud2 /* Force a panic (invalid opcode). */
-
-start_paging:
- mov $idle_pg_table-__PAGE_OFFSET,%eax
- mov %eax,%cr3
- mov %cr0,%eax
- or $0x80010000,%eax /* set PG and WP bits */
- mov %eax,%cr0
- jmp 1f
-1: /* Install relocated selectors (FS/GS unused). */
- lgdt gdt_descr
- mov $(__HYPERVISOR_DS),%ecx
- mov %ecx,%ds
- mov %ecx,%es
- mov %ecx,%ss
- ljmp $(__HYPERVISOR_CS),$1f
-1: /* Paging enabled, so we can now enable GLOBAL mappings in CR4. */
- movl mmu_cr4_features,%ecx
- movl %ecx,%cr4
- /* Relocate ESP */
- add $__PAGE_OFFSET,%esp
- /* Relocate EIP via return jump */
- pop %ecx
- add $__PAGE_OFFSET,%ecx
- jmp *%ecx
-
-
-/*** INTERRUPT INITIALISATION ***/
-
-setup_idt:
- lea ignore_int,%edx
- mov $(__HYPERVISOR_CS << 16),%eax
- mov %dx,%ax /* selector = 0x0010 = cs */
- mov $0x8E00,%dx /* interrupt gate - dpl=0, present */
-
- lea SYMBOL_NAME(idt_table),%edi
- mov $256,%ecx
-1: mov %eax,(%edi)
- mov %edx,4(%edi)
- add $8,%edi
- loop 1b
- ret
-
-/* This is the default interrupt handler. */
-int_msg:
- .asciz "Unknown interrupt\n"
- ALIGN
-ignore_int:
- cld
- push %eax
- push %ecx
- push %edx
- pushl %es
- pushl %ds
- mov $(__HYPERVISOR_DS),%eax
- mov %eax,%ds
- mov %eax,%es
- pushl $int_msg
- call SYMBOL_NAME(printf)
-1: jmp 1b
-
-bad_cpu_msg:
- .asciz "Bad CPU type. Need P6+."
- ALIGN
-bad_cpu:
- pushl $bad_cpu_msg
- call SYMBOL_NAME(printf)
-1: jmp 1b
-
-/*** STACK LOCATION ***/
-
-ENTRY(stack_start)
- .long SYMBOL_NAME(cpu0_stack) + 8100 - __PAGE_OFFSET
- .long __HYPERVISOR_DS
-
-/*** DESCRIPTOR TABLES ***/
-
-.globl SYMBOL_NAME(idt)
-.globl SYMBOL_NAME(gdt)
-
- ALIGN
-
- .word 0
-idt_descr:
- .word 256*8-1
-SYMBOL_NAME(idt):
- .long SYMBOL_NAME(idt_table)
-
- .word 0
-gdt_descr:
- .word (LAST_RESERVED_GDT_ENTRY*8)+7
-SYMBOL_NAME(gdt):
- .long SYMBOL_NAME(gdt_table) /* gdt base */
-
- .word 0
-nopaging_gdt_descr:
- .word (LAST_RESERVED_GDT_ENTRY*8)+7
- .long SYMBOL_NAME(gdt_table)-__PAGE_OFFSET
-
- ALIGN
-/* NB. Rings != 0 get access up to 0xFC400000. This allows access to the */
-/* machine->physical mapping table. Ring 0 can access all memory. */
-ENTRY(gdt_table)
- .fill FIRST_RESERVED_GDT_ENTRY,8,0
- .quad 0x0000000000000000 /* unused */
- .quad 0x00cf9a000000ffff /* 0x0808 ring 0 4.00GB code at 0x0 */
- .quad 0x00cf92000000ffff /* 0x0810 ring 0 4.00GB data at 0x0 */
- .quad 0x00cfba000000c3ff /* 0x0819 ring 1 3.95GB code at 0x0 */
- .quad 0x00cfb2000000c3ff /* 0x0821 ring 1 3.95GB data at 0x0 */
- .quad 0x00cffa000000c3ff /* 0x082b ring 3 3.95GB code at 0x0 */
- .quad 0x00cff2000000c3ff /* 0x0833 ring 3 3.95GB data at 0x0 */
- .quad 0x0000000000000000 /* unused */
- .fill 2*NR_CPUS,8,0 /* space for TSS and LDT per CPU */
-
- .org 0x1000
-ENTRY(idle_pg_table) # Initial page directory is 4kB
- .org 0x2000
-ENTRY(cpu0_stack) # Initial stack is 8kB
- .org 0x4000
-ENTRY(stext)
-ENTRY(_stext)
+++ /dev/null
-/*
- * Precise Delay Loops for i386
- *
- * Copyright (C) 1993 Linus Torvalds
- * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
- *
- * The __delay function must _NOT_ be inlined as its execution time
- * depends wildly on alignment on many x86 processors. The additional
- * jump magic is needed to get the timing stable on all the CPUs
- * we have to worry about.
- */
-
-#include <xen/config.h>
-#include <xen/delay.h>
-#include <asm/msr.h>
-#include <asm/processor.h>
-
-void __udelay(unsigned long usecs)
-{
- unsigned long ticks = usecs * ticks_per_usec;
- unsigned long s, e;
-
- rdtscl(s);
- do
- {
- rep_nop();
- rdtscl(e);
- } while ((e-s) < ticks);
-}
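
The loop above simply spins on the TSC. A standalone user-space model of the same idea (a sketch: it uses the GCC/Clang __rdtsc intrinsic, and the 2000 ticks/usec calibration constant is an assumed placeholder, not Xen's ticks_per_usec):

    #include <x86intrin.h> /* __rdtsc() */

    /* Assumed calibration: a 2 GHz invariant TSC gives 2000 ticks per usec. */
    static const unsigned long long TICKS_PER_USEC = 2000;

    static void udelay_sketch(unsigned long usecs)
    {
        unsigned long long ticks = usecs * TICKS_PER_USEC;
        unsigned long long start = __rdtsc();

        /* Spin until the requested number of TSC ticks have elapsed. */
        while ( (__rdtsc() - start) < ticks )
            __builtin_ia32_pause(); /* like rep_nop(): hint that we are spinning */
    }
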
+++ /dev/null
-/******************************************************************************
- * domain_page.c
- *
- * Allow temporary mapping of domain pages. Based on ideas from the
- * Linux PKMAP code -- the copyrights and credits are retained below.
- */
-
-/*
- * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
- * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
- * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
- */
-
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <xen/mm.h>
-#include <xen/perfc.h>
-#include <asm/domain_page.h>
-#include <asm/pgalloc.h>
-
-unsigned long *mapcache;
-static unsigned int map_idx, shadow_map_idx[NR_CPUS];
-static spinlock_t map_lock = SPIN_LOCK_UNLOCKED;
-
-/* Use a spare PTE bit to mark entries ready for recycling. */
-#define READY_FOR_TLB_FLUSH (1<<10)
-
-static void flush_all_ready_maps(void)
-{
- unsigned long *cache = mapcache;
-
- /* A bit skanky -- depends on having an aligned PAGE_SIZE set of PTEs. */
- do { if ( (*cache & READY_FOR_TLB_FLUSH) ) *cache = 0; }
- while ( ((unsigned long)(++cache) & ~PAGE_MASK) != 0 );
-
- perfc_incrc(domain_page_tlb_flush);
- local_flush_tlb();
-}
-
-
-void *map_domain_mem(unsigned long pa)
-{
- unsigned long va;
- unsigned int idx, cpu = smp_processor_id();
- unsigned long *cache = mapcache;
- unsigned long flags;
-
- perfc_incrc(map_domain_mem_count);
-
- spin_lock_irqsave(&map_lock, flags);
-
- /* Has some other CPU caused a wrap? We must flush if so. */
- if ( map_idx < shadow_map_idx[cpu] )
- {
- perfc_incrc(domain_page_tlb_flush);
- local_flush_tlb();
- }
-
- for ( ; ; )
- {
- idx = map_idx = (map_idx + 1) & (MAPCACHE_ENTRIES - 1);
- if ( idx == 0 ) flush_all_ready_maps();
- if ( cache[idx] == 0 ) break;
- }
-
- cache[idx] = (pa & PAGE_MASK) | __PAGE_HYPERVISOR;
-
- spin_unlock_irqrestore(&map_lock, flags);
-
- shadow_map_idx[cpu] = idx;
-
- va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT) + (pa & ~PAGE_MASK);
- return (void *)va;
-}
-
-void unmap_domain_mem(void *va)
-{
- unsigned int idx;
- idx = ((unsigned long)va - MAPCACHE_VIRT_START) >> PAGE_SHIFT;
- mapcache[idx] |= READY_FOR_TLB_FLUSH;
-}
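
The trick above is that unmapping never touches the TLB: it only sets a spare PTE bit, and stale entries are recycled in one batch, with a single flush, each time the allocation index wraps. A uniprocessor, lock-free toy model of that policy (ENTRIES and the READY bit are illustrative constants, not Xen's):

    #define ENTRIES 1024 /* assumed stand-in for MAPCACHE_ENTRIES */
    #define READY (1u << 10) /* the spare 'ready for flush' PTE bit */

    static unsigned int cache[ENTRIES];
    static unsigned int idx;

    static unsigned int map_slot(unsigned int pte)
    {
        unsigned int i;
        for ( ; ; )
        {
            idx = (idx + 1) & (ENTRIES - 1);
            if ( idx == 0 ) /* wrapped: recycle all ready slots at once */
            {
                for ( i = 0; i < ENTRIES; i++ )
                    if ( cache[i] & READY ) cache[i] = 0;
                /* ...a single local_flush_tlb() would go here... */
            }
            if ( cache[idx] == 0 ) break; /* free slot found */
        }
        cache[idx] = pte;
        return idx;
    }

    static void unmap_slot(unsigned int i) { cache[i] |= READY; }
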
+++ /dev/null
-/*
- * linux/arch/i386/entry.S
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- */
-
-/*
- * entry.S contains the system-call and fault low-level handling routines.
- * This also contains the timer-interrupt handler, as well as all interrupts
- * and faults that can result in a task-switch.
- *
- * Stack layout in 'ret_from_system_call':
- * 0(%esp) - %ebx
- * 4(%esp) - %ecx
- * 8(%esp) - %edx
- * C(%esp) - %esi
- * 10(%esp) - %edi
- * 14(%esp) - %ebp
- * 18(%esp) - %eax
- * 1C(%esp) - %ds
- * 20(%esp) - %es
- * 24(%esp) - %fs
- * 28(%esp) - %gs
- * 2C(%esp) - orig_eax
- * 30(%esp) - %eip
- * 34(%esp) - %cs
- * 38(%esp) - %eflags
- * 3C(%esp) - %oldesp
- * 40(%esp) - %oldss
- *
- * "current" is in register %ebx during any slow entries.
- */
-/* The idea for callbacks from monitor -> guest OS.
- *
- * First, we require that all callbacks (either via a supplied
- * interrupt-descriptor-table, or via the special event or failsafe callbacks
- * in the shared-info-structure) are to ring 1. This just makes life easier,
- * in that it means we don't have to do messy GDT/LDT lookups to find
- * out what the privilege level of the return code-selector is. That code
- * would just be a hassle to write, and would need to account for running
- * off the end of the GDT/LDT, for example. For all callbacks we check
- * that the provided return CS is not == __HYPERVISOR_{CS,DS}. Apart from
- * that we're safe, as we don't allow a guest OS to install ring-0
- * descriptors into the GDT/LDT.
- * It's up to the guest OS to ensure all returns via the IDT are to ring 1.
- * If not, we load incorrect SS/ESP values from the TSS (for ring 1 rather
- * than the correct ring) and bad things are bound to ensue -- IRET is
- * likely to fault, and we may end up killing the domain (no harm can
- * come to the hypervisor itself, though).
- *
- * When doing a callback, we check if the return CS is in ring 0. If so,
- * the callback is delayed until the next return to ring != 0.
- * If return CS is in ring 1, then we create a callback frame
- * starting at return SS/ESP. The base of the frame does an intra-privilege
- * interrupt-return.
- * If return CS is in ring > 1, we create a callback frame starting
- * at SS/ESP taken from appropriate section of the current TSS. The base
- * of the frame does an inter-privilege interrupt-return.
- *
- * Note that the "failsafe callback" uses a special stackframe:
- * { return_DS, return_ES, return_FS, return_GS, return_EIP,
- * return_CS, return_EFLAGS[, return_ESP, return_SS] }
- * That is, original values for DS/ES/FS/GS are placed on stack rather than
- * in DS/ES/FS/GS themselves. Why? It saves us loading them, only to have them
- * saved/restored in guest OS. Furthermore, if we load them we may cause
- * a fault if they are invalid, which is a hassle to deal with. We avoid
- * that problem if we don't load them :-) This property allows us to use
- * the failsafe callback as a fallback: if we ever fault on loading DS/ES/FS/GS
- * on return to ring != 0, we can simply package it up as a return via
- * the failsafe callback, and let the guest OS sort it out (perhaps by
- * killing an application process). Note that we also do this for any
- * faulting IRET -- just let the guest OS handle it via the event
- * callback.
- *
- * We terminate a domain in the following cases:
- * - creating a callback stack frame (due to bad ring-1 stack).
- * - faulting IRET on entry to failsafe callback handler.
- * So, each domain must keep its ring-1 %ss/%esp and failsafe callback
- * handler in good order (absolutely no faults allowed!).
- */
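
Seen from the guest side, the failsafe frame described above would look roughly like this (a sketch: the struct name and the C rendering are illustrative, the field order follows the comment):

    struct failsafe_frame {
        unsigned long ds, es, fs, gs; /* saved by Xen, deliberately not loaded */
        unsigned long eip;
        unsigned long cs;
        unsigned long eflags;
        unsigned long esp, ss; /* present only for inter-privilege returns */
    };
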
-
-#include <xen/config.h>
-#include <xen/errno.h>
-#include <hypervisor-ifs/hypervisor-if.h>
-
-EBX = 0x00
-ECX = 0x04
-EDX = 0x08
-ESI = 0x0C
-EDI = 0x10
-EBP = 0x14
-EAX = 0x18
-DS = 0x1C
-ES = 0x20
-FS = 0x24
-GS = 0x28
-ORIG_EAX = 0x2C
-EIP = 0x30
-CS = 0x34
-EFLAGS = 0x38
-OLDESP = 0x3C
-OLDSS = 0x40
-
-/* Offsets in task_struct */
-PROCESSOR = 0
-HYP_EVENTS = 2
-SHARED_INFO = 4
-EVENT_SEL = 8
-EVENT_ADDR = 12
-FAILSAFE_BUFFER = 16
-FAILSAFE_SEL = 32
-FAILSAFE_ADDR = 36
-
-/* Offsets in shared_info_t */
-#define UPCALL_PENDING /* 0 */
-#define UPCALL_MASK 1
-
-/* Offsets in guest_trap_bounce */
-GTB_ERROR_CODE = 0
-GTB_CR2 = 4
-GTB_FLAGS = 8
-GTB_CS = 10
-GTB_EIP = 12
-GTBF_TRAP = 1
-GTBF_TRAP_NOCODE = 2
-GTBF_TRAP_CR2 = 4
-
-CF_MASK = 0x00000001
-IF_MASK = 0x00000200
-NT_MASK = 0x00004000
-
-
-
-#define SAVE_ALL_NOSEGREGS \
- cld; \
- pushl %gs; \
- pushl %fs; \
- pushl %es; \
- pushl %ds; \
- pushl %eax; \
- pushl %ebp; \
- pushl %edi; \
- pushl %esi; \
- pushl %edx; \
- pushl %ecx; \
- pushl %ebx; \
-
-#define SAVE_ALL \
- SAVE_ALL_NOSEGREGS \
- movl $(__HYPERVISOR_DS),%edx; \
- movl %edx,%ds; \
- movl %edx,%es; \
- movl %edx,%fs; \
- movl %edx,%gs; \
- sti;
-
-#define GET_CURRENT(reg) \
- movl $4096-4, reg; \
- orl %esp, reg; \
- andl $~3,reg; \
- movl (reg),reg;
-
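GET_CURRENT works because each task runs on a 4kB, page-aligned stack whose last word holds the task_struct pointer; rounding %esp up to that word recovers 'current'. A C rendering of the same arithmetic (a sketch, 32-bit only; task_struct is left opaque):

    struct task_struct;

    static inline struct task_struct *get_current_sketch(void)
    {
        unsigned long sp;
        __asm__("movl %%esp,%0" : "=r" (sp));
        sp |= (4096 - 4); /* address of the last word of this stack page */
        sp &= ~3UL; /* word-align, as the andl $~3 above does */
        return *(struct task_struct **)sp;
    }
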
-ENTRY(continue_nonidle_task)
- GET_CURRENT(%ebx)
- jmp test_all_events
-
- ALIGN
-/*
- * HYPERVISOR_multicall(call_list, nr_calls)
- * Execute a list of 'nr_calls' system calls, pointed at by 'call_list'.
- * This is fairly easy except that:
- * 1. We may fault reading the call list, and must patch that up; and
- * 2. We cannot recursively call HYPERVISOR_multicall, or a malicious
- * caller could cause our stack to blow up.
- */
-do_multicall:
- popl %eax
- cmpl $SYMBOL_NAME(multicall_return_from_call),%eax
- je multicall_return_from_call
- pushl %ebx
- movl 4(%esp),%ebx /* EBX == call_list */
- movl 8(%esp),%ecx /* ECX == nr_calls */
-multicall_loop:
- pushl %ecx
-multicall_fault1:
- pushl 20(%ebx) # args[4]
-multicall_fault2:
- pushl 16(%ebx) # args[3]
-multicall_fault3:
- pushl 12(%ebx) # args[2]
-multicall_fault4:
- pushl 8(%ebx) # args[1]
-multicall_fault5:
- pushl 4(%ebx) # args[0]
-multicall_fault6:
- movl (%ebx),%eax # op
- andl $255,%eax
- call *SYMBOL_NAME(hypervisor_call_table)(,%eax,4)
-multicall_return_from_call:
-multicall_fault7:
- movl %eax,24(%ebx) # args[5] == result
- addl $20,%esp
- popl %ecx
- addl $(ARGS_PER_MULTICALL_ENTRY*4),%ebx
- loop multicall_loop
- popl %ebx
- xorl %eax,%eax
- jmp ret_from_hypervisor_call
-
-.section __ex_table,"a"
- .align 4
- .long multicall_fault1, multicall_fixup1
- .long multicall_fault2, multicall_fixup2
- .long multicall_fault3, multicall_fixup3
- .long multicall_fault4, multicall_fixup4
- .long multicall_fault5, multicall_fixup5
- .long multicall_fault6, multicall_fixup6
-.previous
-
-.section .fixup,"ax"
-multicall_fixup6:
- addl $4,%esp
-multicall_fixup5:
- addl $4,%esp
-multicall_fixup4:
- addl $4,%esp
-multicall_fixup3:
- addl $4,%esp
-multicall_fixup2:
- addl $4,%esp
-multicall_fixup1:
- addl $4,%esp
- popl %ebx
- movl $-EFAULT,%eax
- jmp ret_from_hypervisor_call
-.previous
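
The offsets used in multicall_loop imply the following per-entry layout on the guest side (a sketch: the struct name is invented, and the real stride is ARGS_PER_MULTICALL_ENTRY words, which may exceed the seven fields shown here):

    struct multicall_entry_sketch {
        unsigned long op; /* index into hypervisor_call_table, masked to 8 bits */
        unsigned long args[5]; /* pushed right-to-left before the indirect call */
        unsigned long result; /* written back at offset 24 ('args[5]' above) */
    };
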
-
- ALIGN
-restore_all_guest:
- # First, we may need to restore %ds if it was clobbered by create_bounce_frame
- pushl %ss
- popl %ds
- # Second, create a failsafe copy of DS,ES,FS,GS in case any are bad
- leal DS(%esp),%esi
- leal FAILSAFE_BUFFER(%ebx),%edi
- movsl
- movsl
- movsl
- movsl
- # Finally, restore guest registers -- faults will cause failsafe
- popl %ebx
- popl %ecx
- popl %edx
- popl %esi
- popl %edi
- popl %ebp
- popl %eax
-1: popl %ds
-2: popl %es
-3: popl %fs
-4: popl %gs
- addl $4,%esp
-5: iret
-.section .fixup,"ax"
-10: subl $4,%esp
- pushl %gs
-9: pushl %fs
-8: pushl %es
-7: pushl %ds
-6: pushl %eax
- pushl %ebp
- pushl %edi
- pushl %esi
- pushl %edx
- pushl %ecx
- pushl %ebx
- pushl %ss
- popl %ds
- pushl %ss
- popl %es
- jmp failsafe_callback
-.previous
-.section __ex_table,"a"
- .align 4
- .long 1b,6b
- .long 2b,7b
- .long 3b,8b
- .long 4b,9b
- .long 5b,10b
-.previous
-
-/* No special register assumptions */
-failsafe_callback:
- GET_CURRENT(%ebx)
- movzwl PROCESSOR(%ebx),%eax
- shl $4,%eax
- lea guest_trap_bounce(%eax),%edx
- movl FAILSAFE_ADDR(%ebx),%eax
- movl %eax,GTB_EIP(%edx)
- movl FAILSAFE_SEL(%ebx),%eax
- movw %ax,GTB_CS(%edx)
- call create_bounce_frame
- subl $16,%esi # add DS/ES/FS/GS to failsafe stack frame
- leal FAILSAFE_BUFFER(%ebx),%ebp
- movl 0(%ebp),%eax # DS
-FAULT1: movl %eax,(%esi)
- movl 4(%ebp),%eax # ES
-FAULT2: movl %eax,4(%esi)
- movl 8(%ebp),%eax # FS
-FAULT3: movl %eax,8(%esi)
- movl 12(%ebp),%eax # GS
-FAULT4: movl %eax,12(%esi)
- movl %esi,OLDESP(%esp)
- popl %ebx
- popl %ecx
- popl %edx
- popl %esi
- popl %edi
- popl %ebp
- popl %eax
- addl $20,%esp # skip DS/ES/FS/GS/ORIG_EAX
-FAULT5: iret
-
-
- ALIGN
-# Simple restore -- we should never fault as we will only interrupt ring 0
-# when sane values have been placed in all registers. The only exception is
-# NMI, which may interrupt before good values have been placed in DS-GS.
-# The NMI return code deals with this problem itself.
-restore_all_xen:
- popl %ebx
- popl %ecx
- popl %edx
- popl %esi
- popl %edi
- popl %ebp
- popl %eax
- popl %ds
- popl %es
- popl %fs
- popl %gs
- addl $4,%esp
- iret
-
- ALIGN
-ENTRY(hypervisor_call)
- pushl %eax # save orig_eax
- SAVE_ALL
- GET_CURRENT(%ebx)
- andl $255,%eax
- call *SYMBOL_NAME(hypervisor_call_table)(,%eax,4)
-
-ret_from_hypervisor_call:
- movl %eax,EAX(%esp) # save the return value
-
-test_all_events:
- xorl %ecx,%ecx
- notl %ecx
- cli # tests must not race interrupts
-/*test_softirqs:*/
- movzwl PROCESSOR(%ebx),%eax
- shl $6,%eax # sizeof(irq_cpustat) == 64
- test %ecx,SYMBOL_NAME(irq_stat)(%eax,1)
- jnz process_softirqs
-/*test_hyp_events:*/
- testw %cx, HYP_EVENTS(%ebx)
- jnz process_hyp_events
-/*test_guest_events:*/
- movl SHARED_INFO(%ebx),%eax
- testb $0xFF,UPCALL_MASK(%eax)
- jnz restore_all_guest
- testb $0xFF,UPCALL_PENDING(%eax)
- jz restore_all_guest
- movb $1,UPCALL_MASK(%eax) # Upcalls are masked during delivery
-/*process_guest_events:*/
- movzwl PROCESSOR(%ebx),%edx
- shl $4,%edx # sizeof(guest_trap_bounce) == 16
- lea guest_trap_bounce(%edx),%edx
- movl EVENT_ADDR(%ebx),%eax
- movl %eax,GTB_EIP(%edx)
- movl EVENT_SEL(%ebx),%eax
- movw %ax,GTB_CS(%edx)
- call create_bounce_frame
- jmp restore_all_guest
-
- ALIGN
-process_softirqs:
- sti
- call SYMBOL_NAME(do_softirq)
- jmp test_all_events
-
- ALIGN
-process_hyp_events:
- sti
- call SYMBOL_NAME(do_hyp_events)
- jmp test_all_events
-
-/* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK: */
-/* {EIP, CS, EFLAGS, [ESP, SS]} */
-/* %edx == guest_trap_bounce, %ebx == task_struct */
-/* %eax,%ecx are clobbered. %ds:%esi contain new OLDSS/OLDESP. */
-create_bounce_frame:
- mov CS+4(%esp),%cl
- test $2,%cl
- jz 1f /* jump if returning to an existing ring-1 activation */
- /* obtain ss/esp from TSS -- no current ring-1 activations */
- movzwl PROCESSOR(%ebx),%eax
- /* next 4 lines multiply %eax by 8320, which is sizeof(tss_struct) */
- movl %eax, %ecx
- shll $7, %ecx
- shll $13, %eax
- addl %ecx,%eax
- addl $init_tss + 12,%eax
- movl (%eax),%esi /* tss->esp1 */
-FAULT6: movl 4(%eax),%ds /* tss->ss1 */
- /* base of stack frame must contain ss/esp (inter-priv iret) */
- subl $8,%esi
- movl OLDESP+4(%esp),%eax
-FAULT7: movl %eax,(%esi)
- movl OLDSS+4(%esp),%eax
-FAULT8: movl %eax,4(%esi)
- jmp 2f
-1: /* obtain ss/esp from oldss/oldesp -- a ring-1 activation exists */
- movl OLDESP+4(%esp),%esi
-FAULT9: movl OLDSS+4(%esp),%ds
-2: /* Construct a stack frame: EFLAGS, CS/EIP */
- subl $12,%esi
- movl EIP+4(%esp),%eax
-FAULT10:movl %eax,(%esi)
- movl CS+4(%esp),%eax
-FAULT11:movl %eax,4(%esi)
- movl EFLAGS+4(%esp),%eax
-FAULT12:movl %eax,8(%esi)
- /* Rewrite our stack frame and return to ring 1. */
- /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
- andl $0xfffcbeff,%eax
- movl %eax,EFLAGS+4(%esp)
- movl %ds,OLDSS+4(%esp)
- movl %esi,OLDESP+4(%esp)
- movzwl %es:GTB_CS(%edx),%eax
- movl %eax,CS+4(%esp)
- movl %es:GTB_EIP(%edx),%eax
- movl %eax,EIP+4(%esp)
- ret
-
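The frame built here is deliberately shaped like a hardware trap frame, so the guest handler can return through it with a plain IRET. A guest's-eye sketch (the name and C rendering are illustrative; the error code and %cr2, when present, are pushed below EIP by process_guest_exception_and_events further down):

    struct bounce_frame_sketch {
        unsigned long eip; /* from guest_trap_bounce */
        unsigned long cs;
        unsigned long eflags; /* TF/VM/RF/NT cleared, as on a real trap */
        unsigned long esp, ss; /* only when delivery crosses privilege levels */
    };
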
-
-.section __ex_table,"a"
- .align 4
- .long FAULT1, crash_domain_fixup3 # Fault writing to ring-1 stack
- .long FAULT2, crash_domain_fixup3 # Fault writing to ring-1 stack
- .long FAULT3, crash_domain_fixup3 # Fault writing to ring-1 stack
- .long FAULT4, crash_domain_fixup3 # Fault writing to ring-1 stack
- .long FAULT5, crash_domain_fixup1 # Fault executing failsafe iret
- .long FAULT6, crash_domain_fixup2 # Fault loading ring-1 stack selector
- .long FAULT7, crash_domain_fixup2 # Fault writing to ring-1 stack
- .long FAULT8, crash_domain_fixup2 # Fault writing to ring-1 stack
- .long FAULT9, crash_domain_fixup2 # Fault loading ring-1 stack selector
- .long FAULT10,crash_domain_fixup2 # Fault writing to ring-1 stack
- .long FAULT11,crash_domain_fixup2 # Fault writing to ring-1 stack
- .long FAULT12,crash_domain_fixup2 # Fault writing to ring-1 stack
- .long FAULT13,crash_domain_fixup3 # Fault writing to ring-1 stack
- .long FAULT14,crash_domain_fixup3 # Fault writing to ring-1 stack
-.previous
-
-# This handler kills domains which experience unrecoverable faults.
-.section .fixup,"ax"
-crash_domain_fixup1:
- subl $4,%esp
- SAVE_ALL
- jmp crash_domain
-crash_domain_fixup2:
- addl $4,%esp
-crash_domain_fixup3:
- pushl %ss
- popl %ds
- jmp crash_domain
-.previous
-
- ALIGN
-process_guest_exception_and_events:
- movzwl PROCESSOR(%ebx),%eax
- shl $4,%eax
- lea guest_trap_bounce(%eax),%edx
- testb $~0,GTB_FLAGS(%edx)
- jz test_all_events
- call create_bounce_frame # just the basic frame
- mov %es:GTB_FLAGS(%edx),%cl
- test $GTBF_TRAP_NOCODE,%cl
- jnz 2f
- subl $4,%esi # push error_code onto guest frame
- movl %es:GTB_ERROR_CODE(%edx),%eax
-FAULT13:movl %eax,(%esi)
- test $GTBF_TRAP_CR2,%cl
- jz 1f
- subl $4,%esi # push %cr2 onto guest frame
- movl %es:GTB_CR2(%edx),%eax
-FAULT14:movl %eax,(%esi)
-1: movl %esi,OLDESP(%esp)
-2: push %es # unclobber %ds
- pop %ds
- movb $0,GTB_FLAGS(%edx)
- jmp test_all_events
-
- ALIGN
-ENTRY(ret_from_intr)
- GET_CURRENT(%ebx)
- movb CS(%esp),%al
- testb $3,%al # return to non-supervisor?
- jne test_all_events
- jmp restore_all_xen
-
-ENTRY(divide_error)
- pushl $0 # no error code
- pushl $ SYMBOL_NAME(do_divide_error)
- ALIGN
-error_code:
- pushl %fs
- pushl %es
- pushl %ds
- pushl %eax
- xorl %eax,%eax
- pushl %ebp
- pushl %edi
- pushl %esi
- pushl %edx
- decl %eax # eax = -1
- pushl %ecx
- pushl %ebx
- cld
- movl %gs,%ecx
- movl ORIG_EAX(%esp), %esi # get the error code
- movl GS(%esp), %edi # get the function address
- movl %eax, ORIG_EAX(%esp)
- movl %ecx, GS(%esp)
- movl $(__HYPERVISOR_DS),%edx
- movl %edx,%ds
- movl %edx,%es
- movl %edx,%fs
- movl %edx,%gs
- movl %esp,%edx
- pushl %esi # push the error code
- pushl %edx # push the pt_regs pointer
- GET_CURRENT(%ebx)
- call *%edi
- addl $8,%esp
- movb CS(%esp),%al
- testb $3,%al
- je restore_all_xen
- jmp process_guest_exception_and_events
-
-ENTRY(coprocessor_error)
- pushl $0
- pushl $ SYMBOL_NAME(do_coprocessor_error)
- jmp error_code
-
-ENTRY(simd_coprocessor_error)
- pushl $0
- pushl $ SYMBOL_NAME(do_simd_coprocessor_error)
- jmp error_code
-
-ENTRY(device_not_available)
- pushl $0
- pushl $SYMBOL_NAME(math_state_restore)
- jmp error_code
-
-ENTRY(debug)
- pushl $0
- pushl $ SYMBOL_NAME(do_debug)
- jmp error_code
-
-ENTRY(int3)
- pushl $0
- pushl $ SYMBOL_NAME(do_int3)
- jmp error_code
-
-ENTRY(overflow)
- pushl $0
- pushl $ SYMBOL_NAME(do_overflow)
- jmp error_code
-
-ENTRY(bounds)
- pushl $0
- pushl $ SYMBOL_NAME(do_bounds)
- jmp error_code
-
-ENTRY(invalid_op)
- pushl $0
- pushl $ SYMBOL_NAME(do_invalid_op)
- jmp error_code
-
-ENTRY(coprocessor_segment_overrun)
- pushl $0
- pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun)
- jmp error_code
-
-ENTRY(invalid_TSS)
- pushl $ SYMBOL_NAME(do_invalid_TSS)
- jmp error_code
-
-ENTRY(segment_not_present)
- pushl $ SYMBOL_NAME(do_segment_not_present)
- jmp error_code
-
-ENTRY(stack_segment)
- pushl $ SYMBOL_NAME(do_stack_segment)
- jmp error_code
-
-ENTRY(general_protection)
- pushl $ SYMBOL_NAME(do_general_protection)
- jmp error_code
-
-ENTRY(alignment_check)
- pushl $ SYMBOL_NAME(do_alignment_check)
- jmp error_code
-
-ENTRY(page_fault)
- pushl $ SYMBOL_NAME(do_page_fault)
- jmp error_code
-
-ENTRY(machine_check)
- pushl $0
- pushl $ SYMBOL_NAME(do_machine_check)
- jmp error_code
-
-ENTRY(spurious_interrupt_bug)
- pushl $0
- pushl $ SYMBOL_NAME(do_spurious_interrupt_bug)
- jmp error_code
-
-ENTRY(nmi)
- # Save state but do not trash the segment registers!
- # We may otherwise be unable to reload them or copy them to ring 1.
- pushl %eax
- SAVE_ALL_NOSEGREGS
-
- # Check for hardware problems. These are always fatal so we can
- # reload DS and ES when handling them.
- inb $0x61,%al
- testb $0x80,%al
- jne nmi_parity_err
- testb $0x40,%al
- jne nmi_io_err
- movl %eax,%ebx
-
- # Okay, it's almost a normal NMI tick. We can only process it if:
- # A. We are the outermost Xen activation (in which case we have
- # the selectors safely saved on our stack)
- # B. DS-GS all contain sane Xen values.
- # In all other cases we bail without touching DS-GS, as we have
- # interrupted an enclosing Xen activation in tricky prologue or
- # epilogue code.
- movb CS(%esp),%al
- testb $3,%al
- jne do_watchdog_tick
- movl DS(%esp),%eax
- cmpw $(__HYPERVISOR_DS),%ax
- jne nmi_badseg
- movl ES(%esp),%eax
- cmpw $(__HYPERVISOR_DS),%ax
- jne nmi_badseg
- movl FS(%esp),%eax
- cmpw $(__HYPERVISOR_DS),%ax
- jne nmi_badseg
- movl GS(%esp),%eax
- cmpw $(__HYPERVISOR_DS),%ax
- jne nmi_badseg
-
-do_watchdog_tick:
- movl $(__HYPERVISOR_DS),%edx
- movl %edx,%ds
- movl %edx,%es
- movl %esp,%edx
- pushl %ebx # reason
- pushl %edx # regs
- call SYMBOL_NAME(do_nmi)
- addl $8,%esp
- movb CS(%esp),%al
- testb $3,%al
- je restore_all_xen
- GET_CURRENT(%ebx)
- jmp restore_all_guest
-
-nmi_badseg:
- popl %ebx
- popl %ecx
- popl %edx
- popl %esi
- popl %edi
- popl %ebp
- popl %eax
- addl $20,%esp
- iret
-
-nmi_parity_err:
- movl $(__HYPERVISOR_DS),%edx
- movl %edx,%ds
- movl %edx,%es
- jmp SYMBOL_NAME(mem_parity_error)
-
-nmi_io_err:
- movl $(__HYPERVISOR_DS),%edx
- movl %edx,%ds
- movl %edx,%es
- jmp SYMBOL_NAME(io_check_error)
-
-.data
-ENTRY(hypervisor_call_table)
- .long SYMBOL_NAME(do_set_trap_table) /* 0 */
- .long SYMBOL_NAME(do_mmu_update)
- .long SYMBOL_NAME(do_console_write)
- .long SYMBOL_NAME(do_set_gdt)
- .long SYMBOL_NAME(do_stack_switch)
- .long SYMBOL_NAME(do_set_callbacks) /* 5 */
- .long SYMBOL_NAME(do_ni_syscall) # do_net_io_op
- .long SYMBOL_NAME(do_fpu_taskswitch)
- .long SYMBOL_NAME(do_sched_op)
- .long SYMBOL_NAME(do_dom0_op)
- .long SYMBOL_NAME(do_ni_syscall) /* 10 */ # do_network_op
- .long SYMBOL_NAME(do_ni_syscall) # do_block_io_op
- .long SYMBOL_NAME(do_set_debugreg)
- .long SYMBOL_NAME(do_get_debugreg)
- .long SYMBOL_NAME(do_update_descriptor)
- .long SYMBOL_NAME(do_set_fast_trap) /* 15 */
- .long SYMBOL_NAME(do_dom_mem_op)
- .long SYMBOL_NAME(do_multicall)
- .long SYMBOL_NAME(do_kbd_op)
- .long SYMBOL_NAME(do_update_va_mapping)
- .long SYMBOL_NAME(do_set_timer_op) /* 20 */
- .long SYMBOL_NAME(do_event_channel_op)
- .long SYMBOL_NAME(do_xen_version)
- .long SYMBOL_NAME(do_console_io)
- .long SYMBOL_NAME(do_physdev_op)
- .long SYMBOL_NAME(do_update_va_mapping_otherdomain) /* 25 */
- .rept NR_syscalls-((.-hypervisor_call_table)/4)
- .long SYMBOL_NAME(do_ni_syscall)
- .endr
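
Because SAVE_ALL leaves %ebx at the top of the register frame, the C handlers in this table receive their first argument from %ebx, the second from %ecx, and so on. A guest-side sketch of issuing a two-argument hypercall under that ABI (the trap vector 0x82 is an assumption about this era's HYPERVISOR_CALL_VECTOR; its value is not shown in this patch):

    static inline long hypercall2_sketch(long op, long arg1, long arg2)
    {
        long ret;
        __asm__ volatile ( "int $0x82" /* assumed HYPERVISOR_CALL_VECTOR */
                           : "=a" (ret)
                           : "a" (op), "b" (arg1), "c" (arg2)
                           : "memory" );
        return ret;
    }
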
+++ /dev/null
-/*
- * linux/arch/i386/mm/extable.c
- */
-
-#include <xen/config.h>
-#include <xen/module.h>
-#include <xen/spinlock.h>
-#include <asm/uaccess.h>
-
-extern const struct exception_table_entry __start___ex_table[];
-extern const struct exception_table_entry __stop___ex_table[];
-
-static inline unsigned long
-search_one_table(const struct exception_table_entry *first,
- const struct exception_table_entry *last,
- unsigned long value)
-{
- while (first <= last) {
- const struct exception_table_entry *mid;
- long diff;
-
- mid = (last - first) / 2 + first;
- diff = mid->insn - value;
- if (diff == 0)
- return mid->fixup;
- else if (diff < 0)
- first = mid+1;
- else
- last = mid-1;
- }
- return 0;
-}
-
-extern spinlock_t modlist_lock;
-
-unsigned long
-search_exception_table(unsigned long addr)
-{
- unsigned long ret = 0;
-
-#ifndef CONFIG_MODULES
- /* There is only the kernel to search. */
- ret = search_one_table(__start___ex_table, __stop___ex_table-1, addr);
- return ret;
-#else
- unsigned long flags;
- /* The kernel is the last "module" -- no need to treat it special. */
- struct module *mp;
-
- spin_lock_irqsave(&modlist_lock, flags);
- for (mp = module_list; mp != NULL; mp = mp->next) {
- if (mp->ex_table_start == NULL || !(mp->flags&(MOD_RUNNING|MOD_INITIALIZING)))
- continue;
- ret = search_one_table(mp->ex_table_start,
- mp->ex_table_end - 1, addr);
- if (ret)
- break;
- }
- spin_unlock_irqrestore(&modlist_lock, flags);
- return ret;
-#endif
-}
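
The usual consumer of this table is the fault path: look up the faulting EIP and, if a fixup stub exists, resume there instead of treating the fault as fatal. A sketch (the pt_regs stand-in is illustrative):

    struct regs_sketch { unsigned long eip; /* ...other saved registers... */ };

    extern unsigned long search_exception_table(unsigned long addr);

    /* Returns 1 if the fault was fixed up, 0 if it is genuine. */
    static int fixup_exception_sketch(struct regs_sketch *regs)
    {
        unsigned long fixup = search_exception_table(regs->eip);
        if ( fixup == 0 )
            return 0; /* no entry: a real fault */
        regs->eip = fixup; /* resume in the .fixup section */
        return 1;
    }
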
+++ /dev/null
-/******************************************************************************
- * flushtlb.c
- *
- * TLB flushes are timestamped using a global virtual 'clock' which ticks
- * on any TLB flush on any processor.
- *
- * Copyright (c) 2003, K A Fraser
- */
-
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <xen/interrupt.h>
-#include <asm/flushtlb.h>
-
-u32 tlbflush_clock;
-u32 tlbflush_time[NR_CPUS];
-
-void tlb_clocktick(void)
-{
- u32 y, ny;
-
- /* Tick the clock. 'y' contains the current time after the tick. */
- ny = tlbflush_clock;
- do {
-#ifdef CONFIG_SMP
- if ( unlikely(((y = ny+1) & TLBCLOCK_EPOCH_MASK) == 0) )
- {
- raise_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ);
- y = tlbflush_clock;
- break;
- }
-#else
- y = ny+1;
-#endif
- }
- while ( unlikely((ny = cmpxchg(&tlbflush_clock, y-1, y)) != y-1) );
-
- /* Update this CPU's timestamp to new time. */
- tlbflush_time[smp_processor_id()] = y;
-}
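
The stamps exist so that consumers can skip flushes: a mapping retired at clock value T can only be stale in the TLB of a CPU whose last flush happened before T. A sketch of that test (ignoring epoch wrap-around, which the softirq above exists to handle):

    typedef unsigned int u32;
    extern u32 tlbflush_time[];

    /* May 'cpu' still cache a mapping retired at clock value 'page_time'? */
    static int need_flush_sketch(int cpu, u32 page_time)
    {
        return tlbflush_time[cpu] < page_time;
    }
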
+++ /dev/null
-/*
- * linux/arch/i386/kernel/i387.c
- *
- * Copyright (C) 1994 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * General FPU state handling cleanups
- * Gareth Hughes <gareth@valinux.com>, May 2000
- */
-
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <asm/processor.h>
-#include <asm/i387.h>
-
-void init_fpu(void)
-{
- __asm__("fninit");
- if ( cpu_has_xmm ) load_mxcsr(0x1f80);
- set_bit(PF_DONEFPUINIT, &current->flags);
-}
-
-static inline void __save_init_fpu( struct task_struct *tsk )
-{
- if ( cpu_has_fxsr ) {
- asm volatile( "fxsave %0 ; fnclex"
- : "=m" (tsk->thread.i387.fxsave) );
- } else {
- asm volatile( "fnsave %0 ; fwait"
- : "=m" (tsk->thread.i387.fsave) );
- }
- clear_bit(PF_USEDFPU, &tsk->flags);
-}
-
-void save_init_fpu( struct task_struct *tsk )
-{
- /*
- * The guest OS may have set the 'virtual STTS' flag.
- * This causes us to set the real flag, so we'll need
- * to temporarily clear it while saving f-p state.
- */
- if ( test_bit(PF_GUEST_STTS, &tsk->flags) ) clts();
- __save_init_fpu(tsk);
- stts();
-}
-
-void restore_fpu( struct task_struct *tsk )
-{
- if ( cpu_has_fxsr ) {
- asm volatile( "fxrstor %0"
- : : "m" (tsk->thread.i387.fxsave) );
- } else {
- asm volatile( "frstor %0"
- : : "m" (tsk->thread.i387.fsave) );
- }
-}
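
cpu_has_fxsr and cpu_has_xmm ultimately come from CPUID leaf 1: EDX bit 24 advertises FXSAVE/FXRSTOR and bit 25 advertises SSE. A standalone detection sketch using the GCC/Clang helper:

    #include <cpuid.h>

    static int has_fxsr_sketch(void)
    {
        unsigned int eax, ebx, ecx, edx;
        if ( !__get_cpuid(1, &eax, &ebx, &ecx, &edx) )
            return 0;
        return (edx >> 24) & 1; /* EDX bit 24 = FXSR, bit 25 = SSE */
    }
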
+++ /dev/null
-/******************************************************************************
- * i8259.c
- *
- * Well, this is required for SMP systems as well, as it builds interrupt
- * tables for IO-APICs as well as uniprocessor 8259-alikes.
- */
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <asm/ptrace.h>
-#include <xen/errno.h>
-#include <xen/sched.h>
-#include <xen/interrupt.h>
-#include <xen/irq.h>
-
-#include <asm/atomic.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/desc.h>
-#include <asm/bitops.h>
-#include <xen/delay.h>
-#include <asm/apic.h>
-
-
-/*
- * Common place to define all x86 IRQ vectors
- *
- * This builds up the IRQ handler stubs using some ugly macros in irq.h
- *
- * These macros create the low-level assembly IRQ routines that save
- * register context and call do_IRQ(). do_IRQ() then does all the
- * operations that are needed to keep the AT (or SMP IOAPIC)
- * interrupt-controller happy.
- */
-
-BUILD_COMMON_IRQ()
-
-#define BI(x,y) \
- BUILD_IRQ(x##y)
-
-#define BUILD_16_IRQS(x) \
- BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
- BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
- BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
- BI(x,c) BI(x,d) BI(x,e) BI(x,f)
-
-/*
- * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
- * (these are usually mapped to vectors 0x30-0x3f)
- */
- BUILD_16_IRQS(0x0)
-
-#ifdef CONFIG_X86_IO_APIC
-/*
- * The IO-APIC gives us many more interrupt sources. Most of these
- * are unused but an SMP system is supposed to have enough memory ...
- * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
- * across the spectrum, so we really want to be prepared to get all
- * of these. Plus, more powerful systems might have more than 64
- * IO-APIC registers.
- *
- * (these are usually mapped into the 0x30-0xff vector range)
- */
- BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
- BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
- BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
- BUILD_16_IRQS(0xc)
-#endif
-
-#undef BUILD_16_IRQS
-#undef BI
-
-
-/*
- * The following vectors are part of the Linux architecture, there
- * is no hardware IRQ pin equivalent for them, they are triggered
- * through the ICC by us (IPIs)
- */
-#ifdef CONFIG_SMP
- BUILD_SMP_INTERRUPT(event_check_interrupt,EVENT_CHECK_VECTOR)
- BUILD_SMP_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
- BUILD_SMP_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
-#endif
-
-/*
- * every pentium local APIC has two 'local interrupts', with a
- * soft-definable vector attached to both interrupts, one of
- * which is a timer interrupt, the other one is error counter
- * overflow. Linux uses the local APIC timer interrupt to get
- * a much simpler SMP time architecture:
- */
-#ifdef CONFIG_X86_LOCAL_APIC
- BUILD_SMP_TIMER_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
- BUILD_SMP_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
- BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
-#endif
-
-#define IRQ(x,y) \
- IRQ##x##y##_interrupt
-
-#define IRQLIST_16(x) \
- IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
- IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
- IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
- IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
-
- void (*interrupt[NR_IRQS])(void) = {
- IRQLIST_16(0x0),
-
-#ifdef CONFIG_X86_IO_APIC
- IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3),
- IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
- IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
- IRQLIST_16(0xc)
-#endif
- };
-
-#undef IRQ
-#undef IRQLIST_16
-
-/*
- * This is the 'legacy' 8259A Programmable Interrupt Controller,
- * present in the majority of PC/AT boxes,
- * plus some generic x86-specific things, if generic specifics make
- * any sense at all.
- * This file should become arch/i386/kernel/irq.c when the old irq.c
- * moves to arch-independent land.
- */
-
-spinlock_t i8259A_lock = SPIN_LOCK_UNLOCKED;
-
-static void end_8259A_irq (unsigned int irq)
-{
- if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
- enable_8259A_irq(irq);
-}
-
-#define shutdown_8259A_irq disable_8259A_irq
-
-void mask_and_ack_8259A(unsigned int);
-
-static unsigned int startup_8259A_irq(unsigned int irq)
-{
- enable_8259A_irq(irq);
- return 0; /* never anything pending */
-}
-
-static struct hw_interrupt_type i8259A_irq_type = {
- "XT-PIC",
- startup_8259A_irq,
- shutdown_8259A_irq,
- enable_8259A_irq,
- disable_8259A_irq,
- mask_and_ack_8259A,
- end_8259A_irq,
- NULL
-};
-
-/*
- * 8259A PIC functions to handle ISA devices:
- */
-
-/*
- * This contains the irq mask for both 8259A irq controllers.
- */
-static unsigned int cached_irq_mask = 0xffff;
-
-#define __byte(x,y) (((unsigned char *)&(y))[x])
-#define cached_21 (__byte(0,cached_irq_mask))
-#define cached_A1 (__byte(1,cached_irq_mask))
-
-/*
- * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
- * boards the timer interrupt is not really connected to any IO-APIC pin,
- * it's fed to the master 8259A's IR0 line only.
- *
- * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
- * this 'mixed mode' IRQ handling costs nothing because it's only used
- * at IRQ setup time.
- */
-unsigned long io_apic_irqs;
-
-void disable_8259A_irq(unsigned int irq)
-{
- unsigned int mask = 1 << irq;
- unsigned long flags;
-
- spin_lock_irqsave(&i8259A_lock, flags);
- cached_irq_mask |= mask;
- if (irq & 8)
- outb(cached_A1,0xA1);
- else
- outb(cached_21,0x21);
- spin_unlock_irqrestore(&i8259A_lock, flags);
-}
-
-void enable_8259A_irq(unsigned int irq)
-{
- unsigned int mask = ~(1 << irq);
- unsigned long flags;
-
- spin_lock_irqsave(&i8259A_lock, flags);
- cached_irq_mask &= mask;
- if (irq & 8)
- outb(cached_A1,0xA1);
- else
- outb(cached_21,0x21);
- spin_unlock_irqrestore(&i8259A_lock, flags);
-}
-
-int i8259A_irq_pending(unsigned int irq)
-{
- unsigned int mask = 1<<irq;
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&i8259A_lock, flags);
- if (irq < 8)
- ret = inb(0x20) & mask;
- else
- ret = inb(0xA0) & (mask >> 8);
- spin_unlock_irqrestore(&i8259A_lock, flags);
-
- return ret;
-}
-
-void make_8259A_irq(unsigned int irq)
-{
- disable_irq_nosync(irq);
- io_apic_irqs &= ~(1<<irq);
- irq_desc[irq].handler = &i8259A_irq_type;
- enable_irq(irq);
-}
-
-/*
- * This function assumes to be called rarely. Switching between
- * 8259A registers is slow.
- * This has to be protected by the irq controller spinlock
- * before being called.
- */
-static inline int i8259A_irq_real(unsigned int irq)
-{
- int value;
- int irqmask = 1<<irq;
-
- if (irq < 8) {
- outb(0x0B,0x20); /* ISR register */
- value = inb(0x20) & irqmask;
- outb(0x0A,0x20); /* back to the IRR register */
- return value;
- }
- outb(0x0B,0xA0); /* ISR register */
- value = inb(0xA0) & (irqmask >> 8);
- outb(0x0A,0xA0); /* back to the IRR register */
- return value;
-}
-
-/*
- * Careful! The 8259A is a fragile beast, it pretty
- * much _has_ to be done exactly like this (mask it
- * first, _then_ send the EOI, and the order of EOI
- * to the two 8259s is important!
- */
-void mask_and_ack_8259A(unsigned int irq)
-{
- unsigned int irqmask = 1 << irq;
- unsigned long flags;
-
- spin_lock_irqsave(&i8259A_lock, flags);
- /*
- * Lightweight spurious IRQ detection. We do not want
- * to overdo spurious IRQ handling - it's usually a sign
- * of hardware problems, so we only do the checks we can
- * do without slowing down good hardware unnecessarily.
- *
- * Note that IRQ7 and IRQ15 (the two spurious IRQs
- * usually resulting from the 8259A-1|2 PICs) occur
- * even if the IRQ is masked in the 8259A. Thus we
- * can check spurious 8259A IRQs without doing the
- * quite slow i8259A_irq_real() call for every IRQ.
- * This does not cover 100% of spurious interrupts,
- * but should be enough to warn the user that there
- * is something bad going on ...
- */
- if (cached_irq_mask & irqmask)
- goto spurious_8259A_irq;
- cached_irq_mask |= irqmask;
-
- handle_real_irq:
- if (irq & 8) {
- inb(0xA1); /* DUMMY - (do we need this?) */
- outb(cached_A1,0xA1);
- outb(0x60+(irq&7),0xA0);/* 'Specific EOI' to slave */
- outb(0x62,0x20); /* 'Specific EOI' to master-IRQ2 */
- } else {
- inb(0x21); /* DUMMY - (do we need this?) */
- outb(cached_21,0x21);
- outb(0x60+irq,0x20); /* 'Specific EOI' to master */
- }
- spin_unlock_irqrestore(&i8259A_lock, flags);
- return;
-
- spurious_8259A_irq:
- /*
- * this is the slow path - should happen rarely.
- */
- if (i8259A_irq_real(irq))
- /*
- * oops, the IRQ _is_ in service according to the
- * 8259A - not spurious, go handle it.
- */
- goto handle_real_irq;
-
- {
- static int spurious_irq_mask;
- /*
- * At this point we can be sure the IRQ is spurious,
- * lets ACK and report it. [once per IRQ]
- */
- if (!(spurious_irq_mask & irqmask)) {
- printk("spurious 8259A interrupt: IRQ%d.\n", irq);
- spurious_irq_mask |= irqmask;
- }
- atomic_inc(&irq_err_count);
- /*
- * Theoretically we do not have to handle this IRQ,
- * but in Linux this does not cause problems and is
- * simpler for us.
- */
- goto handle_real_irq;
- }
-}
-
-void __init init_8259A(int auto_eoi)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&i8259A_lock, flags);
-
- outb(0xff, 0x21); /* mask all of 8259A-1 */
- outb(0xff, 0xA1); /* mask all of 8259A-2 */
-
- /*
- * outb_p - this has to work on a wide range of PC hardware.
- */
- outb_p(0x11, 0x20); /* ICW1: select 8259A-1 init */
- outb_p(0x30 + 0, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
- outb_p(0x04, 0x21); /* 8259A-1 (the master) has a slave on IR2 */
- if (auto_eoi)
- outb_p(0x03, 0x21); /* master does Auto EOI */
- else
- outb_p(0x01, 0x21); /* master expects normal EOI */
-
- outb_p(0x11, 0xA0); /* ICW1: select 8259A-2 init */
- outb_p(0x30 + 8, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
- outb_p(0x02, 0xA1); /* 8259A-2 is a slave on master's IR2 */
- outb_p(0x01, 0xA1); /* (slave's support for AEOI in flat mode
- is to be investigated) */
-
- if (auto_eoi)
- /*
- * in AEOI mode we just have to mask the interrupt
- * when acking.
- */
- i8259A_irq_type.ack = disable_8259A_irq;
- else
- i8259A_irq_type.ack = mask_and_ack_8259A;
-
- udelay(100); /* wait for 8259A to initialize */
-
- outb(cached_21, 0x21); /* restore master IRQ mask */
- outb(cached_A1, 0xA1); /* restore slave IRQ mask */
-
- spin_unlock_irqrestore(&i8259A_lock, flags);
-}
-
-
-/*
- * IRQ2 is the cascade interrupt to the second interrupt controller
- */
-
-static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL};
-
-void __init init_ISA_irqs (void)
-{
- int i;
-
-#ifdef CONFIG_X86_LOCAL_APIC
- init_bsp_APIC();
-#endif
- init_8259A(0);
-
- for (i = 0; i < NR_IRQS; i++) {
- irq_desc[i].status = IRQ_DISABLED;
- irq_desc[i].action = 0;
- irq_desc[i].depth = 1;
-
- if (i < 16) {
- /*
- * 16 old-style INTA-cycle interrupts:
- */
- irq_desc[i].handler = &i8259A_irq_type;
- } else {
- /*
- * 'high' PCI IRQs filled in on demand
- */
- irq_desc[i].handler = &no_irq_type;
- }
- }
-}
-
-void __init init_IRQ(void)
-{
- int i;
-
- init_ISA_irqs();
-
- /*
- * Cover the whole vector space, no vector can escape
- * us. (some of these will be overridden and become
- * 'special' SMP interrupts)
- */
- for (i = 0; i < NR_IRQS; i++) {
- int vector = FIRST_EXTERNAL_VECTOR + i;
- if (vector != HYPERVISOR_CALL_VECTOR)
- set_intr_gate(vector, interrupt[i]);
- }
-
-#ifdef CONFIG_SMP
- /*
- * IRQ0 must be given a fixed assignment and initialized,
- * because it's used before the IO-APIC is set up.
- */
- set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
-
- /*
- * The reschedule interrupt is a CPU-to-CPU reschedule-helper
- * IPI, driven by wakeup.
- */
- set_intr_gate(EVENT_CHECK_VECTOR, event_check_interrupt);
-
- /* IPI for invalidation */
- set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
-
- /* IPI for generic function call */
- set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-#endif
-
-#ifdef CONFIG_X86_LOCAL_APIC
- /* self generated IPI for local APIC timer */
- set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
-
- /* IPI vectors for APIC spurious and error interrupts */
- set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
- set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
-#endif
-
- /*
- * Set the clock to HZ Hz, we already have a valid
- * vector now:
- */
-#define CLOCK_TICK_RATE 1193180 /* crystal freq (Hz) */
-#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
- outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */
- outb_p(LATCH & 0xff , 0x40); /* LSB */
- outb(LATCH >> 8 , 0x40); /* MSB */
-
- setup_irq(2, &irq2);
-}
-
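Worked through with the era's usual HZ = 100 (an assumption; HZ is defined elsewhere in the tree): LATCH = (1193180 + 50) / 100 = 11932, so the PIT actually ticks at 1193180 / 11932 ≈ 99.998 Hz. A tiny verification program:

    #include <stdio.h>

    #define CLOCK_TICK_RATE 1193180 /* PIT crystal freq (Hz), as above */
    #define HZ 100 /* assumed */
    #define LATCH (((CLOCK_TICK_RATE) + (HZ/2)) / HZ)

    int main(void)
    {
        /* Prints: LATCH = 11932, actual rate = 99.998 Hz */
        printf("LATCH = %d, actual rate = %.3f Hz\n",
               LATCH, (double)CLOCK_TICK_RATE / LATCH);
        return 0;
    }
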
+++ /dev/null
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <asm/desc.h>
-
-struct task_struct idle0_task = IDLE0_TASK(idle0_task);
-
-/*
- * per-CPU TSS segments. Threads are completely 'soft' on Linux,
- * no more per-task TSS's. The TSS size is kept cacheline-aligned
- * so they are allowed to end up in the .data.cacheline_aligned
- * section. Since TSS's are completely CPU-local, we want them
- * on exact cacheline boundaries, to eliminate cacheline ping-pong.
- */
-struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS };
-
+++ /dev/null
-/*
- * Intel IO-APIC support for multi-Pentium hosts.
- *
- * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
- *
- * Many thanks to Stig Venaas for trying out countless experimental
- * patches and reporting/debugging problems patiently!
- *
- * (c) 1999, Multiple IO-APIC support, developed by
- * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
- * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
- * further tested and cleaned up by Zach Brown <zab@redhat.com>
- * and Ingo Molnar <mingo@redhat.com>
- *
- * Fixes
- * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
- * thanks to Eric Gilmore
- * and Rolf G. Tews
- * for testing these extensively
- * Paul Diefenbaugh : Added full ACPI support
- */
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/interrupt.h>
-#include <xen/irq.h>
-#include <xen/delay.h>
-#include <xen/sched.h>
-#include <xen/config.h>
-#include <asm/mc146818rtc.h>
-#include <asm/io.h>
-#include <asm/mpspec.h>
-#include <asm/io_apic.h>
-#include <asm/smp.h>
-#include <asm/desc.h>
-#include <asm/smpboot.h>
-
-#ifdef CONFIG_X86_IO_APIC
-
-#undef APIC_LOCKUP_DEBUG
-
-#define APIC_LOCKUP_DEBUG
-
-static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED;
-
-unsigned int int_dest_addr_mode = APIC_DEST_LOGICAL;
-unsigned char int_delivery_mode = dest_LowestPrio;
-
-
-/*
- * # of IRQ routing registers
- */
-int nr_ioapic_registers[MAX_IO_APICS];
-
-/*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
- */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
-
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
-
-static struct irq_pin_list {
- int apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
-
-/*
- * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
- * shared ISA-space IRQs, so we have to support them. We are super
- * fast in the common case, and fast for shared ISA-space IRQs.
- */
-static void __init add_pin_to_irq(unsigned int irq, int apic, int pin)
-{
- static int first_free_entry = NR_IRQS;
- struct irq_pin_list *entry = irq_2_pin + irq;
-
- while (entry->next)
- entry = irq_2_pin + entry->next;
-
- if (entry->pin != -1) {
- entry->next = first_free_entry;
- entry = irq_2_pin + entry->next;
- if (++first_free_entry >= PIN_MAP_SIZE)
- panic("io_apic.c: whoops");
- }
- entry->apic = apic;
- entry->pin = pin;
-}
-
-/*
- * Reroute an IRQ to a different pin.
- */
-static void __init replace_pin_at_irq(unsigned int irq,
- int oldapic, int oldpin,
- int newapic, int newpin)
-{
- struct irq_pin_list *entry = irq_2_pin + irq;
-
- while (1) {
- if (entry->apic == oldapic && entry->pin == oldpin) {
- entry->apic = newapic;
- entry->pin = newpin;
- }
- if (!entry->next)
- break;
- entry = irq_2_pin + entry->next;
- }
-}
-
-#define __DO_ACTION(R, ACTION, FINAL) \
- \
-{ \
- int pin; \
- struct irq_pin_list *entry = irq_2_pin + irq; \
- \
- for (;;) { \
- unsigned int reg; \
- pin = entry->pin; \
- if (pin == -1) \
- break; \
- reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
- reg ACTION; \
- io_apic_write(entry->apic, 0x10 + R + pin*2, reg); \
- if (!entry->next) \
- break; \
- entry = irq_2_pin + entry->next; \
- } \
- FINAL; \
-}
-
-#define DO_ACTION(name,R,ACTION, FINAL) \
- \
- static void name##_IO_APIC_irq (unsigned int irq) \
- __DO_ACTION(R, ACTION, FINAL)
-
-DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) )
-DO_ACTION( __unmask, 0, &= 0xfffeffff, )
-DO_ACTION( __edge, 0, &= 0xffff7fff, )
-DO_ACTION( __level, 0, |= 0x00008000, )
-
-static void mask_IO_APIC_irq (unsigned int irq)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- __mask_IO_APIC_irq(irq);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void unmask_IO_APIC_irq (unsigned int irq)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- __unmask_IO_APIC_irq(irq);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
-{
- struct IO_APIC_route_entry entry;
- unsigned long flags;
-
- /* Check delivery_mode to be sure we're not clearing an SMI pin */
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- if (entry.delivery_mode == dest_SMI)
- return;
-
- /*
- * Disable it in the IO-APIC irq-routing table:
- */
- memset(&entry, 0, sizeof(entry));
- entry.mask = 1;
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void clear_IO_APIC (void)
-{
- int apic, pin;
-
- for (apic = 0; apic < nr_ioapics; apic++)
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
- clear_IO_APIC_pin(apic, pin);
-}
-
-static void set_ioapic_affinity (unsigned int irq, unsigned long mask)
-{
- unsigned long flags;
-
- /*
- * Only the first 8 bits are valid.
- */
- mask = mask << 24;
- spin_lock_irqsave(&ioapic_lock, flags);
- __DO_ACTION(1, = mask, )
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#define balance_irq(_irq) ((void)0)
-
-/*
- * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
- * specific CPU-side IRQs.
- */
-
-#define MAX_PIRQS 8
-int pirq_entries [MAX_PIRQS];
-int pirqs_enabled;
-
-int skip_ioapic_setup;
-#if 0
-
-static int __init noioapic_setup(char *str)
-{
- skip_ioapic_setup = 1;
- return 1;
-}
-
-__setup("noapic", noioapic_setup);
-
-static int __init ioapic_setup(char *str)
-{
- skip_ioapic_setup = 0;
- return 1;
-}
-
-__setup("apic", ioapic_setup);
-
-
-
-static int __init ioapic_pirq_setup(char *str)
-{
- int i, max;
- int ints[MAX_PIRQS+1];
-
- get_options(str, ARRAY_SIZE(ints), ints);
-
- for (i = 0; i < MAX_PIRQS; i++)
- pirq_entries[i] = -1;
-
- pirqs_enabled = 1;
- printk(KERN_INFO "PIRQ redirection, working around broken MP-BIOS.\n");
- max = MAX_PIRQS;
- if (ints[0] < MAX_PIRQS)
- max = ints[0];
-
- for (i = 0; i < max; i++) {
- printk(KERN_DEBUG "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
- /*
- * PIRQs are mapped upside down, usually.
- */
- pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
- }
- return 1;
-}
-
-__setup("pirq=", ioapic_pirq_setup);
-
-#endif
-
-/*
- * Find the IRQ entry number of a certain pin.
- */
-static int __init find_irq_entry(int apic, int pin, int type)
-{
- int i;
-
- for (i = 0; i < mp_irq_entries; i++)
- if (mp_irqs[i].mpc_irqtype == type &&
- (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
- mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
- mp_irqs[i].mpc_dstirq == pin)
- return i;
-
- return -1;
-}
-
-/*
- * Find the pin to which IRQ[irq] (ISA) is connected
- */
-static int __init find_isa_irq_pin(int irq, int type)
-{
- int i;
-
- for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].mpc_srcbus;
-
- if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
- (mp_irqs[i].mpc_irqtype == type) &&
- (mp_irqs[i].mpc_srcbusirq == irq))
-
- return mp_irqs[i].mpc_dstirq;
- }
- return -1;
-}
-
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
- int apic, i, best_guess = -1;
-
- Dprintk("querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
- bus, slot, pin);
- if ((mp_bus_id_to_pci_bus==NULL) || (mp_bus_id_to_pci_bus[bus] == -1)) {
- printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
- return -1;
- }
- for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].mpc_srcbus;
-
- for (apic = 0; apic < nr_ioapics; apic++)
- if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
- mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
- break;
-
- if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
- !mp_irqs[i].mpc_irqtype &&
- (bus == lbus) &&
- (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
- int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
-
- if (!(apic || IO_APIC_IRQ(irq)))
- continue;
-
- if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
- return irq;
- /*
- * Use the first all-but-pin matching entry as a
- * best-guess fuzzy result for broken mptables.
- */
- if (best_guess < 0)
- best_guess = irq;
- }
- }
- return best_guess;
-}
-
-/*
- * EISA Edge/Level control register, ELCR
- */
-static int __init EISA_ELCR(unsigned int irq)
-{
- if (irq < 16) {
- unsigned int port = 0x4d0 + (irq >> 3);
- return (inb(port) >> (irq & 7)) & 1;
- }
- printk(KERN_INFO "Broken MPtable reports ISA irq %d\n", irq);
- return 0;
-}
-
-/* EISA interrupts are always polarity zero and can be edge or level
- * trigger depending on the ELCR value. If an interrupt is listed as
- * EISA conforming in the MP table, that means its trigger type must
- * be read in from the ELCR */
-
-#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
-#define default_EISA_polarity(idx) (0)
-
-/* ISA interrupts are always polarity zero edge triggered,
- * when listed as conforming in the MP table. */
-
-#define default_ISA_trigger(idx) (0)
-#define default_ISA_polarity(idx) (0)
-
-/* PCI interrupts are always polarity one level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_PCI_trigger(idx) (1)
-#define default_PCI_polarity(idx) (1)
-
-/* MCA interrupts are always polarity zero level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_MCA_trigger(idx) (1)
-#define default_MCA_polarity(idx) (0)
-
-static int __init MPBIOS_polarity(int idx)
-{
- int bus = mp_irqs[idx].mpc_srcbus;
- int polarity;
-
- /*
- * Determine IRQ line polarity (high active or low active):
- */
- switch (mp_irqs[idx].mpc_irqflag & 3)
- {
- case 0: /* conforms, ie. bus-type dependent polarity */
- {
- switch (mp_bus_id_to_type[bus])
- {
- case MP_BUS_ISA: /* ISA pin */
- {
- polarity = default_ISA_polarity(idx);
- break;
- }
- case MP_BUS_EISA: /* EISA pin */
- {
- polarity = default_EISA_polarity(idx);
- break;
- }
- case MP_BUS_PCI: /* PCI pin */
- {
- polarity = default_PCI_polarity(idx);
- break;
- }
- case MP_BUS_MCA: /* MCA pin */
- {
- polarity = default_MCA_polarity(idx);
- break;
- }
- default:
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- polarity = 1;
- break;
- }
- }
- break;
- }
- case 1: /* high active */
- {
- polarity = 0;
- break;
- }
- case 2: /* reserved */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- polarity = 1;
- break;
- }
- case 3: /* low active */
- {
- polarity = 1;
- break;
- }
- default: /* invalid */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- polarity = 1;
- break;
- }
- }
- return polarity;
-}
-
-static int __init MPBIOS_trigger(int idx)
-{
- int bus = mp_irqs[idx].mpc_srcbus;
- int trigger;
-
- /*
- * Determine IRQ trigger mode (edge or level sensitive):
- */
- switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
- {
- case 0: /* conforms, ie. bus-type dependent */
- {
- switch (mp_bus_id_to_type[bus])
- {
- case MP_BUS_ISA: /* ISA pin */
- {
- trigger = default_ISA_trigger(idx);
- break;
- }
- case MP_BUS_EISA: /* EISA pin */
- {
- trigger = default_EISA_trigger(idx);
- break;
- }
- case MP_BUS_PCI: /* PCI pin */
- {
- trigger = default_PCI_trigger(idx);
- break;
- }
- case MP_BUS_MCA: /* MCA pin */
- {
- trigger = default_MCA_trigger(idx);
- break;
- }
- default:
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- trigger = 1;
- break;
- }
- }
- break;
- }
- case 1: /* edge */
- {
- trigger = 0;
- break;
- }
- case 2: /* reserved */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- trigger = 1;
- break;
- }
- case 3: /* level */
- {
- trigger = 1;
- break;
- }
- default: /* invalid */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- trigger = 0;
- break;
- }
- }
- return trigger;
-}
-
-static inline int irq_polarity(int idx)
-{
- return MPBIOS_polarity(idx);
-}
-
-static inline int irq_trigger(int idx)
-{
- return MPBIOS_trigger(idx);
-}
-
-static int pin_2_irq(int idx, int apic, int pin)
-{
- int irq, i;
- int bus = mp_irqs[idx].mpc_srcbus;
-
- /*
- * Debugging check, we are in big trouble if this message pops up!
- */
- if (mp_irqs[idx].mpc_dstirq != pin)
- printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
-
- switch (mp_bus_id_to_type[bus])
- {
- case MP_BUS_ISA: /* ISA pin */
- case MP_BUS_EISA:
- case MP_BUS_MCA:
- {
- irq = mp_irqs[idx].mpc_srcbusirq;
- break;
- }
- case MP_BUS_PCI: /* PCI pin */
- {
- /*
- * PCI IRQs are mapped in order
- */
- i = irq = 0;
- while (i < apic)
- irq += nr_ioapic_registers[i++];
- irq += pin;
- break;
- }
- default:
- {
- printk(KERN_ERR "unknown bus type %d.\n",bus);
- irq = 0;
- break;
- }
- }
-
- /*
- * PCI IRQ command line redirection. Yes, limits are hardcoded.
- */
- if ((pin >= 16) && (pin <= 23)) {
- if (pirq_entries[pin-16] != -1) {
- if (!pirq_entries[pin-16]) {
- printk(KERN_DEBUG "disabling PIRQ%d\n", pin-16);
- } else {
- irq = pirq_entries[pin-16];
- printk(KERN_DEBUG "using PIRQ%d -> IRQ %d\n",
- pin-16, irq);
- }
- }
- }
- return irq;
-}
-
-static inline int IO_APIC_irq_trigger(int irq)
-{
- int apic, idx, pin;
-
- for (apic = 0; apic < nr_ioapics; apic++) {
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
- idx = find_irq_entry(apic,pin,mp_INT);
- if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
- return irq_trigger(idx);
- }
- }
- /*
- * nonexistent IRQs are edge default
- */
- return 0;
-}
-
-int irq_vector[NR_IRQS] = { FIRST_DEVICE_VECTOR , 0 };
-
-static int __init assign_irq_vector(int irq)
-{
- static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
- if (IO_APIC_VECTOR(irq) > 0)
- return IO_APIC_VECTOR(irq);
-next:
- current_vector += 8;
-
- /* XXX Skip the guestOS -> Xen syscall vector! XXX */
- if (current_vector == HYPERVISOR_CALL_VECTOR) goto next;
- /* XXX Skip the Linux/BSD fast-trap vector! XXX */
- if (current_vector == 0x80) goto next;
-
- if (current_vector > FIRST_SYSTEM_VECTOR) {
- offset++;
- current_vector = FIRST_DEVICE_VECTOR + offset;
- }
-
- if (current_vector == FIRST_SYSTEM_VECTOR)
- panic("ran out of interrupt sources!");
-
- IO_APIC_VECTOR(irq) = current_vector;
- return current_vector;
-}
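
The stride of 8 is deliberate: the local APIC prioritises interrupts by vector>>4 (16 vectors per priority class), so stepping by 8 spreads consecutively allocated IRQs across classes before the offset shifts the base by one. A sketch of the resulting sequence (FIRST_DEVICE_VECTOR = 0x31 and FIRST_SYSTEM_VECTOR = 0xef are assumed values not shown in this patch; exhaustion handling is omitted):

    /* Yields 0x39, 0x41, ..., 0xe9, then restarts at 0x32, 0x3a, ... */
    static int next_vector_sketch(void)
    {
        static int current_vector = 0x31, offset;

        do {
            current_vector += 8;
            if ( current_vector > 0xef ) /* ran past FIRST_SYSTEM_VECTOR */
                current_vector = 0x31 + ++offset; /* restart, base + offset */
        } while ( current_vector == 0x82 || /* skip assumed hypercall vector */
                  current_vector == 0x80 ); /* skip the fast-trap vector */

        return current_vector;
    }
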
-
-extern void (*interrupt[NR_IRQS])(void);
-
-/*
- * Level and edge triggered IO-APIC interrupts need different handling,
- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
- * handled with the level-triggered descriptor, but that one has slightly
- * more overhead. Level-triggered interrupts cannot be handled with the
- * edge-triggered handler, without risking IRQ storms and other ugly
- * races.
- */
-
-static unsigned int startup_edge_ioapic_irq(unsigned int irq);
-#define shutdown_edge_ioapic_irq disable_edge_ioapic_irq
-#define enable_edge_ioapic_irq unmask_IO_APIC_irq
-static void disable_edge_ioapic_irq (unsigned int irq);
-static void ack_edge_ioapic_irq(unsigned int irq);
-static void end_edge_ioapic_irq (unsigned int i);
-static struct hw_interrupt_type ioapic_edge_irq_type = {
- "IO-APIC-edge",
- startup_edge_ioapic_irq,
- shutdown_edge_ioapic_irq,
- enable_edge_ioapic_irq,
- disable_edge_ioapic_irq,
- ack_edge_ioapic_irq,
- end_edge_ioapic_irq,
- set_ioapic_affinity,
-};
-
-static unsigned int startup_level_ioapic_irq (unsigned int irq);
-#define shutdown_level_ioapic_irq mask_IO_APIC_irq
-#define enable_level_ioapic_irq unmask_IO_APIC_irq
-#define disable_level_ioapic_irq mask_IO_APIC_irq
-static void mask_and_ack_level_ioapic_irq (unsigned int irq);
-static void end_level_ioapic_irq (unsigned int irq);
-static struct hw_interrupt_type ioapic_level_irq_type = {
- "IO-APIC-level",
- startup_level_ioapic_irq,
- shutdown_level_ioapic_irq,
- enable_level_ioapic_irq,
- disable_level_ioapic_irq,
- mask_and_ack_level_ioapic_irq,
- end_level_ioapic_irq,
- set_ioapic_affinity,
-};
-
-void __init setup_IO_APIC_irqs(void)
-{
- struct IO_APIC_route_entry entry;
- int apic, pin, idx, irq, first_notcon = 1, vector;
- unsigned long flags;
-
- printk(KERN_DEBUG "init IO_APIC IRQs\n");
-
- for (apic = 0; apic < nr_ioapics; apic++) {
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-
- /*
- * add it to the IO-APIC irq-routing table:
- */
- memset(&entry,0,sizeof(entry));
-
- entry.delivery_mode = INT_DELIVERY_MODE;
- entry.dest_mode = (INT_DEST_ADDR_MODE != 0);
- entry.mask = 0; /* enable IRQ */
- entry.dest.logical.logical_dest = target_cpus();
-
- idx = find_irq_entry(apic,pin,mp_INT);
- if (idx == -1) {
- if (first_notcon) {
- printk(KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin);
- first_notcon = 0;
- } else
- printk(", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
- continue;
- }
-
- entry.trigger = irq_trigger(idx);
- entry.polarity = irq_polarity(idx);
-
- if (irq_trigger(idx)) {
- entry.trigger = 1;
- entry.mask = 1;
- }
-
- irq = pin_2_irq(idx, apic, pin);
- /*
- * skip adding the timer int on secondary nodes, which causes
- * a small but painful rift in the time-space continuum
- */
- if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
- && (apic != 0) && (irq == 0))
- continue;
- else
- add_pin_to_irq(irq, apic, pin);
-
- if (!apic && !IO_APIC_IRQ(irq))
- continue;
-
- if (IO_APIC_IRQ(irq)) {
- vector = assign_irq_vector(irq);
- entry.vector = vector;
-
- if (IO_APIC_irq_trigger(irq))
- irq_desc[irq].handler = &ioapic_level_irq_type;
- else
- irq_desc[irq].handler = &ioapic_edge_irq_type;
-
- set_intr_gate(vector, interrupt[irq]);
-
- if (!apic && (irq < 16))
- disable_8259A_irq(irq);
- }
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
- }
- }
-
- if (!first_notcon)
- printk(" not connected.\n");
-}
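/* Editor's note: the paired io_apic_write() calls above program the high
 * dword (0x11+2*pin, holding the destination) before the low dword
 * (0x10+2*pin, holding mask/trigger/vector), apparently so the entry is
 * never live with a stale destination. A hypothetical helper making the
 * ordering explicit: */

static void ioapic_write_entry(int apic, int pin,
                               struct IO_APIC_route_entry e)
{
    io_apic_write(apic, 0x11 + 2*pin, *(((int *)&e) + 1)); /* dest first  */
    io_apic_write(apic, 0x10 + 2*pin, *(((int *)&e) + 0)); /* mask+vector */
}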
-
-/*
- * Set up the 8259A-master output pin as broadcast to all
- * CPUs.
- */
-void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
-{
- struct IO_APIC_route_entry entry;
- unsigned long flags;
-
- memset(&entry,0,sizeof(entry));
-
- disable_8259A_irq(0);
-
- /* mask LVT0 */
- apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
-
- /*
- * We use logical delivery to get the timer IRQ
- * to the first CPU.
- */
- entry.dest_mode = (INT_DEST_ADDR_MODE != 0);
- entry.mask = 0; /* unmask IRQ now */
- entry.dest.logical.logical_dest = target_cpus();
- entry.delivery_mode = INT_DELIVERY_MODE;
- entry.polarity = 0;
- entry.trigger = 0;
- entry.vector = vector;
-
- /*
- * The timer IRQ doesn't have to know that behind the
- * scene we have a 8259A-master in AEOI mode ...
- */
- irq_desc[0].handler = &ioapic_edge_irq_type;
-
- /*
- * Add it to the IO-APIC irq-routing table:
- */
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- enable_8259A_irq(0);
-}
-
-void __init UNEXPECTED_IO_APIC(void)
-{
- printk(KERN_WARNING
- "An unexpected IO-APIC was found. If this kernel release is less than\n"
- "three months old please report this to linux-smp@vger.kernel.org\n");
-}
-
-void __init print_IO_APIC(void)
-{
-#ifndef NDEBUG
- int apic, i;
- struct IO_APIC_reg_00 reg_00;
- struct IO_APIC_reg_01 reg_01;
- struct IO_APIC_reg_02 reg_02;
- struct IO_APIC_reg_03 reg_03;
- unsigned long flags;
-
- printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
- for (i = 0; i < nr_ioapics; i++)
- printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
- mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
-
- /*
- * We are a bit conservative about what we expect. We have to
- * know about every hardware change ASAP.
- */
- printk(KERN_INFO "testing the IO APIC.......................\n");
-
- for (apic = 0; apic < nr_ioapics; apic++) {
-
- spin_lock_irqsave(&ioapic_lock, flags);
- *(int *)&reg_00 = io_apic_read(apic, 0);
- *(int *)&reg_01 = io_apic_read(apic, 1);
- if (reg_01.version >= 0x10)
- *(int *)&reg_02 = io_apic_read(apic, 2);
- if (reg_01.version >= 0x20)
- *(int *)&reg_03 = io_apic_read(apic, 3);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- printk("\n");
- printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
- printk(KERN_DEBUG ".... register #00: %08X\n", *(int *)®_00);
- printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.ID);
- printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.delivery_type);
- printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.LTS);
- if (reg_00.__reserved_0 || reg_00.__reserved_1 || reg_00.__reserved_2)
- UNEXPECTED_IO_APIC();
-
- printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01);
- printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.entries);
- if ( (reg_01.entries != 0x0f) && /* older (Neptune) boards */
- (reg_01.entries != 0x17) && /* typical ISA+PCI boards */
- (reg_01.entries != 0x1b) && /* Compaq Proliant boards */
- (reg_01.entries != 0x1f) && /* dual Xeon boards */
- (reg_01.entries != 0x22) && /* bigger Xeon boards */
- (reg_01.entries != 0x2E) &&
- (reg_01.entries != 0x3F)
- )
- UNEXPECTED_IO_APIC();
-
- printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.PRQ);
- printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.version);
- if ( (reg_01.version != 0x01) && /* 82489DX IO-APICs */
- (reg_01.version != 0x02) && /* VIA */
- (reg_01.version != 0x03) && /* later VIA */
- (reg_01.version != 0x10) && /* oldest IO-APICs */
- (reg_01.version != 0x11) && /* Pentium/Pro IO-APICs */
- (reg_01.version != 0x13) && /* Xeon IO-APICs */
- (reg_01.version != 0x20) /* Intel P64H (82806 AA) */
- )
- UNEXPECTED_IO_APIC();
- if (reg_01.__reserved_1 || reg_01.__reserved_2)
- UNEXPECTED_IO_APIC();
-
- /*
- * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
- * but the value of reg_02 is read as the previous read register
- * value, so ignore it if reg_02 == reg_01.
- */
- if (reg_01.version >= 0x10 && *(int *)&reg_02 != *(int *)&reg_01) {
- printk(KERN_DEBUG ".... register #02: %08X\n", *(int *)&reg_02);
- printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.arbitration);
- if (reg_02.__reserved_1 || reg_02.__reserved_2)
- UNEXPECTED_IO_APIC();
- }
-
- /*
- * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
- * or reg_03, but the value of reg_0[23] is read as the previous read
- * register value, so ignore it if reg_03 == reg_0[12].
- */
- if (reg_01.version >= 0x20 && *(int *)&reg_03 != *(int *)&reg_02 &&
- *(int *)&reg_03 != *(int *)&reg_01) {
- printk(KERN_DEBUG ".... register #03: %08X\n", *(int *)&reg_03);
- printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.boot_DT);
- if (reg_03.__reserved_1)
- UNEXPECTED_IO_APIC();
- }
-
- printk(KERN_DEBUG ".... IRQ redirection table:\n");
-
- printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
- " Stat Dest Deli Vect: \n");
-
- for (i = 0; i <= reg_01.entries; i++) {
- struct IO_APIC_route_entry entry;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
- *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- printk(KERN_DEBUG " %02x %03X %02X ",
- i,
- entry.dest.logical.logical_dest,
- entry.dest.physical.physical_dest
- );
-
- printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
- entry.mask,
- entry.trigger,
- entry.irr,
- entry.polarity,
- entry.delivery_status,
- entry.dest_mode,
- entry.delivery_mode,
- entry.vector
- );
- }
- }
- printk(KERN_DEBUG "IRQ to pin mappings:\n");
- for (i = 0; i < NR_IRQS; i++) {
- struct irq_pin_list *entry = irq_2_pin + i;
- if (entry->pin < 0)
- continue;
- printk(KERN_DEBUG "IRQ%d ", i);
- for (;;) {
- printk("-> %d:%d", entry->apic, entry->pin);
- if (!entry->next)
- break;
- entry = irq_2_pin + entry->next;
- }
- printk("\n");
- }
-
- printk(KERN_INFO ".................................... done.\n");
-#endif
-}
-
-
-#if 0 /* Maybe useful for debugging, but not currently used anywhere. */
-
-static void print_APIC_bitfield (int base)
-{
- unsigned int v;
- int i, j;
-
- printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
- for (i = 0; i < 8; i++) {
- v = apic_read(base + i*0x10);
- for (j = 0; j < 32; j++) {
- if (v & (1<<j))
- printk("1");
- else
- printk("0");
- }
- printk("\n");
- }
-}
-
-
-void /*__init*/ print_local_APIC(void * dummy)
-{
- unsigned int v, ver, maxlvt;
-
- printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
- smp_processor_id(), hard_smp_processor_id());
- v = apic_read(APIC_ID);
- printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v));
- v = apic_read(APIC_LVR);
- printk(KERN_INFO "... APIC VERSION: %08x\n", v);
- ver = GET_APIC_VERSION(v);
- maxlvt = get_maxlvt();
-
- v = apic_read(APIC_TASKPRI);
- printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
-
- if (APIC_INTEGRATED(ver)) { /* !82489DX */
- v = apic_read(APIC_ARBPRI);
- printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
- v & APIC_ARBPRI_MASK);
- v = apic_read(APIC_PROCPRI);
- printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
- }
-
- v = apic_read(APIC_EOI);
- printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
- v = apic_read(APIC_RRR);
- printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
- v = apic_read(APIC_LDR);
- printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
- v = apic_read(APIC_DFR);
- printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
- v = apic_read(APIC_SPIV);
- printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
-
- printk(KERN_DEBUG "... APIC ISR field:\n");
- print_APIC_bitfield(APIC_ISR);
- printk(KERN_DEBUG "... APIC TMR field:\n");
- print_APIC_bitfield(APIC_TMR);
- printk(KERN_DEBUG "... APIC IRR field:\n");
- print_APIC_bitfield(APIC_IRR);
-
- if (APIC_INTEGRATED(ver)) { /* !82489DX */
- if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
- apic_write(APIC_ESR, 0);
- v = apic_read(APIC_ESR);
- printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
- }
-
- v = apic_read(APIC_ICR);
- printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
- v = apic_read(APIC_ICR2);
- printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
-
- v = apic_read(APIC_LVTT);
- printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
-
- if (maxlvt > 3) { /* PC is LVT#4. */
- v = apic_read(APIC_LVTPC);
- printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
- }
- v = apic_read(APIC_LVT0);
- printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
- v = apic_read(APIC_LVT1);
- printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
-
- if (maxlvt > 2) { /* ERR is LVT#3. */
- v = apic_read(APIC_LVTERR);
- printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
- }
-
- v = apic_read(APIC_TMICT);
- printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
- v = apic_read(APIC_TMCCT);
- printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
- v = apic_read(APIC_TDCR);
- printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
- printk("\n");
-}
-
-void print_all_local_APICs (void)
-{
- smp_call_function(print_local_APIC, NULL, 1, 1);
- print_local_APIC(NULL);
-}
-
-void /*__init*/ print_PIC(void)
-{
- extern spinlock_t i8259A_lock;
- unsigned int v, flags;
-
- printk(KERN_DEBUG "\nprinting PIC contents\n");
-
- spin_lock_irqsave(&i8259A_lock, flags);
-
- v = inb(0xa1) << 8 | inb(0x21);
- printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
-
- v = inb(0xa0) << 8 | inb(0x20);
- printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
-
- outb(0x0b,0xa0);
- outb(0x0b,0x20);
- v = inb(0xa0) << 8 | inb(0x20);
- outb(0x0a,0xa0);
- outb(0x0a,0x20);
-
- spin_unlock_irqrestore(&i8259A_lock, flags);
-
- printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
-
- v = inb(0x4d1) << 8 | inb(0x4d0);
- printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
-}
-
-#endif /* 0 */
-
-
-static void __init enable_IO_APIC(void)
-{
- struct IO_APIC_reg_01 reg_01;
- int i;
- unsigned long flags;
-
- for (i = 0; i < PIN_MAP_SIZE; i++) {
- irq_2_pin[i].pin = -1;
- irq_2_pin[i].next = 0;
- }
- if (!pirqs_enabled)
- for (i = 0; i < MAX_PIRQS; i++)
- pirq_entries[i] = -1;
-
- /*
- * The number of IO-APIC IRQ registers (== #pins):
- */
- for (i = 0; i < nr_ioapics; i++) {
- spin_lock_irqsave(&ioapic_lock, flags);
- *(int *)&reg_01 = io_apic_read(i, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- nr_ioapic_registers[i] = reg_01.entries+1;
- }
-
- /*
- * Do not trust the IO-APIC being empty at bootup
- */
- clear_IO_APIC();
-}
-
-/*
- * Not an __init, needed by the reboot code
- */
-void disable_IO_APIC(void)
-{
- /*
- * Clear the IO-APIC before rebooting:
- */
- clear_IO_APIC();
-
- disconnect_bsp_APIC();
-}
-
-/*
- * function to set the IO-APIC physical IDs based on the
- * values stored in the MPC table.
- *
- * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
- */
-
-static void __init setup_ioapic_ids_from_mpc (void)
-{
- struct IO_APIC_reg_00 reg_00;
- unsigned long phys_id_present_map = phys_cpu_present_map;
- int apic;
- int i;
- unsigned char old_id;
- unsigned long flags;
-
- if (clustered_apic_mode)
- /* We don't have a good way to do this yet - hack */
- phys_id_present_map = (u_long) 0xf;
- /*
- * Set the IOAPIC ID to the value stored in the MPC table.
- */
- for (apic = 0; apic < nr_ioapics; apic++) {
-
- /* Read the register 0 value */
- spin_lock_irqsave(&ioapic_lock, flags);
- *(int *)&reg_00 = io_apic_read(apic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- old_id = mp_ioapics[apic].mpc_apicid;
-
- if (mp_ioapics[apic].mpc_apicid >= apic_broadcast_id) {
- printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
- apic, mp_ioapics[apic].mpc_apicid);
- printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
- reg_00.ID);
- mp_ioapics[apic].mpc_apicid = reg_00.ID;
- }
-
- /*
- * Sanity check, is the ID really free? Every APIC in a
- * system must have a unique ID or we get lots of nice
- * 'stuck on smp_invalidate_needed IPI wait' messages.
- * I/O APIC IDs no longer have any meaning for xAPICs and SAPICs.
- */
- if ((clustered_apic_mode != CLUSTERED_APIC_XAPIC) &&
- (phys_id_present_map & (1 << mp_ioapics[apic].mpc_apicid))) {
- printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
- apic, mp_ioapics[apic].mpc_apicid);
- for (i = 0; i < 0xf; i++)
- if (!(phys_id_present_map & (1 << i)))
- break;
- if (i >= apic_broadcast_id)
- panic("Max APIC ID exceeded!\n");
- printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
- i);
- phys_id_present_map |= 1 << i;
- mp_ioapics[apic].mpc_apicid = i;
- } else {
- printk("Setting %d in the phys_id_present_map\n", mp_ioapics[apic].mpc_apicid);
- phys_id_present_map |= 1 << mp_ioapics[apic].mpc_apicid;
- }
-
-
- /*
- * We need to adjust the IRQ routing table
- * if the ID changed.
- */
- if (old_id != mp_ioapics[apic].mpc_apicid)
- for (i = 0; i < mp_irq_entries; i++)
- if (mp_irqs[i].mpc_dstapic == old_id)
- mp_irqs[i].mpc_dstapic
- = mp_ioapics[apic].mpc_apicid;
-
- /*
- * Read the right value from the MPC table and
- * write it into the ID register.
- */
- printk(KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
- mp_ioapics[apic].mpc_apicid);
-
- reg_00.ID = mp_ioapics[apic].mpc_apicid;
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0, *(int *)&reg_00);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- /*
- * Sanity check
- */
- spin_lock_irqsave(&ioapic_lock, flags);
- *(int *)&reg_00 = io_apic_read(apic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- if (reg_00.ID != mp_ioapics[apic].mpc_apicid)
- panic("could not set ID!\n");
- else
- printk(" ok.\n");
- }
-}
-
-/*
- * There is a nasty bug in some older SMP boards, their mptable lies
- * about the timer IRQ. We do the following to work around the situation:
- *
- * - timer IRQ defaults to IO-APIC IRQ
- * - if this function detects that timer IRQs are defunct, then we fall
- * back to ISA timer IRQs
- */
-static int __init timer_irq_works(void)
-{
- unsigned int t1 = jiffies;
-
- sti();
- /* Let ten ticks pass... */
- mdelay((10 * 1000) / HZ);
-
- /*
- * Expect a few ticks at least, to be sure some possible
- * glue logic does not lock up after one or two first
- * ticks in a non-ExtINT mode. Also the local APIC
- * might have cached one ExtINT interrupt. Finally, at
- * least one tick may be lost due to delays.
- */
- if (jiffies - t1 > 4)
- return 1;
-
- return 0;
-}
-
-static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ }
-
-/*
- * Starting up an edge-triggered IO-APIC interrupt is
- * nasty - we need to make sure that we get the edge.
- * If it is already asserted for some reason, we need to
- * return 1 to indicate that it was pending.
- *
- * This is not complete - we should be able to fake
- * an edge even if it isn't on the 8259A...
- */
-
-static unsigned int startup_edge_ioapic_irq(unsigned int irq)
-{
- int was_pending = 0;
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- if (irq < 16) {
- disable_8259A_irq(irq);
- if (i8259A_irq_pending(irq))
- was_pending = 1;
- }
- __unmask_IO_APIC_irq(irq);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return was_pending;
-}
-
-/*
- * Once we have recorded IRQ_PENDING already, we can mask the
- * interrupt for real. This prevents IRQ storms from unhandled
- * devices.
- */
-static void ack_edge_ioapic_irq(unsigned int irq)
-{
- balance_irq(irq);
- if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
- == (IRQ_PENDING | IRQ_DISABLED))
- mask_IO_APIC_irq(irq);
- ack_APIC_irq();
-}
-
-static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ }
-
-
-/*
- * Level triggered interrupts can just be masked,
- * and shutting down and starting up the interrupt
- * is the same as enabling and disabling them -- except
- * with a startup need to return a "was pending" value.
- *
- * Level triggered interrupts are special because we
- * do not touch any IO-APIC register while handling
- * them. We ack the APIC in the end-IRQ handler, not
- * in the start-IRQ-handler. Protection against reentrance
- * from the same interrupt is still provided, both by the
- * generic IRQ layer and by the fact that an unacked local
- * APIC does not accept IRQs.
- */
-static unsigned int startup_level_ioapic_irq (unsigned int irq)
-{
- unmask_IO_APIC_irq(irq);
-
- return 0; /* don't check for pending */
-}
-
-static void mask_and_ack_level_ioapic_irq(unsigned int irq)
-{
- unsigned long v;
- int i;
-
- balance_irq(irq);
-
- mask_IO_APIC_irq(irq);
-
-/*
- * It appears there is an erratum which affects at least version 0x11
- * of I/O APIC (that's the 82093AA and cores integrated into various
- * chipsets). Under certain conditions a level-triggered interrupt is
- * erroneously delivered as edge-triggered one but the respective IRR
- * bit gets set nevertheless. As a result the I/O unit expects an EOI
- * message but it will never arrive and further interrupts are blocked
- * from the source. The exact reason is so far unknown, but the
- * phenomenon was observed when two consecutive interrupt requests
- * from a given source get delivered to the same CPU and the source is
- * temporarily disabled in between.
- *
- * A workaround is to simulate an EOI message manually. We achieve it
- * by setting the trigger mode to edge and then to level when the edge
- * trigger mode gets detected in the TMR of a local APIC for a
- * level-triggered interrupt. We mask the source for the time of the
- * operation to prevent an edge-triggered interrupt escaping meanwhile.
- * The idea is from Manfred Spraul. --macro
- */
- i = IO_APIC_VECTOR(irq);
- v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
-
- ack_APIC_irq();
-
- if (!(v & (1 << (i & 0x1f)))) {
-#ifdef APIC_LOCKUP_DEBUG
- struct irq_pin_list *entry;
-#endif
-
-#ifdef APIC_MISMATCH_DEBUG
- atomic_inc(&irq_mis_count);
-#endif
- spin_lock(&ioapic_lock);
- __edge_IO_APIC_irq(irq);
-#ifdef APIC_LOCKUP_DEBUG
- for (entry = irq_2_pin + irq;;) {
- unsigned int reg;
-
- if (entry->pin == -1)
- break;
- reg = io_apic_read(entry->apic, 0x10 + entry->pin * 2);
- if (reg & 0x00004000)
- printk(KERN_CRIT "Aieee!!! Remote IRR"
- " still set after unlock!\n");
- if (!entry->next)
- break;
- entry = irq_2_pin + entry->next;
- }
-#endif
- __level_IO_APIC_irq(irq);
- spin_unlock(&ioapic_lock);
- }
-}
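/* Editor's note: the TMR lookup above, decoded. The local APIC exposes the
 * Trigger Mode Register as eight 32-bit words spaced 0x10 apart, so vector
 * i lives in word i/32 at bit i%32, and (i/32)*0x10 == ((i & ~0x1f) >> 1).
 * E.g. for vector 0x51 the code reads APIC_TMR + 0x20 and tests bit 0x11;
 * a clear bit means the APIC latched the interrupt as edge-triggered and
 * the manual EOI sequence above is required. */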
-
-static void end_level_ioapic_irq(unsigned int irq)
-{
- unmask_IO_APIC_irq(irq);
-}
-
-static inline void init_IO_APIC_traps(void)
-{
- int irq;
-
- /*
- * NOTE! The local APIC isn't very good at handling
- * multiple interrupts at the same interrupt level.
- * As the interrupt level is determined by taking the
- * vector number and shifting that right by 4, we
- * want to spread these out a bit so that they don't
- * all fall in the same interrupt level.
- *
- * Also, we've got to be careful not to trash gate
- * 0x80, because int 0x80 is hm, kind of importantish. ;)
- */
- for (irq = 0; irq < NR_IRQS ; irq++) {
- if (IO_APIC_IRQ(irq) && !IO_APIC_VECTOR(irq)) {
- /*
- * Hmm.. We don't have an entry for this,
- * so default to an old-fashioned 8259
- * interrupt if we can..
- */
- if (irq < 16)
- make_8259A_irq(irq);
- else
- /* Strange. Oh, well.. */
- irq_desc[irq].handler = &no_irq_type;
- }
- }
-}
-
-static void enable_lapic_irq (unsigned int irq)
-{
- unsigned long v;
-
- v = apic_read(APIC_LVT0);
- apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
-}
-
-static void disable_lapic_irq (unsigned int irq)
-{
- unsigned long v;
-
- v = apic_read(APIC_LVT0);
- apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
-}
-
-static void ack_lapic_irq (unsigned int irq)
-{
- ack_APIC_irq();
-}
-
-static void end_lapic_irq (unsigned int i) { /* nothing */ }
-
-static struct hw_interrupt_type lapic_irq_type = {
- "local-APIC-edge",
- NULL, /* startup_irq() not used for IRQ0 */
- NULL, /* shutdown_irq() not used for IRQ0 */
- enable_lapic_irq,
- disable_lapic_irq,
- ack_lapic_irq,
- end_lapic_irq
-};
-
-/*
- * This looks a bit hackish but it's about the only way of sending
- * a few INTA cycles to 8259As and any associated glue logic. ICR does
- * not support the ExtINT mode, unfortunately. We need to send these
- * cycles as some i82489DX-based boards have glue logic that keeps the
- * 8259A interrupt line asserted until INTA. --macro
- */
-static inline void unlock_ExtINT_logic(void)
-{
- int pin, i;
- struct IO_APIC_route_entry entry0, entry1;
- unsigned char save_control, save_freq_select;
- unsigned long flags;
-
- pin = find_isa_irq_pin(8, mp_INT);
- if (pin == -1)
- return;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin);
- *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- clear_IO_APIC_pin(0, pin);
-
- memset(&entry1, 0, sizeof(entry1));
-
- entry1.dest_mode = 0; /* physical delivery */
- entry1.mask = 0; /* unmask IRQ now */
- entry1.dest.physical.physical_dest = hard_smp_processor_id();
- entry1.delivery_mode = dest_ExtINT;
- entry1.polarity = entry0.polarity;
- entry1.trigger = 0;
- entry1.vector = 0;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
- io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- save_control = CMOS_READ(RTC_CONTROL);
- save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
- CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
- RTC_FREQ_SELECT);
- CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
-
- i = 100;
- while (i-- > 0) {
- mdelay(10);
- if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
- i -= 10;
- }
-
- CMOS_WRITE(save_control, RTC_CONTROL);
- CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
- clear_IO_APIC_pin(0, pin);
-
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
- io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
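/* Editor's note: the RTC timing loop above, decoded. Rate select 6 programs
 * the RTC for 1024 Hz periodic interrupts; i starts at 100 and loses 1 per
 * 10 ms pass plus 10 more whenever RTC_PF is seen set, so the loop ends
 * after roughly ten observed RTC ticks (~100 ms), bounded at one second if
 * no tick ever arrives. */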
-
-/*
- * This code may look a bit paranoid, but it's supposed to cooperate with
- * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
- * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
- * fanatically on his truly buggy board.
- */
-static inline void check_timer(void)
-{
- extern int timer_ack;
- int pin1, pin2;
- int vector;
-
- /*
- * get/set the timer IRQ vector:
- */
- disable_8259A_irq(0);
- vector = assign_irq_vector(0);
- set_intr_gate(vector, interrupt[0]);
-
- /*
- * Subtle, code in do_timer_interrupt() expects an AEOI
- * mode for the 8259A whenever interrupts are routed
- * through I/O APICs. Also IRQ0 has to be enabled in
- * the 8259A which implies the virtual wire has to be
- * disabled in the local APIC.
- */
- apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
- init_8259A(1);
- timer_ack = 1;
- enable_8259A_irq(0);
-
- pin1 = find_isa_irq_pin(0, mp_INT);
- pin2 = find_isa_irq_pin(0, mp_ExtINT);
-
- printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2);
-
- if (pin1 != -1) {
- /*
- * Ok, does IRQ0 through the IOAPIC work?
- */
- unmask_IO_APIC_irq(0);
- if (timer_irq_works())
- return;
- clear_IO_APIC_pin(0, pin1);
- printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
- }
-
- printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
- if (pin2 != -1) {
- printk("\n..... (found pin %d) ...", pin2);
- /*
- * legacy devices should be connected to IO APIC #0
- */
- setup_ExtINT_IRQ0_pin(pin2, vector);
- if (timer_irq_works()) {
- printk("works.\n");
- if (pin1 != -1)
- replace_pin_at_irq(0, 0, pin1, 0, pin2);
- else
- add_pin_to_irq(0, 0, pin2);
- return;
- }
- /*
- * Cleanup, just in case ...
- */
- clear_IO_APIC_pin(0, pin2);
- }
- printk(" failed.\n");
-
- printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
-
- disable_8259A_irq(0);
- irq_desc[0].handler = &lapic_irq_type;
- apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
- enable_8259A_irq(0);
-
- if (timer_irq_works()) {
- printk(" works.\n");
- return;
- }
- apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
- printk(" failed.\n");
-
- printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
-
- init_8259A(0);
- make_8259A_irq(0);
- apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
-
- unlock_ExtINT_logic();
-
- if (timer_irq_works()) {
- printk(" works.\n");
- return;
- }
- printk(" failed :(.\n");
- panic("IO-APIC + timer doesn't work! pester mingo@redhat.com");
-}
-
-/*
- *
- * IRQ's that are handled by the old PIC in all cases:
- * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
- * Linux doesn't really care, as it's not actually used
- * for any interrupt handling anyway.
- * - There used to be IRQ13 here as well, but all
- * MPS-compliant systems must not use it for FPU coupling and we
- * want to use exception 16 anyway. And there are
- * systems that connect it to an I/O APIC for other uses.
- * Thus we don't mark it special any longer.
- *
- * Additionally, something is definitely wrong with irq9
- * on PIIX4 boards.
- */
-#define PIC_IRQS (1<<2)
-
-void __init setup_IO_APIC(void)
-{
- enable_IO_APIC();
-
- io_apic_irqs = ~PIC_IRQS;
- printk("ENABLING IO-APIC IRQs\n");
-
- /*
- * Set up IO-APIC IRQ routing.
- */
- if (!acpi_ioapic)
- setup_ioapic_ids_from_mpc();
- sync_Arb_IDs();
- setup_IO_APIC_irqs();
- init_IO_APIC_traps();
- check_timer();
- if (!acpi_ioapic)
- print_IO_APIC();
-}
-
-#endif /* CONFIG_X86_IO_APIC */
-
-
-
-/* --------------------------------------------------------------------------
- ACPI-based IOAPIC Configuration
- -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI_BOOT
-
-#define IO_APIC_MAX_ID 15
-
-int __init io_apic_get_unique_id (int ioapic, int apic_id)
-{
- struct IO_APIC_reg_00 reg_00;
- static unsigned long apic_id_map = 0;
- unsigned long flags;
- int i = 0;
-
- /*
- * The P4 platform supports up to 256 APIC IDs on two separate APIC
- * buses (one for LAPICs, one for IOAPICs), where predecessors only
- * supports up to 16 on one shared APIC bus.
- *
- * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
- * advantage of new APIC bus architecture.
- */
-
- if (!apic_id_map)
- apic_id_map = phys_cpu_present_map;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- *(int *)&reg_00 = io_apic_read(ioapic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- if (apic_id >= IO_APIC_MAX_ID) {
- printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
- "%d\n", ioapic, apic_id, reg_00.ID);
- apic_id = reg_00.ID;
- }
-
- /* XAPICs do not need unique IDs */
- if (clustered_apic_mode == CLUSTERED_APIC_XAPIC){
- printk(KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n",
- ioapic, apic_id);
- return apic_id;
- }
-
- /*
- * Every APIC in a system must have a unique ID or we get lots of nice
- * 'stuck on smp_invalidate_needed IPI wait' messages.
- */
- if (apic_id_map & (1 << apic_id)) {
-
- for (i = 0; i < IO_APIC_MAX_ID; i++) {
- if (!(apic_id_map & (1 << i)))
- break;
- }
-
- if (i == IO_APIC_MAX_ID)
- panic("Max apic_id exceeded!\n");
-
- printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
- "trying %d\n", ioapic, apic_id, i);
-
- apic_id = i;
- }
-
- apic_id_map |= (1 << apic_id);
-
- if (reg_00.ID != apic_id) {
- reg_00.ID = apic_id;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(ioapic, 0, *(int *)&reg_00);
- *(int *)&reg_00 = io_apic_read(ioapic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- /* Sanity check */
- if (reg_00.ID != apic_id)
- panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
- }
-
- printk(KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
-
- return apic_id;
-}
-
-
-int __init io_apic_get_version (int ioapic)
-{
- struct IO_APIC_reg_01 reg_01;
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- *(int *)&reg_01 = io_apic_read(ioapic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return reg_01.version;
-}
-
-
-int __init io_apic_get_redir_entries (int ioapic)
-{
- struct IO_APIC_reg_01 reg_01;
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- *(int *)&reg_01 = io_apic_read(ioapic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return reg_01.entries;
-}
-
-
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
-{
- struct IO_APIC_route_entry entry;
- unsigned long flags;
-
- if (!IO_APIC_IRQ(irq)) {
- printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0/n",
- ioapic);
- return -EINVAL;
- }
-
- /*
- * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
- * Note that we mask (disable) IRQs now -- these get enabled when the
- * corresponding device driver registers for this IRQ.
- */
-
- memset(&entry,0,sizeof(entry));
-
- entry.delivery_mode = dest_LowestPrio;
- entry.dest_mode = INT_DELIVERY_MODE;
- entry.dest.logical.logical_dest = target_cpus();
- entry.mask = 1; /* Disabled (masked) */
- entry.trigger = edge_level;
- entry.polarity = active_high_low;
-
- add_pin_to_irq(irq, ioapic, pin);
-
- entry.vector = assign_irq_vector(irq);
-
- printk(KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
- "IRQ %d Mode:%i Active:%i)\n", ioapic,
- mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low);
-
- if (edge_level) {
- irq_desc[irq].handler = &ioapic_level_irq_type;
- } else {
- irq_desc[irq].handler = &ioapic_edge_irq_type;
- }
-
- set_intr_gate(entry.vector, interrupt[irq]);
-
- if (!ioapic && (irq < 16))
- disable_8259A_irq(irq);
-
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return 0;
-}
-
-#endif /*CONFIG_ACPI_BOOT*/
-
-extern char opt_leveltrigger[], opt_edgetrigger[];
-
-static int __init ioapic_trigger_setup(void)
-{
- char *p;
- irq_desc_t *desc;
- long irq;
-
- p = opt_leveltrigger;
- while ( *p != '\0' )
- {
- irq = simple_strtol(p, &p, 10);
- if ( (irq <= 0) || (irq >= NR_IRQS) )
- {
- printk("IRQ '%ld' out of range in level-trigger list '%s'\n",
- irq, opt_leveltrigger);
- break;
- }
-
- printk("Forcing IRQ %ld to level-trigger: ", irq);
-
- desc = &irq_desc[irq];
- spin_lock_irq(&desc->lock);
-
- if ( desc->handler == &ioapic_level_irq_type )
- {
- printk("already level-triggered (no force applied).\n");
- }
- else if ( desc->handler != &ioapic_edge_irq_type )
- {
- printk("cannot force (can only force IO-APIC-edge IRQs).\n");
- }
- else
- {
- desc->handler = &ioapic_level_irq_type;
- __mask_IO_APIC_irq(irq);
- __level_IO_APIC_irq(irq);
- printk("done.\n");
- }
-
- spin_unlock_irq(&desc->lock);
-
- if ( *p == '\0' )
- break;
-
- if ( *p != ',' )
- {
- printk("Unexpected character '%c' in level-trigger list '%s'\n",
- *p, opt_leveltrigger);
- break;
- }
-
- p++;
- }
-
- p = opt_edgetrigger;
- while ( *p != '\0' )
- {
- irq = simple_strtol(p, &p, 10);
- if ( (irq <= 0) || (irq >= NR_IRQS) )
- {
- printk("IRQ '%ld' out of range in edge-trigger list '%s'\n",
- irq, opt_edgetrigger);
- break;
- }
-
- printk("Forcing IRQ %ld to edge-trigger: ", irq);
-
- desc = &irq_desc[irq];
- spin_lock_irq(&desc->lock);
-
- if ( desc->handler == &ioapic_edge_irq_type )
- {
- printk("already edge-triggered (no force applied).\n");
- }
- else if ( desc->handler != &ioapic_level_irq_type )
- {
- printk("cannot force (can only force IO-APIC-level IRQs).\n");
- }
- else
- {
- desc->handler = &ioapic_edge_irq_type;
- __edge_IO_APIC_irq(irq);
- desc->status |= IRQ_PENDING; /* may have lost a masked edge */
- printk("done.\n");
- }
-
- spin_unlock_irq(&desc->lock);
-
- if ( *p == '\0' )
- break;
-
- if ( *p != ',' )
- {
- printk("Unexpected character '%c' in edge-trigger list '%s'\n",
- *p, opt_edgetrigger);
- break;
- }
-
- p++;
- }
-
- return 0;
-}
-
-__initcall(ioapic_trigger_setup);
+++ /dev/null
-/*
- * arch/i386/mm/ioremap.c
- *
- * Re-map IO memory to kernel address space so that we can access it.
- * This is needed for high PCI addresses that aren't mapped in the
- * 640k-1MB IO memory area on PC's
- *
- * (C) Copyright 1995 1996 Linus Torvalds
- */
-
-#include <xen/config.h>
-#include <xen/lib.h>
-#include <xen/mm.h>
-#include <asm/io.h>
-#include <asm/pgalloc.h>
-#include <asm/page.h>
-
-static unsigned long remap_base = IOREMAP_VIRT_START;
-
-#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
-void * __ioremap(unsigned long phys_addr,
- unsigned long size,
- unsigned long flags)
-{
- unsigned long vaddr;
- unsigned long offset, cur=0, last_addr;
- l2_pgentry_t *pl2e;
- l1_pgentry_t *pl1e;
-
- /* Don't allow wraparound or zero size */
- last_addr = phys_addr + size - 1;
- if ( (size == 0) || (last_addr < phys_addr) )
- return NULL;
-
- /* Don't remap the low PCI/ISA area: it's always mapped. */
- if ( (phys_addr >= 0xA0000) && (last_addr < 0x100000) )
- return phys_to_virt(phys_addr);
-
- if ( (remap_base + size) > (IOREMAP_VIRT_END - 1) )
- {
- printk("ioremap: going past end of reserved space!\n");
- return NULL;
- }
-
- /* Mappings have to be page-aligned. */
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr) - phys_addr;
-
- /* Ok, go for it. */
- vaddr = remap_base;
- remap_base += size;
- pl2e = &idle_pg_table[l2_table_offset(vaddr)];
- pl1e = l2_pgentry_to_l1(*pl2e++) + l1_table_offset(vaddr);
- do {
- *pl1e++ = mk_l1_pgentry((phys_addr+cur)|PAGE_HYPERVISOR|flags);
- }
- while ( (cur += PAGE_SIZE) != size );
-
- return (void *)(offset + (char *)vaddr);
-}
-
-void iounmap(void *addr)
-{
- /* NOP for now. */
-}
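/* Editor's note: a worked example of the alignment arithmetic in
 * __ioremap() above, for a hypothetical call with phys_addr = 0xfec00123
 * and size = 0x10 (4 KiB pages): last_addr = 0xfec00132, offset = 0x123,
 * phys_addr rounds down to 0xfec00000, and size becomes
 * PAGE_ALIGN(0xfec00132) - 0xfec00000 = 0x1000, so exactly one page is
 * mapped and the caller gets back vaddr + 0x123. */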
+++ /dev/null
-/*
- * linux/arch/i386/kernel/irq.c
- *
- * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
- *
- * This file contains the code used by various IRQ handling routines:
- * asking for different IRQ's should be done through these routines
- * instead of just grabbing them. Thus setup_irqs with different IRQ numbers
- * shouldn't result in any weird surprises, and installing new handlers
- * should be easier.
- */
-
-/*
- * (mostly architecture independent, will move to kernel/irq.c in 2.5.)
- *
- * IRQs are in fact implemented a bit like signal handlers for the kernel.
- * Naturally it's not a 1:1 relation, but there are similarities.
- */
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/errno.h>
-#include <xen/sched.h>
-#include <xen/interrupt.h>
-#include <xen/irq.h>
-#include <xen/slab.h>
-#include <xen/event.h>
-#include <asm/mpspec.h>
-#include <asm/io_apic.h>
-#include <asm/msr.h>
-#include <asm/hardirq.h>
-#include <asm/ptrace.h>
-#include <asm/atomic.h>
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/system.h>
-#include <asm/bitops.h>
-#include <asm/pgalloc.h>
-#include <xen/delay.h>
-#include <xen/timex.h>
-#include <xen/perfc.h>
-#include <asm/smpboot.h>
-
-/*
- * Linux has a controller-independent x86 interrupt architecture.
- * Every controller has a 'controller-template' that is used
- * by the main code to do the right thing. Each driver-visible
- * interrupt source is transparently wired to the appropriate
- * controller. Thus drivers need not be aware of the
- * interrupt-controller.
- *
- * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
- * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
- * (IO-APICs assumed to be messaging to Pentium local-APICs)
- *
- * The code is designed to be easily extended with new/different
- * interrupt controllers, without having to do assembly magic.
- */
-
-/*
- * Controller mappings for all interrupt sources:
- */
-irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned =
-{ [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}};
-
-#ifdef CONFIG_SMP
-/* NB. XXX We'll want some way of fiddling with this from DOM0. */
-unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL };
-#endif
-
-static void __do_IRQ_guest(int irq);
-
-/*
- * Special irq handlers.
- */
-
-void no_action(int cpl, void *dev_id, struct pt_regs *regs) { }
-
-/*
- * Generic no controller code
- */
-
-static void enable_none(unsigned int irq) { }
-static unsigned int startup_none(unsigned int irq) { return 0; }
-static void disable_none(unsigned int irq) { }
-static void ack_none(unsigned int irq)
-{
-/*
- * 'what should we do if we get a hw irq event on an illegal vector?'
- * Each architecture has to answer this itself; it doesn't deserve
- * a generic callback, I think.
- */
-#if CONFIG_X86
- printk("unexpected IRQ trap at vector %02x\n", irq);
-#ifdef CONFIG_X86_LOCAL_APIC
- /*
- * Currently unexpected vectors happen only on SMP and APIC.
- * We _must_ ack these because every local APIC has only N
- * irq slots per priority level, and a 'hanging, unacked' IRQ
- * holds up an irq slot - in excessive cases (when multiple
- * unexpected vectors occur) that might lock up the APIC
- * completely.
- */
- ack_APIC_irq();
-#endif
-#endif
-}
-
-/* startup is the same as "enable", shutdown is same as "disable" */
-#define shutdown_none disable_none
-#define end_none enable_none
-
-struct hw_interrupt_type no_irq_type = {
- "none",
- startup_none,
- shutdown_none,
- enable_none,
- disable_none,
- ack_none,
- end_none
-};
-
-atomic_t irq_err_count;
-#ifdef CONFIG_X86_IO_APIC
-#ifdef APIC_MISMATCH_DEBUG
-atomic_t irq_mis_count;
-#endif
-#endif
-
-/*
- * Generic, controller-independent functions:
- */
-
-/*
- * Global interrupt locks for SMP. Allow interrupts to come in on any
- * CPU, yet make cli/sti act globally to protect critical regions..
- */
-
-#ifdef CONFIG_SMP
-unsigned char global_irq_holder = 0xff;
- unsigned volatile long global_irq_lock; /* pedantic: long for set_bit --RR */
-
-#define MAXCOUNT 100000000
-
-/*
- * I had a lockup scenario where a tight loop doing
- * spin_unlock()/spin_lock() on CPU#1 was racing with
- * spin_lock() on CPU#0. CPU#0 should have noticed spin_unlock(), but
- * apparently the spin_unlock() information did not make it
- * through to CPU#0 ... nasty, is this by design, do we have to limit
- * 'memory update oscillation frequency' artificially like here?
- *
- * Such 'high frequency update' races can be avoided by careful design, but
- * some of our major constructs like spinlocks use similar techniques,
- * it would be nice to clarify this issue. Set this define to 0 if you
- * want to check whether your system freezes. I suspect the delay done
- * by SYNC_OTHER_CORES() is in correlation with 'snooping latency', but
- * I thought that such things are guaranteed by design, since we use
- * the 'LOCK' prefix.
- */
-#define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 0
-
-#if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND
-# define SYNC_OTHER_CORES(x) udelay(x+1)
-#else
-/*
- * We have to allow irqs to arrive between __sti and __cli
- */
-# define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop")
-#endif
-
-static inline void wait_on_irq(int cpu)
-{
- for (;;) {
-
- /*
- * Wait until all interrupts are gone. Wait
- * for bottom half handlers unless we're
- * already executing in one..
- */
- if (!irqs_running())
- if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock))
- break;
-
- /* Duh, we have to loop. Release the lock to avoid deadlocks */
- clear_bit(0,&global_irq_lock);
-
- for (;;) {
- __sti();
- SYNC_OTHER_CORES(cpu);
- __cli();
- if (irqs_running())
- continue;
- if (global_irq_lock)
- continue;
- if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock))
- continue;
- if (!test_and_set_bit(0,&global_irq_lock))
- break;
- }
- }
-}
-
-/*
- * This is called when we want to synchronize with
- * interrupts. We may for example tell a device to
- * stop sending interrupts: but to make sure there
- * are no interrupts that are executing on another
- * CPU we need to call this function.
- */
-void synchronize_irq(void)
-{
- if (irqs_running()) {
- /* Stupid approach */
- cli();
- sti();
- }
-}
-
-static inline void get_irqlock(int cpu)
-{
- if (test_and_set_bit(0,&global_irq_lock)) {
- /* do we already hold the lock? */
- if ((unsigned char) cpu == global_irq_holder)
- return;
- /* Uhhuh.. Somebody else got it. Wait.. */
- do {
- do {
- rep_nop();
- } while (test_bit(0,&global_irq_lock));
- } while (test_and_set_bit(0,&global_irq_lock));
- }
- /*
- * We also have to make sure that nobody else is running
- * in an interrupt context.
- */
- wait_on_irq(cpu);
-
- /*
- * Ok, finally..
- */
- global_irq_holder = cpu;
-}
-
-#define EFLAGS_IF_SHIFT 9
-
-/*
- * A global "cli()" while in an interrupt context
- * turns into just a local cli(). Interrupts
- * should use spinlocks for the (very unlikely)
- * case that they ever want to protect against
- * each other.
- *
- * If we already have local interrupts disabled,
- * this will not turn a local disable into a
- * global one (problems with spinlocks: this makes
- * save_flags+cli+sti usable inside a spinlock).
- */
-void __global_cli(void)
-{
- unsigned int flags;
-
- __save_flags(flags);
- if (flags & (1 << EFLAGS_IF_SHIFT)) {
- int cpu = smp_processor_id();
- __cli();
- if (!local_irq_count(cpu))
- get_irqlock(cpu);
- }
-}
-
-void __global_sti(void)
-{
- int cpu = smp_processor_id();
-
- if (!local_irq_count(cpu))
- release_irqlock(cpu);
- __sti();
-}
-
-/*
- * SMP flags value to restore to:
- * 0 - global cli
- * 1 - global sti
- * 2 - local cli
- * 3 - local sti
- */
-unsigned long __global_save_flags(void)
-{
- int retval;
- int local_enabled;
- unsigned long flags;
- int cpu = smp_processor_id();
-
- __save_flags(flags);
- local_enabled = (flags >> EFLAGS_IF_SHIFT) & 1;
- /* default to local */
- retval = 2 + local_enabled;
-
- /* check for global flags if we're not in an interrupt */
- if (!local_irq_count(cpu)) {
- if (local_enabled)
- retval = 1;
- if (global_irq_holder == cpu)
- retval = 0;
- }
- return retval;
-}
-
-void __global_restore_flags(unsigned long flags)
-{
- switch (flags) {
- case 0:
- __global_cli();
- break;
- case 1:
- __global_sti();
- break;
- case 2:
- __cli();
- break;
- case 3:
- __sti();
- break;
- default:
- printk("global_restore_flags: %08lx (%08lx)\n",
- flags, (&flags)[-1]);
- }
-}
-
-#endif
-
-/*
- * This should really return information about whether
- * we should do bottom half handling etc. Right now we
- * end up _always_ checking the bottom half, which is a
- * waste of time and is not what some drivers would
- * prefer.
- */
-static int handle_IRQ_event(unsigned int irq,
- struct pt_regs * regs,
- struct irqaction * action)
-{
- int status;
- int cpu = smp_processor_id();
-
- irq_enter(cpu, irq);
-
- status = 1; /* Force the "do bottom halves" bit */
-
- if (!(action->flags & SA_INTERRUPT))
- __sti();
-
- do {
- status |= action->flags;
- action->handler(irq, action->dev_id, regs);
- action = action->next;
- } while (action);
-
- __cli();
-
- irq_exit(cpu, irq);
-
- return status;
-}
-
-/*
- * Generic enable/disable code: this just calls
- * down into the PIC-specific version for the actual
- * hardware disable after having gotten the irq
- * controller lock.
- */
-
-/**
- * disable_irq_nosync - disable an irq without waiting
- * @irq: Interrupt to disable
- *
- * Disable the selected interrupt line. Disables and Enables are
- * nested.
- * Unlike disable_irq(), this function does not ensure existing
- * instances of the IRQ handler have completed before returning.
- *
- * This function may be called from IRQ context.
- */
-
-inline void disable_irq_nosync(unsigned int irq)
-{
- irq_desc_t *desc = irq_desc + irq;
- unsigned long flags;
-
- spin_lock_irqsave(&desc->lock, flags);
- if (!desc->depth++) {
- desc->status |= IRQ_DISABLED;
- desc->handler->disable(irq);
- }
- spin_unlock_irqrestore(&desc->lock, flags);
-}
-
-/**
- * disable_irq - disable an irq and wait for completion
- * @irq: Interrupt to disable
- *
- * Disable the selected interrupt line. Enables and Disables are
- * nested.
- * This function waits for any pending IRQ handlers for this interrupt
- * to complete before returning. If you use this function while
- * holding a resource the IRQ handler may need you will deadlock.
- *
- * This function may be called - with care - from IRQ context.
- */
-
-void disable_irq(unsigned int irq)
-{
- disable_irq_nosync(irq);
-
- if (!local_irq_count(smp_processor_id())) {
- do {
- barrier();
- cpu_relax();
- } while (irq_desc[irq].status & IRQ_INPROGRESS);
- }
-}
-
-/**
- * enable_irq - enable handling of an irq
- * @irq: Interrupt to enable
- *
- * Undoes the effect of one call to disable_irq(). If this
- * matches the last disable, processing of interrupts on this
- * IRQ line is re-enabled.
- *
- * This function may be called from IRQ context.
- */
-
-void enable_irq(unsigned int irq)
-{
- irq_desc_t *desc = irq_desc + irq;
- unsigned long flags;
-
- spin_lock_irqsave(&desc->lock, flags);
- switch (desc->depth) {
- case 1: {
- unsigned int status = desc->status & ~IRQ_DISABLED;
- desc->status = status;
- if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
- desc->status = status | IRQ_REPLAY;
- hw_resend_irq(desc->handler,irq);
- }
- desc->handler->enable(irq);
- /* fall-through */
- }
- default:
- desc->depth--;
- break;
- case 0:
- printk("enable_irq(%u) unbalanced from %p\n", irq,
- __builtin_return_address(0));
- }
- spin_unlock_irqrestore(&desc->lock, flags);
-}
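/* Editor's note: the depth counter makes disable/enable strictly nested.
 * A hypothetical driver quiescing its device must balance the calls
 * (mydev_quiesce and dev_irq are illustrative names): */

static void mydev_quiesce(unsigned int dev_irq)
{
    disable_irq(dev_irq);   /* depth 0 -> 1: line masked                 */
    disable_irq(dev_irq);   /* depth 1 -> 2: still masked                */
    enable_irq(dev_irq);    /* depth 2 -> 1: still masked                */
    enable_irq(dev_irq);    /* depth 1 -> 0: unmasked, replaying any IRQ */
                            /* left PENDING while the line was disabled  */
}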
-
-/*
- * do_IRQ handles all normal device IRQ's (the special
- * SMP cross-CPU interrupts have their own specific
- * handlers).
- */
-asmlinkage unsigned int do_IRQ(struct pt_regs regs)
-{
- /*
- * We ack quickly, we don't want the irq controller
- * thinking we're snobs just because some other CPU has
- * disabled global interrupts (we have already done the
- * INT_ACK cycles, it's too late to try to pretend to the
- * controller that we aren't taking the interrupt).
- *
- * 0 return value means that this irq is already being
- * handled by some other CPU. (or is disabled)
- */
- int irq = regs.orig_eax & 0xff; /* high bits used in ret_from_ code */
- irq_desc_t *desc = irq_desc + irq;
- struct irqaction * action;
- unsigned int status;
-
-#ifdef PERF_COUNTERS
- int cpu = smp_processor_id();
- u32 cc_start, cc_end;
-
- perfc_incra(irqs, cpu);
- rdtscl(cc_start);
-#endif
-
- spin_lock(&desc->lock);
- desc->handler->ack(irq);
-
- /*
- REPLAY is when Linux resends an IRQ that was dropped earlier
- WAITING is used by probe to mark irqs that are being tested
- */
- status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
- status |= IRQ_PENDING; /* we _want_ to handle it */
-
- /* We hook off guest-bound IRQs for special handling. */
- if ( status & IRQ_GUEST )
- {
- __do_IRQ_guest(irq);
- spin_unlock(&desc->lock);
- return 1;
- }
-
- /*
- * If the IRQ is disabled for whatever reason, we cannot use the action we
- * have.
- */
- action = NULL;
- if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
- action = desc->action;
- status &= ~IRQ_PENDING; /* we commit to handling */
- status |= IRQ_INPROGRESS; /* we are handling it */
- }
- desc->status = status;
-
- /*
- * If there is no IRQ handler or it was disabled, exit early. Since we set
- * PENDING, if another processor is handling a different instance of this
- * same irq, the other processor will take care of it.
- */
- if (!action)
- goto out;
-
- /*
- * Edge triggered interrupts need to remember pending events. This applies
- * to any hw interrupts that allow a second instance of the same irq to
- * arrive while we are in do_IRQ or in the handler. But the code here only
- * handles the _second_ instance of the irq, not the third or fourth. So
- * it is mostly useful for irq hardware that does not mask cleanly in an
- * SMP environment.
- */
- for (;;) {
- spin_unlock(&desc->lock);
- handle_IRQ_event(irq, &regs, action);
- spin_lock(&desc->lock);
-
- if (!(desc->status & IRQ_PENDING))
- break;
- desc->status &= ~IRQ_PENDING;
- }
- desc->status &= ~IRQ_INPROGRESS;
- out:
- /*
- * The ->end() handler has to deal with interrupts which got disabled
- * while the handler was running.
- */
- desc->handler->end(irq);
- spin_unlock(&desc->lock);
-
-#ifdef PERF_COUNTERS
- rdtscl(cc_end);
-
- if ( !action || (!(action->flags & SA_NOPROFILE)) )
- {
- perfc_adda(irq_time, cpu, cc_end - cc_start);
-#ifndef NDEBUG
- if ( (cc_end - cc_start) > (cpu_khz * 100) )
- printk("Long interrupt %08x -> %08x\n", cc_start, cc_end);
-#endif
- }
-#endif
-
- return 1;
-}
-
-/**
- * request_irq - allocate an interrupt line
- * @irq: Interrupt line to allocate
- * @handler: Function to be called when the IRQ occurs
- * @irqflags: Interrupt type flags
- * @devname: An ascii name for the claiming device
- * @dev_id: A cookie passed back to the handler function
- *
- * This call allocates interrupt resources and enables the
- * interrupt line and IRQ handling. From the point this
- * call is made your handler function may be invoked. Since
- * your handler function must clear any interrupt the board
- * raises, you must take care both to initialise your hardware
- * and to set up the interrupt handler in the right order.
- *
- * Dev_id must be globally unique. Normally the address of the
- * device data structure is used as the cookie. Since the handler
- * receives this value it makes sense to use it.
- *
- * If your interrupt is shared you must pass a non NULL dev_id
- * as this is required when freeing the interrupt.
- *
- * Flags:
- *
- * SA_SHIRQ Interrupt is shared
- *
- * SA_INTERRUPT Disable local interrupts while processing
- */
-
-int request_irq(unsigned int irq,
- void (*handler)(int, void *, struct pt_regs *),
- unsigned long irqflags,
- const char * devname,
- void *dev_id)
-{
- int retval;
- struct irqaction * action;
-
- if (irq >= NR_IRQS)
- return -EINVAL;
- if (!handler)
- return -EINVAL;
-
- action = (struct irqaction *)
- kmalloc(sizeof(struct irqaction), GFP_KERNEL);
- if (!action)
- return -ENOMEM;
-
- action->handler = handler;
- action->flags = irqflags;
- action->mask = 0;
- action->name = devname;
- action->next = NULL;
- action->dev_id = dev_id;
-
- retval = setup_irq(irq, action);
- if (retval)
- kfree(action);
-
- return retval;
-}
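/* Editor's note: a minimal, hypothetical caller of the API documented
 * above; all names here are illustrative, not from the source. */

static void mydev_intr(int irq, void *dev_id, struct pt_regs *regs)
{
    struct mydev *dev = dev_id;
    /* acknowledge the board here, then do the actual work */
}

static int mydev_attach(struct mydev *dev)
{
    /* shared line; dev doubles as the unique cookie for free_irq() later */
    return request_irq(dev->irq, mydev_intr, SA_SHIRQ, "mydev", dev);
}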
-
-/**
- * free_irq - free an interrupt
- * @irq: Interrupt line to free
- * @dev_id: Device identity to free
- *
- * Remove an interrupt handler. The handler is removed and if the
- * interrupt line is no longer in use by any driver it is disabled.
- * On a shared IRQ the caller must ensure the interrupt is disabled
- * on the card it drives before calling this function. The function
- * does not return until any executing interrupts for this IRQ
- * have completed.
- *
- * This function may be called from interrupt context.
- *
- * Bugs: Attempting to free an irq in a handler for the same irq hangs
- * the machine.
- */
-
-void free_irq(unsigned int irq, void *dev_id)
-{
- irq_desc_t *desc;
- struct irqaction **p;
- unsigned long flags;
-
- if (irq >= NR_IRQS)
- return;
-
- desc = irq_desc + irq;
- spin_lock_irqsave(&desc->lock,flags);
- p = &desc->action;
- for (;;) {
- struct irqaction * action = *p;
- if (action) {
- struct irqaction **pp = p;
- p = &action->next;
- if (action->dev_id != dev_id)
- continue;
-
- /* Found it - now remove it from the list of entries */
- *pp = action->next;
- if (!desc->action) {
- desc->status |= IRQ_DISABLED;
- desc->handler->shutdown(irq);
- }
- spin_unlock_irqrestore(&desc->lock,flags);
-
-#ifdef CONFIG_SMP
- /* Wait to make sure it's not being used on another CPU */
- while (desc->status & IRQ_INPROGRESS) {
- barrier();
- cpu_relax();
- }
-#endif
- kfree(action);
- return;
- }
- printk("Trying to free free IRQ%d\n",irq);
- spin_unlock_irqrestore(&desc->lock,flags);
- return;
- }
-}
-
-/*
- * IRQ autodetection code..
- *
- * This depends on the fact that any interrupt that
- * comes in on to an unassigned handler will get stuck
- * with "IRQ_WAITING" cleared and the interrupt
- * disabled.
- */
-
-static spinlock_t probe_sem = SPIN_LOCK_UNLOCKED;
-
-/**
- * probe_irq_on - begin an interrupt autodetect
- *
- * Commence probing for an interrupt. The interrupts are scanned
- * and a mask of potential interrupt lines is returned.
- *
- */
-
-unsigned long probe_irq_on(void)
-{
- unsigned int i;
- irq_desc_t *desc;
- unsigned long val;
- unsigned long s=0, e=0;
-
- spin_lock(&probe_sem);
- /*
- * something may have generated an irq long ago and we want to
- * flush such a longstanding irq before considering it as spurious.
- */
- for (i = NR_IRQS-1; i > 0; i--) {
- desc = irq_desc + i;
-
- spin_lock_irq(&desc->lock);
- if (!irq_desc[i].action)
- irq_desc[i].handler->startup(i);
- spin_unlock_irq(&desc->lock);
- }
-
- /* Wait for longstanding interrupts to trigger (20ms delay). */
- rdtscl(s);
- do {
- synchronize_irq();
- rdtscl(e);
- } while ( ((e-s)/ticks_per_usec) < 20000 );
-
- /*
- * enable any unassigned irqs
- * (we must startup again here because if a longstanding irq
- * happened in the previous stage, it may have masked itself)
- */
- for (i = NR_IRQS-1; i > 0; i--) {
- desc = irq_desc + i;
-
- spin_lock_irq(&desc->lock);
- if (!desc->action) {
- desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
- if (desc->handler->startup(i))
- desc->status |= IRQ_PENDING;
- }
- spin_unlock_irq(&desc->lock);
- }
-
- /*
- * Wait for spurious interrupts to trigger (100ms delay).
- */
- rdtscl(s);
- do {
- synchronize_irq();
- rdtscl(e);
- } while ( ((e-s)/ticks_per_usec) < 100000 );
-
- /*
- * Now filter out any obviously spurious interrupts
- */
- val = 0;
- for (i = 0; i < NR_IRQS; i++) {
- irq_desc_t *desc = irq_desc + i;
- unsigned int status;
-
- spin_lock_irq(&desc->lock);
- status = desc->status;
-
- if (status & IRQ_AUTODETECT) {
- /* It triggered already - consider it spurious. */
- if (!(status & IRQ_WAITING)) {
- desc->status = status & ~IRQ_AUTODETECT;
- desc->handler->shutdown(i);
- } else
- if (i < 32)
- val |= 1 << i;
- }
- spin_unlock_irq(&desc->lock);
- }
-
- return val;
-}
-
-/*
- * Return a mask of triggered interrupts (this
- * can handle only legacy ISA interrupts).
- */
-
-/**
- * probe_irq_mask - scan a bitmap of interrupt lines
- * @val: mask of interrupts to consider
- *
- * Scan the ISA bus interrupt lines and return a bitmap of
- * active interrupts. The interrupt probe logic state is then
- * returned to its previous value.
- *
- * Note: we need to scan all the irq's even though we will
- * only return ISA irq numbers - just so that we reset them
- * all to a known state.
- */
-unsigned int probe_irq_mask(unsigned long val)
-{
- int i;
- unsigned int mask;
-
- mask = 0;
- for (i = 0; i < NR_IRQS; i++) {
- irq_desc_t *desc = irq_desc + i;
- unsigned int status;
-
- spin_lock_irq(&desc->lock);
- status = desc->status;
-
- if (status & IRQ_AUTODETECT) {
- if (i < 16 && !(status & IRQ_WAITING))
- mask |= 1 << i;
-
- desc->status = status & ~IRQ_AUTODETECT;
- desc->handler->shutdown(i);
- }
- spin_unlock_irq(&desc->lock);
- }
- spin_unlock(&probe_sem);
-
- return mask & val;
-}
-
-/*
- * Return the one interrupt that triggered (this can
- * handle any interrupt source).
- */
-
-/**
- * probe_irq_off - end an interrupt autodetect
- * @val: mask of potential interrupts (unused)
- *
- * Scans the unused interrupt lines and returns the line which
- * appears to have triggered the interrupt. If no interrupt was
- * found then zero is returned. If more than one interrupt is
- * found then minus the first candidate is returned to indicate
- * there is doubt.
- *
- * The interrupt probe logic state is returned to its previous
- * value.
- *
- * BUGS: When used in a module (which arguably shouldn't happen)
- * nothing prevents two IRQ probe callers from overlapping. The
- * results of this are non-optimal.
- */
-
-int probe_irq_off(unsigned long val)
-{
- int i, irq_found, nr_irqs;
-
- nr_irqs = 0;
- irq_found = 0;
- for (i = 0; i < NR_IRQS; i++) {
- irq_desc_t *desc = irq_desc + i;
- unsigned int status;
-
- spin_lock_irq(&desc->lock);
- status = desc->status;
-
- if (status & IRQ_AUTODETECT) {
- if (!(status & IRQ_WAITING)) {
- if (!nr_irqs)
- irq_found = i;
- nr_irqs++;
- }
- desc->status = status & ~IRQ_AUTODETECT;
- desc->handler->shutdown(i);
- }
- spin_unlock_irq(&desc->lock);
- }
- spin_unlock(&probe_sem);
-
- if (nr_irqs > 1)
- irq_found = -irq_found;
- return irq_found;
-}
-
-/* this was setup_x86_irq but it seems pretty generic */
-int setup_irq(unsigned int irq, struct irqaction * new)
-{
- int shared = 0;
- unsigned long flags;
- struct irqaction *old, **p;
- irq_desc_t *desc = irq_desc + irq;
-
- /*
- * The following block of code has to be executed atomically
- */
- spin_lock_irqsave(&desc->lock,flags);
-
- if ( desc->status & IRQ_GUEST )
- {
- spin_unlock_irqrestore(&desc->lock,flags);
- return -EBUSY;
- }
-
- p = &desc->action;
- if ((old = *p) != NULL) {
- /* Can't share interrupts unless both agree to */
- if (!(old->flags & new->flags & SA_SHIRQ)) {
- spin_unlock_irqrestore(&desc->lock,flags);
- return -EBUSY;
- }
-
- /* add new interrupt at end of irq queue */
- do {
- p = &old->next;
- old = *p;
- } while (old);
- shared = 1;
- }
-
- *p = new;
-
- if (!shared) {
- desc->depth = 0;
- desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING);
- desc->handler->startup(irq);
- }
-
- spin_unlock_irqrestore(&desc->lock,flags);
-
- return 0;
-}
-
-
-
-/*
- * HANDLING OF GUEST-BOUND PHYSICAL IRQS
- */
-
-#define IRQ_MAX_GUESTS 7
-typedef struct {
- u8 nr_guests;
- u8 in_flight;
- u8 shareable;
- struct task_struct *guest[IRQ_MAX_GUESTS];
-} irq_guest_action_t;
-
-static void __do_IRQ_guest(int irq)
-{
- irq_desc_t *desc = &irq_desc[irq];
- irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
- struct task_struct *p;
- int i;
-
- for ( i = 0; i < action->nr_guests; i++ )
- {
- p = action->guest[i];
- if ( !test_and_set_bit(irq, &p->pirq_mask) )
- action->in_flight++;
- send_guest_pirq(p, irq);
- }
-}
-
-int pirq_guest_unmask(struct task_struct *p)
-{
- irq_desc_t *desc;
- int i, j, pirq;
- u32 m;
- shared_info_t *s = p->shared_info;
-
- for ( i = 0; i < 2; i++ )
- {
- m = p->pirq_mask[i];
- while ( (j = ffs(m)) != 0 )
- {
- m &= ~(1 << --j);
- pirq = (i << 5) + j;
- desc = &irq_desc[pirq];
- spin_lock_irq(&desc->lock);
- if ( !test_bit(p->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) &&
- test_and_clear_bit(pirq, &p->pirq_mask) &&
- (--((irq_guest_action_t *)desc->action)->in_flight == 0) )
- desc->handler->end(pirq);
- spin_unlock_irq(&desc->lock);
- }
- }
-
- return 0;
-}
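The word/bit arithmetic above packs 64 physical IRQs into the two 32-bit words of pirq_mask. A standalone sketch of the same decoding, assuming only the POSIX 1-based ffs():

    /* Standalone sketch of the pirq_mask decoding above: two 32-bit
     * words cover pirqs 0-63; ffs() is 1-based, hence the --j. */
    #include <stdio.h>
    #include <strings.h>   /* ffs() */

    int main(void)
    {
        unsigned int mask[2] = { 0x00000005, 0x80000000 }; /* pirqs 0, 2, 63 */
        int i, j;

        for (i = 0; i < 2; i++) {
            unsigned int m = mask[i];
            while ((j = ffs((int)m)) != 0) {
                m &= ~(1U << --j);
                printf("pending pirq %d\n", (i << 5) + j);
            }
        }
        return 0;
    }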
-
-int pirq_guest_bind(struct task_struct *p, int irq, int will_share)
-{
- unsigned long flags;
- irq_desc_t *desc = &irq_desc[irq];
- irq_guest_action_t *action;
- int rc = 0;
-
- if ( !IS_CAPABLE_PHYSDEV(p) )
- return -EPERM;
-
- spin_lock_irqsave(&desc->lock, flags);
-
- action = (irq_guest_action_t *)desc->action;
-
- if ( !(desc->status & IRQ_GUEST) )
- {
- if ( desc->action != NULL )
- {
- DPRINTK("Cannot bind IRQ %d to guest. In use by '%s'.\n",
- irq, desc->action->name);
- rc = -EBUSY;
- goto out;
- }
-
- action = kmalloc(sizeof(irq_guest_action_t), GFP_KERNEL);
- if ( (desc->action = (struct irqaction *)action) == NULL )
- {
- DPRINTK("Cannot bind IRQ %d to guest. Out of memory.\n", irq);
- rc = -ENOMEM;
- goto out;
- }
-
- action->nr_guests = 0;
- action->in_flight = 0;
- action->shareable = will_share;
-
- desc->depth = 0;
- desc->status |= IRQ_GUEST;
- desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING);
- desc->handler->startup(irq);
-
- /* Attempt to bind the interrupt target to the correct CPU. */
- if ( desc->handler->set_affinity != NULL )
- desc->handler->set_affinity(
- irq, apicid_to_phys_cpu_present(p->processor));
- }
- else if ( !will_share || !action->shareable )
- {
- DPRINTK("Cannot bind IRQ %d to guest. Will not share with others.\n",
- irq);
- rc = -EBUSY;
- goto out;
- }
-
- if ( action->nr_guests == IRQ_MAX_GUESTS )
- {
- DPRINTK("Cannot bind IRQ %d to guest. Already at max share.\n", irq);
- rc = -EBUSY;
- goto out;
- }
-
- action->guest[action->nr_guests++] = p;
-
- out:
- spin_unlock_irqrestore(&desc->lock, flags);
- return rc;
-}
-
-int pirq_guest_unbind(struct task_struct *p, int irq)
-{
- unsigned long flags;
- irq_desc_t *desc = &irq_desc[irq];
- irq_guest_action_t *action;
- int i;
-
- spin_lock_irqsave(&desc->lock, flags);
-
- action = (irq_guest_action_t *)desc->action;
-
- if ( test_and_clear_bit(irq, &p->pirq_mask) &&
- (--action->in_flight == 0) )
- desc->handler->end(irq);
-
- if ( action->nr_guests == 1 )
- {
- desc->action = NULL;
- kfree(action);
- desc->status |= IRQ_DISABLED;
- desc->status &= ~IRQ_GUEST;
- desc->handler->shutdown(irq);
- }
- else
- {
- i = 0;
- while ( action->guest[i] != p )
- i++;
- memmove(&action->guest[i], &action->guest[i+1],
- (IRQ_MAX_GUESTS-i-1) * sizeof(action->guest[0]));
- action->nr_guests--;
- }
-
- spin_unlock_irqrestore(&desc->lock, flags);
- return 0;
-}
+++ /dev/null
-/******************************************************************************
- * arch/i386/mm.c
- *
- * Modifications to Linux original are copyright (c) 2002-2003, K A Fraser
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <xen/config.h>
-#include <xen/lib.h>
-#include <xen/init.h>
-#include <xen/mm.h>
-#include <asm/page.h>
-#include <asm/pgalloc.h>
-#include <asm/fixmap.h>
-#include <asm/domain_page.h>
-
-static inline void set_pte_phys(unsigned long vaddr,
- l1_pgentry_t entry)
-{
- l2_pgentry_t *l2ent;
- l1_pgentry_t *l1ent;
-
- l2ent = &idle_pg_table[l2_table_offset(vaddr)];
- l1ent = l2_pgentry_to_l1(*l2ent) + l1_table_offset(vaddr);
- *l1ent = entry;
-
- /* It's enough to flush this one mapping. */
- __flush_tlb_one(vaddr);
-}
-
-
-void __set_fixmap(enum fixed_addresses idx,
- l1_pgentry_t entry)
-{
- unsigned long address = __fix_to_virt(idx);
-
- if ( likely(idx < __end_of_fixed_addresses) )
- set_pte_phys(address, entry);
- else
- printk("Invalid __set_fixmap\n");
-}
-
-
-static void __init fixrange_init(unsigned long start,
- unsigned long end,
- l2_pgentry_t *pg_base)
-{
- l2_pgentry_t *l2e;
- int i;
- unsigned long vaddr, page;
-
- vaddr = start;
- i = l2_table_offset(vaddr);
- l2e = pg_base + i;
-
- for ( ; (i < ENTRIES_PER_L2_PAGETABLE) && (vaddr != end); l2e++, i++ )
- {
- if ( !l2_pgentry_empty(*l2e) )
- continue;
- page = (unsigned long)get_free_page(GFP_KERNEL);
- clear_page(page);
- *l2e = mk_l2_pgentry(__pa(page) | __PAGE_HYPERVISOR);
- vaddr += 1 << L2_PAGETABLE_SHIFT;
- }
-}
-
-void __init paging_init(void)
-{
- unsigned long addr;
- void *ioremap_pt;
- int i;
-
- /* Idle page table 1:1 maps the first part of physical memory. */
- for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
- idle_pg_table[i] =
- mk_l2_pgentry((i << L2_PAGETABLE_SHIFT) |
- __PAGE_HYPERVISOR | _PAGE_PSE);
-
- /*
- * Fixed mappings, only the page table structure has to be
- * created - mappings will be set by set_fixmap():
- */
- addr = FIXADDR_START & ~((1<<L2_PAGETABLE_SHIFT)-1);
- fixrange_init(addr, 0, idle_pg_table);
-
- /* Create page table for ioremap(). */
- ioremap_pt = (void *)get_free_page(GFP_KERNEL);
- clear_page(ioremap_pt);
- idle_pg_table[IOREMAP_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(ioremap_pt) | __PAGE_HYPERVISOR);
-
- /* Create read-only mapping of MPT for guest-OS use. */
- idle_pg_table[READONLY_MPT_VIRT_START >> L2_PAGETABLE_SHIFT] =
- idle_pg_table[RDWR_MPT_VIRT_START >> L2_PAGETABLE_SHIFT];
- mk_l2_readonly(idle_pg_table +
- (READONLY_MPT_VIRT_START >> L2_PAGETABLE_SHIFT));
-
- /* Set up mapping cache for domain pages. */
- mapcache = (unsigned long *)get_free_page(GFP_KERNEL);
- clear_page(mapcache);
- idle_pg_table[MAPCACHE_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(mapcache) | __PAGE_HYPERVISOR);
-
- /* Set up linear page table mapping. */
- idle_pg_table[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR);
-
-}
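Each L2 entry installed with _PAGE_PSE above maps a 4MB superpage, so entry i covers physical addresses [i<<22, (i+1)<<22). A standalone sketch of that arithmetic, assuming L2_PAGETABLE_SHIFT is 22 as on non-PAE x86:

    /* Standalone sketch: ranges covered by the 1:1 PSE mappings set up
     * in paging_init(). Assumes non-PAE x86, L2_PAGETABLE_SHIFT == 22. */
    #include <stdio.h>

    #define L2_PAGETABLE_SHIFT 22

    int main(void)
    {
        int i;
        for (i = 0; i < 4; i++)
            printf("idle_pg_table[%d] maps %#010lx-%#010lx\n", i,
                   (unsigned long)i << L2_PAGETABLE_SHIFT,
                   (((unsigned long)i + 1) << L2_PAGETABLE_SHIFT) - 1);
        return 0;
    }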
-
-void __init zap_low_mappings(void)
-{
- int i;
- for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
- idle_pg_table[i] = mk_l2_pgentry(0);
- flush_tlb_all_pge();
-}
-
-
-long do_stack_switch(unsigned long ss, unsigned long esp)
-{
- int nr = smp_processor_id();
- struct tss_struct *t = &init_tss[nr];
-
- /* We need to do this check as we load and use SS on guest's behalf. */
- if ( (ss & 3) == 0 )
- return -EPERM;
-
- current->thread.guestos_ss = ss;
- current->thread.guestos_sp = esp;
- t->ss1 = ss;
- t->esp1 = esp;
-
- return 0;
-}
-
-
-/* Returns TRUE if given descriptor is valid for GDT or LDT. */
-int check_descriptor(unsigned long a, unsigned long b)
-{
- unsigned long base, limit;
-
- /* A not-present descriptor will always fault, so is safe. */
- if ( !(b & _SEGMENT_P) )
- goto good;
-
- /*
- * We don't allow a DPL of zero. There is no legitimate reason for
- * specifying DPL==0, and it gets rather dangerous if we also accept call
- * gates (consider a call gate pointing at another guestos descriptor with
- * DPL 0 -- this would get the OS ring-0 privileges).
- */
- if ( (b & _SEGMENT_DPL) == 0 )
- goto bad;
-
- if ( !(b & _SEGMENT_S) )
- {
- /*
- * System segment:
- * 1. Don't allow interrupt or trap gates as they belong in the IDT.
- * 2. Don't allow TSS descriptors or task gates as we don't
- * virtualise x86 tasks.
- * 3. Don't allow LDT descriptors because they're unnecessary and
- * I'm uneasy about allowing an LDT page to contain LDT
- * descriptors. In any case, Xen automatically creates the
- * required descriptor when reloading the LDT register.
- * 4. We allow call gates but they must not jump to a private segment.
- */
-
- /* Disallow everything but call gates. */
- if ( (b & _SEGMENT_TYPE) != 0xc00 )
- goto bad;
-
- /* Can't allow far jump to a Xen-private segment. */
- if ( !VALID_CODESEL(a>>16) )
- goto bad;
-
- /* Reserved bits must be zero. */
- if ( (b & 0xe0) != 0 )
- goto bad;
-
- /* No base/limit check is needed for a call gate. */
- goto good;
- }
-
- /* Check that base/limit do not overlap Xen-private space. */
- base = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16);
- limit = (b&0xf0000) | (a&0xffff);
- limit++; /* We add one because limit is inclusive. */
- if ( (b & _SEGMENT_G) )
- limit <<= 12;
- if ( ((base + limit) <= base) ||
- ((base + limit) > PAGE_OFFSET) )
- goto bad;
-
- good:
- return 1;
- bad:
- return 0;
-}
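The base/limit reassembly above is the standard x86 descriptor bit-shuffle. A standalone sketch that applies the same decoding to a sample descriptor (the words 0x0000ffff/0x00cf9a00 are an illustrative flat 4GB code segment, not taken from the source):

    /* Standalone sketch of the base/limit decode in check_descriptor().
     * The sample words describe a flat 4GB segment (illustrative only). */
    #include <stdio.h>

    int main(void)
    {
        unsigned long long a = 0x0000ffffULL, b = 0x00cf9a00ULL;
        unsigned long long base, limit;

        base  = (b & (0xffULL << 24)) | ((b & 0xffULL) << 16) | (a >> 16);
        limit = (b & 0xf0000ULL) | (a & 0xffffULL);
        limit++;                          /* the limit field is inclusive */
        if (b & (1ULL << 23))             /* G bit: 4kB granularity       */
            limit <<= 12;

        printf("base=%#llx limit=%#llx\n", base, limit); /* 0, 0x100000000 */
        return 0;
    }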
-
-
-long set_gdt(struct task_struct *p,
- unsigned long *frames,
- unsigned int entries)
-{
- /* NB. There are 512 8-byte entries per GDT page. */
- int i, nr_pages = (entries + 511) / 512;
- unsigned long pfn;
- struct desc_struct *vgdt;
-
- /* Check the new GDT. */
- for ( i = 0; i < nr_pages; i++ )
- {
- if ( unlikely(frames[i] >= max_page) ||
- unlikely(!get_page_and_type(&frame_table[frames[i]],
- p, PGT_gdt_page)) )
- goto fail;
- }
-
- /* Copy reserved GDT entries to the new GDT. */
- vgdt = map_domain_mem(frames[0] << PAGE_SHIFT);
- memcpy(vgdt + FIRST_RESERVED_GDT_ENTRY,
- gdt_table + FIRST_RESERVED_GDT_ENTRY,
- NR_RESERVED_GDT_ENTRIES*8);
- unmap_domain_mem(vgdt);
-
- /* Tear down the old GDT. */
- for ( i = 0; i < 16; i++ )
- {
- if ( (pfn = l1_pgentry_to_pagenr(p->mm.perdomain_pt[i])) != 0 )
- put_page_and_type(&frame_table[pfn]);
- p->mm.perdomain_pt[i] = mk_l1_pgentry(0);
- }
-
- /* Install the new GDT. */
- for ( i = 0; i < nr_pages; i++ )
- p->mm.perdomain_pt[i] =
- mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR);
-
- SET_GDT_ADDRESS(p, GDT_VIRT_START);
- SET_GDT_ENTRIES(p, (entries*8)-1);
-
- return 0;
-
- fail:
- while ( i-- > 0 )
- put_page_and_type(&frame_table[frames[i]]);
- return -EINVAL;
-}
-
-
-long do_set_gdt(unsigned long *frame_list, unsigned int entries)
-{
- int nr_pages = (entries + 511) / 512;
- unsigned long frames[16];
- long ret;
-
- if ( (entries <= LAST_RESERVED_GDT_ENTRY) || (entries > 8192) )
- return -EINVAL;
-
- if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) )
- return -EFAULT;
-
- if ( (ret = set_gdt(current, frames, entries)) == 0 )
- {
- local_flush_tlb();
- __asm__ __volatile__ ("lgdt %0" : "=m" (*current->mm.gdt));
- }
-
- return ret;
-}
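A GDT page holds 512 eight-byte descriptors, so the nr_pages computation above is a plain round-up; a small worked sketch:

    /* Standalone sketch of the GDT sizing arithmetic above:
     * 512 8-byte descriptors fit in one 4kB page. */
    #include <stdio.h>

    int main(void)
    {
        unsigned int entries[] = { 1, 512, 513, 8192 };
        int i;
        for (i = 0; i < 4; i++)
            printf("%u entries -> %u page(s)\n",
                   entries[i], (entries[i] + 511) / 512);
        /* 1 -> 1, 512 -> 1, 513 -> 2, 8192 -> 16 */
        return 0;
    }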
-
-
-long do_update_descriptor(
- unsigned long pa, unsigned long word1, unsigned long word2)
-{
- unsigned long *gdt_pent, pfn = pa >> PAGE_SHIFT;
- struct pfn_info *page;
- long ret = -EINVAL;
-
- if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(word1, word2) )
- return -EINVAL;
-
- page = &frame_table[pfn];
- if ( unlikely(!get_page(page, current)) )
- goto out;
-
- /* Check if the given frame is in use in an unsafe context. */
- switch ( page->type_and_flags & PGT_type_mask )
- {
- case PGT_gdt_page:
- /* Disallow updates of Xen-reserved descriptors in the current GDT. */
- if ( (l1_pgentry_to_pagenr(current->mm.perdomain_pt[0]) == pfn) &&
- (((pa&(PAGE_SIZE-1))>>3) >= FIRST_RESERVED_GDT_ENTRY) &&
- (((pa&(PAGE_SIZE-1))>>3) <= LAST_RESERVED_GDT_ENTRY) )
- goto out;
- if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
- goto out;
- break;
- case PGT_ldt_page:
- if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
- goto out;
- break;
- default:
- if ( unlikely(!get_page_type(page, PGT_writeable_page)) )
- goto out;
- break;
- }
-
- /* All is good so make the update. */
- gdt_pent = map_domain_mem(pa);
- gdt_pent[0] = word1;
- gdt_pent[1] = word2;
- unmap_domain_mem(gdt_pent);
-
- put_page_type(page);
-
- ret = 0; /* success */
-
- out:
- put_page(page);
- return ret;
-}
-
-#ifdef MEMORY_GUARD
-
-void *memguard_init(void *heap_start)
-{
- l1_pgentry_t *l1;
- int i, j;
-
- /* Round the allocation pointer up to a page boundary. */
- heap_start = (void *)(((unsigned long)heap_start + (PAGE_SIZE-1)) &
- PAGE_MASK);
-
- /* Memory guarding is incompatible with super pages. */
- for ( i = 0; i < (MAX_MONITOR_ADDRESS >> L2_PAGETABLE_SHIFT); i++ )
- {
- l1 = (l1_pgentry_t *)heap_start;
- heap_start = (void *)((unsigned long)heap_start + PAGE_SIZE);
- for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
- l1[j] = mk_l1_pgentry((i << L2_PAGETABLE_SHIFT) |
- (j << L1_PAGETABLE_SHIFT) |
- __PAGE_HYPERVISOR);
- idle_pg_table[i] = idle_pg_table[i + l2_table_offset(PAGE_OFFSET)] =
- mk_l2_pgentry(virt_to_phys(l1) | __PAGE_HYPERVISOR);
- }
-
- return heap_start;
-}
-
-static void __memguard_change_range(void *p, unsigned long l, int guard)
-{
- l1_pgentry_t *l1;
- l2_pgentry_t *l2;
- unsigned long _p = (unsigned long)p;
- unsigned long _l = (unsigned long)l;
-
- /* Ensure we are dealing with a page-aligned whole number of pages. */
- ASSERT((_p&PAGE_MASK) != 0);
- ASSERT((_l&PAGE_MASK) != 0);
- ASSERT((_p&~PAGE_MASK) == 0);
- ASSERT((_l&~PAGE_MASK) == 0);
-
- while ( _l != 0 )
- {
- l2 = &idle_pg_table[l2_table_offset(_p)];
- l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
- if ( guard )
- *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) & ~_PAGE_PRESENT);
- else
- *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) | _PAGE_PRESENT);
- _p += PAGE_SIZE;
- _l -= PAGE_SIZE;
- }
-}
-
-void memguard_guard_range(void *p, unsigned long l)
-{
- __memguard_change_range(p, l, 1);
- local_flush_tlb();
-}
-
-void memguard_unguard_range(void *p, unsigned long l)
-{
- __memguard_change_range(p, l, 0);
-}
-
-int memguard_is_guarded(void *p)
-{
- l1_pgentry_t *l1;
- l2_pgentry_t *l2;
- unsigned long _p = (unsigned long)p;
- l2 = &idle_pg_table[l2_table_offset(_p)];
- l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
- return !(l1_pgentry_val(*l1) & _PAGE_PRESENT);
-}
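The guard above works by clearing _PAGE_PRESENT so that any stray access faults. A rough userspace analogue of the same idea, with mprotect() standing in for the direct page-table edit (illustrative only, POSIX):

    /* Userspace analogue of the memguard idea (illustrative): revoke
     * access to a page so any stray touch faults immediately. */
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        long psz = sysconf(_SC_PAGESIZE);
        void *p = mmap(NULL, psz, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            return 1;

        mprotect(p, psz, PROT_NONE);   /* "guard": like clearing PRESENT */
        /* *(char *)p = 1;  -- would now fault (SIGSEGV)                 */
        mprotect(p, psz, PROT_READ | PROT_WRITE); /* "unguard" */
        *(char *)p = 1;                /* fine again */

        printf("guard/unguard round trip ok\n");
        munmap(p, psz);
        return 0;
    }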
-
-#endif
+++ /dev/null
-/*
- * Intel Multiprocessor Specification 1.1 and 1.4
- * compliant MP-table parsing routines.
- *
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- * Fixes
- * Erich Boleyn : MP v1.4 and additional changes.
- * Alan Cox : Added EBDA scanning
- * Ingo Molnar : various cleanups and rewrites
- * Maciej W. Rozycki: Bits for default MP configurations
- * Paul Diefenbaugh: Added full ACPI support
- */
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/lib.h>
-#include <xen/kernel.h>
-#include <xen/irq.h>
-#include <xen/smp.h>
-#include <xen/mm.h>
-#include <xen/acpi.h>
-#include <asm/acpi.h>
-#include <asm/io.h>
-#include <asm/apic.h>
-#include <asm/mpspec.h>
-#include <asm/pgalloc.h>
-#include <asm/smpboot.h>
-
-int numnodes = 1; /* XXX Xen */
-
-/* Have we found an MP table */
-int smp_found_config;
-
-/*
- * Various Linux-internal data structures created from the
- * MP-table.
- */
-int apic_version [MAX_APICS];
-int quad_local_to_mp_bus_id [NR_CPUS/4][4];
-int mp_current_pci_id;
-int *mp_bus_id_to_type;
-int *mp_bus_id_to_node;
-int *mp_bus_id_to_local;
-int *mp_bus_id_to_pci_bus;
-int max_mp_busses;
-int max_irq_sources;
-
-/* I/O APIC entries */
-struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
-
-/* # of MP IRQ source entries */
-struct mpc_config_intsrc *mp_irqs;
-
-/* MP IRQ source entries */
-int mp_irq_entries;
-
-int nr_ioapics;
-
-int pic_mode;
-unsigned long mp_lapic_addr;
-
-/* Processor that is doing the boot up */
-unsigned int boot_cpu_physical_apicid = -1U;
-unsigned int boot_cpu_logical_apicid = -1U;
-/* Internal processor count */
-static unsigned int num_processors;
-
-/* Bitmask of physically existing CPUs */
-unsigned long phys_cpu_present_map;
-unsigned long logical_cpu_present_map;
-
-#ifdef CONFIG_X86_CLUSTERED_APIC
-unsigned char esr_disable = 0;
-unsigned char clustered_apic_mode = CLUSTERED_APIC_NONE;
-unsigned int apic_broadcast_id = APIC_BROADCAST_ID_APIC;
-#endif
-unsigned char raw_phys_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-
-/*
- * Intel MP BIOS table parsing routines:
- */
-
-#ifndef CONFIG_X86_VISWS_APIC
-/*
- * Checksum an MP configuration block.
- */
-
-static int __init mpf_checksum(unsigned char *mp, int len)
-{
- int sum = 0;
-
- while (len--)
- sum += *mp++;
-
- return sum & 0xFF;
-}
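Per the MP specification, the bytes of a valid structure sum to zero mod 256, which is why a zero return here means the checksum passes. A standalone sketch with a made-up 4-byte table:

    /* Standalone sketch of the MP checksum rule: a valid table's bytes
     * sum to 0 mod 256, so the final byte cancels the rest. */
    #include <stdio.h>

    static int checksum(unsigned char *mp, int len)
    {
        int sum = 0;
        while (len--)
            sum += *mp++;
        return sum & 0xFF;
    }

    int main(void)
    {
        unsigned char table[4] = { 0x5f, 0x4d, 0x50, 0 }; /* made-up bytes */
        table[3] = (unsigned char)(0x100 - checksum(table, 3));
        printf("checksum = %d (0 means valid)\n", checksum(table, 4));
        return 0;
    }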
-
-/*
- * Processor encoding in an MP configuration block
- */
-
-static char __init *mpc_family(int family,int model)
-{
- static char n[32];
- static char *model_defs[]=
- {
- "80486DX","80486DX",
- "80486SX","80486DX/2 or 80487",
- "80486SL","80486SX/2",
- "Unknown","80486DX/2-WB",
- "80486DX/4","80486DX/4-WB"
- };
-
- switch (family) {
- case 0x04:
- if (model < 10)
- return model_defs[model];
- break;
-
- case 0x05:
- return("Pentium(tm)");
-
- case 0x06:
- return("Pentium(tm) Pro");
-
- case 0x0F:
- if (model == 0x00)
- return("Pentium 4(tm)");
- if (model == 0x01)
- return("Pentium 4(tm)");
- if (model == 0x02)
- return("Pentium 4(tm) XEON(tm)");
- if (model == 0x0F)
- return("Special controller");
- }
- sprintf(n,"Unknown CPU [%d:%d]",family, model);
- return n;
-}
-
-/*
- * Translation table entries have to be matched to main table entries by a
- * running counter, hence the mpc_record variable; there is no obviously
- * cleaner way of doing this.
- */
-
-static int mpc_record;
-static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata;
-
-void __init MP_processor_info (struct mpc_config_processor *m)
-{
- int ver, quad, logical_apicid;
-
- if (!(m->mpc_cpuflag & CPU_ENABLED))
- return;
-
- logical_apicid = m->mpc_apicid;
- if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
- quad = translation_table[mpc_record]->trans_quad;
- logical_apicid = (quad << 4) +
- (m->mpc_apicid ? m->mpc_apicid << 1 : 1);
- printk("Processor #%d %s APIC version %d (quad %d, apic %d)\n",
- m->mpc_apicid,
- mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 ,
- (m->mpc_cpufeature & CPU_MODEL_MASK)>>4),
- m->mpc_apicver, quad, logical_apicid);
- } else {
- printk("Processor #%d %s APIC version %d\n",
- m->mpc_apicid,
- mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 ,
- (m->mpc_cpufeature & CPU_MODEL_MASK)>>4),
- m->mpc_apicver);
- }
-
- if (m->mpc_featureflag&(1<<0))
- Dprintk(" Floating point unit present.\n");
- if (m->mpc_featureflag&(1<<7))
- Dprintk(" Machine Exception supported.\n");
- if (m->mpc_featureflag&(1<<8))
- Dprintk(" 64 bit compare & exchange supported.\n");
- if (m->mpc_featureflag&(1<<9))
- Dprintk(" Internal APIC present.\n");
- if (m->mpc_featureflag&(1<<11))
- Dprintk(" SEP present.\n");
- if (m->mpc_featureflag&(1<<12))
- Dprintk(" MTRR present.\n");
- if (m->mpc_featureflag&(1<<13))
- Dprintk(" PGE present.\n");
- if (m->mpc_featureflag&(1<<14))
- Dprintk(" MCA present.\n");
- if (m->mpc_featureflag&(1<<15))
- Dprintk(" CMOV present.\n");
- if (m->mpc_featureflag&(1<<16))
- Dprintk(" PAT present.\n");
- if (m->mpc_featureflag&(1<<17))
- Dprintk(" PSE present.\n");
- if (m->mpc_featureflag&(1<<18))
- Dprintk(" PSN present.\n");
- if (m->mpc_featureflag&(1<<19))
- Dprintk(" Cache Line Flush Instruction present.\n");
- /* 20 Reserved */
- if (m->mpc_featureflag&(1<<21))
- Dprintk(" Debug Trace and EMON Store present.\n");
- if (m->mpc_featureflag&(1<<22))
- Dprintk(" ACPI Thermal Throttle Registers present.\n");
- if (m->mpc_featureflag&(1<<23))
- Dprintk(" MMX present.\n");
- if (m->mpc_featureflag&(1<<24))
- Dprintk(" FXSR present.\n");
- if (m->mpc_featureflag&(1<<25))
- Dprintk(" XMM present.\n");
- if (m->mpc_featureflag&(1<<26))
- Dprintk(" Willamette New Instructions present.\n");
- if (m->mpc_featureflag&(1<<27))
- Dprintk(" Self Snoop present.\n");
- if (m->mpc_featureflag&(1<<28))
- Dprintk(" HT present.\n");
- if (m->mpc_featureflag&(1<<29))
- Dprintk(" Thermal Monitor present.\n");
- /* 30, 31 Reserved */
-
-
- if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
- Dprintk(" Bootup CPU\n");
- boot_cpu_physical_apicid = m->mpc_apicid;
- boot_cpu_logical_apicid = logical_apicid;
- }
-
- if (num_processors >= NR_CPUS){
- printk(KERN_WARNING "NR_CPUS limit of %i reached. Cannot "
- "boot CPU(apicid 0x%x).\n", NR_CPUS, m->mpc_apicid);
- return;
- }
- num_processors++;
-
- if (m->mpc_apicid > MAX_APICS) {
- printk("Processor #%d INVALID. (Max ID: %d).\n",
- m->mpc_apicid, MAX_APICS);
- --num_processors;
- return;
- }
- ver = m->mpc_apicver;
-
- logical_cpu_present_map |= 1 << (num_processors-1);
- phys_cpu_present_map |= apicid_to_phys_cpu_present(m->mpc_apicid);
-
- /*
- * Validate version
- */
- if (ver == 0x0) {
- printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
- ver = 0x10;
- }
- apic_version[m->mpc_apicid] = ver;
- raw_phys_apicid[num_processors - 1] = m->mpc_apicid;
-}
-
-static void __init MP_bus_info (struct mpc_config_bus *m)
-{
- char str[7];
- int quad;
-
- memcpy(str, m->mpc_bustype, 6);
- str[6] = 0;
-
- if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
- quad = translation_table[mpc_record]->trans_quad;
- mp_bus_id_to_node[m->mpc_busid] = quad;
- mp_bus_id_to_local[m->mpc_busid] = translation_table[mpc_record]->trans_local;
- quad_local_to_mp_bus_id[quad][translation_table[mpc_record]->trans_local] = m->mpc_busid;
- printk("Bus #%d is %s (node %d)\n", m->mpc_busid, str, quad);
- } else {
- Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
- }
-
- if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
- } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
- } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
- mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
- mp_current_pci_id++;
- } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
- } else {
- printk("Unknown bustype %s - ignoring\n", str);
- }
-}
-
-static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
-{
- if (!(m->mpc_flags & MPC_APIC_USABLE))
- return;
-
- printk("I/O APIC #%d Version %d at 0x%lX.\n",
- m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
- if (nr_ioapics >= MAX_IO_APICS) {
- printk("Max # of I/O APICs (%d) exceeded (found %d).\n",
- MAX_IO_APICS, nr_ioapics);
- panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
- }
- if (!m->mpc_apicaddr) {
- printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
- " found in MP table, skipping!\n");
- return;
- }
- mp_ioapics[nr_ioapics] = *m;
- nr_ioapics++;
-}
-
-static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
-{
- mp_irqs [mp_irq_entries] = *m;
- Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
- " IRQ %02x, APIC ID %x, APIC INT %02x\n",
- m->mpc_irqtype, m->mpc_irqflag & 3,
- (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
- m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
- if (++mp_irq_entries == max_irq_sources)
- panic("Max # of irq sources exceeded!!\n");
-}
-
-static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
-{
- Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
- " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
- m->mpc_irqtype, m->mpc_irqflag & 3,
- (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
- m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
- /*
- * Well it seems all SMP boards in existence
- * use ExtINT/LVT1 == LINT0 and
- * NMI/LVT2 == LINT1 - the following check
- * will show us if this assumption is false.
- * Until then we do not have to add baggage.
- */
- if ((m->mpc_irqtype == mp_ExtINT) &&
- (m->mpc_destapiclint != 0))
- BUG();
- if ((m->mpc_irqtype == mp_NMI) &&
- (m->mpc_destapiclint != 1))
- BUG();
-}
-
-static void __init MP_translation_info (struct mpc_config_translation *m)
-{
- printk("Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
-
- if (mpc_record >= MAX_MPC_ENTRY)
- printk("MAX_MPC_ENTRY exceeded!\n");
- else
- translation_table[mpc_record] = m; /* stash this for later */
- if (m->trans_quad+1 > numnodes)
- numnodes = m->trans_quad+1;
-}
-
-/*
- * Read/parse the MPC oem tables
- */
-
-static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
- unsigned short oemsize)
-{
- int count = sizeof (*oemtable); /* the header size */
- unsigned char *oemptr = ((unsigned char *)oemtable)+count;
-
- printk("Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
- if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
- {
- printk("SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
- oemtable->oem_signature[0],
- oemtable->oem_signature[1],
- oemtable->oem_signature[2],
- oemtable->oem_signature[3]);
- return;
- }
- if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
- {
- printk("SMP oem mptable: checksum error!\n");
- return;
- }
- while (count < oemtable->oem_length) {
- switch (*oemptr) {
- case MP_TRANSLATION:
- {
- struct mpc_config_translation *m=
- (struct mpc_config_translation *)oemptr;
- MP_translation_info(m);
- oemptr += sizeof(*m);
- count += sizeof(*m);
- ++mpc_record;
- break;
- }
- default:
- {
- printk("Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
- return;
- }
- }
- }
-}
-
-/*
- * Read/parse the MPC
- */
-
-static int __init smp_read_mpc(struct mp_config_table *mpc)
-{
- char oem[16], prod[14];
- int count=sizeof(*mpc);
- unsigned char *mpt=((unsigned char *)mpc)+count;
- int num_bus = 0;
- int num_irq = 0;
- unsigned char *bus_data;
-
- if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
- panic("SMP mptable: bad signature [%c%c%c%c]!\n",
- mpc->mpc_signature[0],
- mpc->mpc_signature[1],
- mpc->mpc_signature[2],
- mpc->mpc_signature[3]);
- return 0;
- }
- if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
- panic("SMP mptable: checksum error!\n");
- return 0;
- }
- if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
- printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
- mpc->mpc_spec);
- return 0;
- }
- if (!mpc->mpc_lapic) {
- printk(KERN_ERR "SMP mptable: null local APIC address!\n");
- return 0;
- }
- memcpy(oem,mpc->mpc_oem,8);
- oem[8]=0;
- printk("OEM ID: %s ",oem);
-
- memcpy(prod,mpc->mpc_productid,12);
- prod[12]=0;
- printk("Product ID: %s ",prod);
-
- detect_clustered_apic(oem, prod);
-
- printk("APIC at: 0x%lX\n",mpc->mpc_lapic);
-
- /*
- * Save the local APIC address (it might be non-default) -- but only
- * if we're not using ACPI.
- */
- if (!acpi_lapic)
- mp_lapic_addr = mpc->mpc_lapic;
-
- if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ) && mpc->mpc_oemptr) {
- /* We need to process the oem mpc tables to tell us which quad things are in ... */
- mpc_record = 0;
- smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr, mpc->mpc_oemsize);
- mpc_record = 0;
- }
-
- /* Pre-scan to determine the number of bus and
- * interrupt records we have
- */
- while (count < mpc->mpc_length) {
- switch (*mpt) {
- case MP_PROCESSOR:
- mpt += sizeof(struct mpc_config_processor);
- count += sizeof(struct mpc_config_processor);
- break;
- case MP_BUS:
- ++num_bus;
- mpt += sizeof(struct mpc_config_bus);
- count += sizeof(struct mpc_config_bus);
- break;
- case MP_INTSRC:
- ++num_irq;
- mpt += sizeof(struct mpc_config_intsrc);
- count += sizeof(struct mpc_config_intsrc);
- break;
- case MP_IOAPIC:
- mpt += sizeof(struct mpc_config_ioapic);
- count += sizeof(struct mpc_config_ioapic);
- break;
- case MP_LINTSRC:
- mpt += sizeof(struct mpc_config_lintsrc);
- count += sizeof(struct mpc_config_lintsrc);
- break;
- default:
- count = mpc->mpc_length;
- break;
- }
- }
- /*
- * Paranoia: Allocate one extra of both the number of busses and number
- * of irqs, and make sure that we have at least 4 interrupts per PCI
- * slot. But some machines do not report very many busses, so we need
- * to fall back on the older defaults.
- */
- ++num_bus;
- max_mp_busses = max(num_bus, MAX_MP_BUSSES);
- if (num_irq < (4 * max_mp_busses))
- num_irq = 4 * num_bus; /* 4 intr/PCI slot */
- ++num_irq;
- max_irq_sources = max(num_irq, MAX_IRQ_SOURCES);
-
- count = (max_mp_busses * sizeof(int)) * 4;
- count += (max_irq_sources * sizeof(struct mpc_config_intsrc));
- bus_data = (void *)__get_free_pages(GFP_KERNEL, get_order(count));
- if (!bus_data) {
- printk(KERN_ERR "SMP mptable: out of memory!\n");
- return 0;
- }
- mp_bus_id_to_type = (int *)&bus_data[0];
- mp_bus_id_to_node = (int *)&bus_data[(max_mp_busses * sizeof(int))];
- mp_bus_id_to_local = (int *)&bus_data[(max_mp_busses * sizeof(int)) * 2];
- mp_bus_id_to_pci_bus = (int *)&bus_data[(max_mp_busses * sizeof(int)) * 3];
- mp_irqs = (struct mpc_config_intsrc *)&bus_data[(max_mp_busses * sizeof(int)) * 4];
- memset(mp_bus_id_to_pci_bus, -1, max_mp_busses * sizeof(int));
-
- /*
- * Now process the configuration blocks.
- */
- count = sizeof(*mpc);
- mpt = ((unsigned char *)mpc)+count;
- while (count < mpc->mpc_length) {
- switch(*mpt) {
- case MP_PROCESSOR:
- {
- struct mpc_config_processor *m=
- (struct mpc_config_processor *)mpt;
- /* ACPI may have already provided this data */
- if (!acpi_lapic)
- MP_processor_info(m);
- mpt += sizeof(*m);
- count += sizeof(*m);
- break;
- }
- case MP_BUS:
- {
- struct mpc_config_bus *m=
- (struct mpc_config_bus *)mpt;
- MP_bus_info(m);
- mpt += sizeof(*m);
- count += sizeof(*m);
- break;
- }
- case MP_IOAPIC:
- {
- struct mpc_config_ioapic *m=
- (struct mpc_config_ioapic *)mpt;
- MP_ioapic_info(m);
- mpt+=sizeof(*m);
- count+=sizeof(*m);
- break;
- }
- case MP_INTSRC:
- {
- struct mpc_config_intsrc *m=
- (struct mpc_config_intsrc *)mpt;
-
- MP_intsrc_info(m);
- mpt+=sizeof(*m);
- count+=sizeof(*m);
- break;
- }
- case MP_LINTSRC:
- {
- struct mpc_config_lintsrc *m=
- (struct mpc_config_lintsrc *)mpt;
- MP_lintsrc_info(m);
- mpt+=sizeof(*m);
- count+=sizeof(*m);
- break;
- }
- default:
- {
- count = mpc->mpc_length;
- break;
- }
- }
- ++mpc_record;
- }
-
- if (clustered_apic_mode){
- phys_cpu_present_map = logical_cpu_present_map;
- }
-
-
- printk("Enabling APIC mode: ");
- if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
- printk("Clustered Logical. ");
- else if (clustered_apic_mode == CLUSTERED_APIC_XAPIC)
- printk("Physical. ");
- else
- printk("Flat. ");
- printk("Using %d I/O APICs\n",nr_ioapics);
-
- if (!num_processors)
- printk(KERN_ERR "SMP mptable: no processors registered!\n");
- return num_processors;
-}
-
-static int __init ELCR_trigger(unsigned int irq)
-{
- unsigned int port;
-
- port = 0x4d0 + (irq >> 3);
- return (inb(port) >> (irq & 7)) & 1;
-}
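The ELCR occupies I/O ports 0x4d0 (IRQs 0-7) and 0x4d1 (IRQs 8-15), one bit per line, set when the line is level-triggered. A standalone sketch of the port/bit arithmetic above (no actual inb(), which would need I/O privilege):

    /* Standalone sketch of the ELCR addressing above: IRQs 0-7 live in
     * port 0x4d0, IRQs 8-15 in 0x4d1, one bit per line. */
    #include <stdio.h>

    int main(void)
    {
        int irqs[] = { 1, 9, 13 }, i;
        for (i = 0; i < 3; i++)
            printf("IRQ %2d -> port %#x, bit %d\n",
                   irqs[i], 0x4d0 + (irqs[i] >> 3), irqs[i] & 7);
        return 0;
    }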
-
-static void __init construct_default_ioirq_mptable(int mpc_default_type)
-{
- struct mpc_config_intsrc intsrc;
- int i;
- int ELCR_fallback = 0;
-
- intsrc.mpc_type = MP_INTSRC;
- intsrc.mpc_irqflag = 0; /* conforming */
- intsrc.mpc_srcbus = 0;
- intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
-
- intsrc.mpc_irqtype = mp_INT;
-
- /*
- * If true, we have an ISA/PCI system with no IRQ entries
- * in the MP table. To prevent the PCI interrupts from being set up
- * incorrectly, we try to use the ELCR. The sanity check to see if
- * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
- * never be level sensitive, so we simply see if the ELCR agrees.
- * If it does, we assume it's valid.
- */
- if (mpc_default_type == 5) {
- printk("ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
-
- if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
- printk("ELCR contains invalid data... not using ELCR\n");
- else {
- printk("Using ELCR to identify PCI interrupts\n");
- ELCR_fallback = 1;
- }
- }
-
- for (i = 0; i < 16; i++) {
- switch (mpc_default_type) {
- case 2:
- if (i == 0 || i == 13)
- continue; /* IRQ0 & IRQ13 not connected */
- /* fall through */
- default:
- if (i == 2)
- continue; /* IRQ2 is never connected */
- }
-
- if (ELCR_fallback) {
- /*
- * If the ELCR indicates a level-sensitive interrupt, we
- * copy that information over to the MP table in the
- * irqflag field (level sensitive, active high polarity).
- */
- if (ELCR_trigger(i))
- intsrc.mpc_irqflag = 13;
- else
- intsrc.mpc_irqflag = 0;
- }
-
- intsrc.mpc_srcbusirq = i;
- intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
- MP_intsrc_info(&intsrc);
- }
-
- intsrc.mpc_irqtype = mp_ExtINT;
- intsrc.mpc_srcbusirq = 0;
- intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
- MP_intsrc_info(&intsrc);
-}
-
-static inline void __init construct_default_ISA_mptable(int mpc_default_type)
-{
- struct mpc_config_processor processor;
- struct mpc_config_bus bus;
- struct mpc_config_ioapic ioapic;
- struct mpc_config_lintsrc lintsrc;
- int linttypes[2] = { mp_ExtINT, mp_NMI };
- int i;
- struct {
- int mp_bus_id_to_type[MAX_MP_BUSSES];
- int mp_bus_id_to_node[MAX_MP_BUSSES];
- int mp_bus_id_to_local[MAX_MP_BUSSES];
- int mp_bus_id_to_pci_bus[MAX_MP_BUSSES];
- struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
- } *bus_data;
-
- bus_data = (void *)__get_free_pages(GFP_KERNEL, get_order(sizeof(*bus_data)));
- if (!bus_data)
- panic("SMP mptable: out of memory!\n");
- mp_bus_id_to_type = bus_data->mp_bus_id_to_type;
- mp_bus_id_to_node = bus_data->mp_bus_id_to_node;
- mp_bus_id_to_local = bus_data->mp_bus_id_to_local;
- mp_bus_id_to_pci_bus = bus_data->mp_bus_id_to_pci_bus;
- mp_irqs = bus_data->mp_irqs;
- for (i = 0; i < MAX_MP_BUSSES; ++i)
- mp_bus_id_to_pci_bus[i] = -1;
-
- /*
- * local APIC has default address
- */
- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-
- /*
- * 2 CPUs, numbered 0 & 1.
- */
- processor.mpc_type = MP_PROCESSOR;
- /* Either an integrated APIC or a discrete 82489DX. */
- processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
- processor.mpc_cpuflag = CPU_ENABLED;
- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
- (boot_cpu_data.x86_model << 4) |
- boot_cpu_data.x86_mask;
- processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
- processor.mpc_reserved[0] = 0;
- processor.mpc_reserved[1] = 0;
- for (i = 0; i < 2; i++) {
- processor.mpc_apicid = i;
- MP_processor_info(&processor);
- }
-
- bus.mpc_type = MP_BUS;
- bus.mpc_busid = 0;
- switch (mpc_default_type) {
- default:
- printk("???\nUnknown standard configuration %d\n",
- mpc_default_type);
- /* fall through */
- case 1:
- case 5:
- memcpy(bus.mpc_bustype, "ISA ", 6);
- break;
- case 2:
- case 6:
- case 3:
- memcpy(bus.mpc_bustype, "EISA ", 6);
- break;
- case 4:
- case 7:
- memcpy(bus.mpc_bustype, "MCA ", 6);
- }
- MP_bus_info(&bus);
- if (mpc_default_type > 4) {
- bus.mpc_busid = 1;
- memcpy(bus.mpc_bustype, "PCI ", 6);
- MP_bus_info(&bus);
- }
-
- ioapic.mpc_type = MP_IOAPIC;
- ioapic.mpc_apicid = 2;
- ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
- ioapic.mpc_flags = MPC_APIC_USABLE;
- ioapic.mpc_apicaddr = 0xFEC00000;
- MP_ioapic_info(&ioapic);
-
- /*
- * We set up most of the low 16 IO-APIC pins according to MPS rules.
- */
- construct_default_ioirq_mptable(mpc_default_type);
-
- lintsrc.mpc_type = MP_LINTSRC;
- lintsrc.mpc_irqflag = 0; /* conforming */
- lintsrc.mpc_srcbusid = 0;
- lintsrc.mpc_srcbusirq = 0;
- lintsrc.mpc_destapic = MP_APIC_ALL;
- for (i = 0; i < 2; i++) {
- lintsrc.mpc_irqtype = linttypes[i];
- lintsrc.mpc_destapiclint = i;
- MP_lintsrc_info(&lintsrc);
- }
-}
-
-static struct intel_mp_floating *mpf_found;
-
-/*
- * Scan the memory blocks for an SMP configuration block.
- */
-void __init get_smp_config (void)
-{
- struct intel_mp_floating *mpf = mpf_found;
-
- /*
- * ACPI may be used to obtain the entire SMP configuration or just to
- * enumerate/configure processors (CONFIG_ACPI_HT_ONLY). Note that
- * ACPI supports both logical (e.g. Hyper-Threading) and physical
- * processors, where MPS only supports physical.
- */
- if (acpi_lapic && acpi_ioapic) {
- printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
- return;
- }
- else if (acpi_lapic)
- printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
-
- printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
- if (mpf->mpf_feature2 & (1<<7)) {
- printk(" IMCR and PIC compatibility mode.\n");
- pic_mode = 1;
- } else {
- printk(" Virtual Wire compatibility mode.\n");
- pic_mode = 0;
- }
-
- /*
- * Now see if we need to read further.
- */
- if (mpf->mpf_feature1 != 0) {
-
- printk("Default MP configuration #%d\n", mpf->mpf_feature1);
- construct_default_ISA_mptable(mpf->mpf_feature1);
-
- } else if (mpf->mpf_physptr) {
-
- /*
- * Read the physical hardware table. Anything here will
- * override the defaults.
- */
- if (!smp_read_mpc((void *)mpf->mpf_physptr)) {
- smp_found_config = 0;
- printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
- printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
- return;
- }
- /*
- * If there are no explicit MP IRQ entries, then we are
- * broken. We set up most of the low 16 IO-APIC pins to
- * ISA defaults and hope it will work.
- */
- if (!mp_irq_entries) {
- struct mpc_config_bus bus;
-
- printk("BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
-
- bus.mpc_type = MP_BUS;
- bus.mpc_busid = 0;
- memcpy(bus.mpc_bustype, "ISA ", 6);
- MP_bus_info(&bus);
-
- construct_default_ioirq_mptable(0);
- }
-
- } else
- BUG();
-
- printk("Processors: %d\n", num_processors);
- /*
- * Only use the first configuration found.
- */
-}
-
-static int __init smp_scan_config (unsigned long base, unsigned long length)
-{
- unsigned long *bp = phys_to_virt(base);
- struct intel_mp_floating *mpf;
-
- Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
- if (sizeof(*mpf) != 16)
- printk("Error: MPF size\n");
-
- while (length > 0) {
- mpf = (struct intel_mp_floating *)bp;
- if ((*bp == SMP_MAGIC_IDENT) &&
- (mpf->mpf_length == 1) &&
- !mpf_checksum((unsigned char *)bp, 16) &&
- ((mpf->mpf_specification == 1)
- || (mpf->mpf_specification == 4)) ) {
-
- smp_found_config = 1;
- printk("found SMP MP-table at %08lx\n",
- virt_to_phys(mpf));
- reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
- if (mpf->mpf_physptr)
- reserve_bootmem(mpf->mpf_physptr, PAGE_SIZE);
- mpf_found = mpf;
- return 1;
- }
- bp += 4;
- length -= 16;
- }
- return 0;
-}
-
-void __init find_intel_smp (void)
-{
- unsigned int address;
-
- /*
- * FIXME: Linux assumes you have 640K of base ram..
- * this continues the error...
- *
- * 1) Scan the bottom 1K for a signature
- * 2) Scan the top 1K of base RAM
- * 3) Scan the 64K of bios
- */
- if (smp_scan_config(0x0,0x400) ||
- smp_scan_config(639*0x400,0x400) ||
- smp_scan_config(0xF0000,0x10000))
- return;
- /*
- * If it is an SMP machine we should know now, unless the
- * configuration is in an EISA/MCA bus machine with an
- * extended bios data area.
- *
- * there is a real-mode segmented pointer pointing to the
- * 4K EBDA area at 0x40E, calculate and scan it here.
- *
- * NOTE! There were Linux loaders that will corrupt the EBDA
- * area, and as such this kind of SMP config may be less
- * trustworthy, simply because the SMP table may have been
- * stomped on during early boot. Thankfully the bootloaders
- * now honour the EBDA.
- */
-
- address = *(unsigned short *)phys_to_virt(0x40E);
- address <<= 4;
- smp_scan_config(address, 0x1000);
-}
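The EBDA lookup above reads the real-mode segment the BIOS stores at physical 0x40E and shifts it left four bits to form a physical address; a worked sketch (the segment value 0x9FC0 is an assumption for illustration):

    /* Standalone sketch of the EBDA pointer math above: the BIOS stores
     * a real-mode segment at 0x40E; physical address = segment << 4. */
    #include <stdio.h>

    int main(void)
    {
        unsigned short seg = 0x9FC0;            /* typical EBDA segment */
        unsigned long phys = (unsigned long)seg << 4;
        printf("EBDA segment %#x -> physical %#lx\n", seg, phys);
        /* prints 0x9fc00, i.e. just under the 640K boundary */
        return 0;
    }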
-
-#else
-
-/*
- * The Visual Workstation is Intel MP compliant in the hardware
- * sense, but it doesn't have a BIOS(-configuration table).
- * No problem for Linux.
- */
-void __init find_visws_smp(void)
-{
- smp_found_config = 1;
-
- phys_cpu_present_map |= 2; /* or in id 1 */
- apic_version[1] |= 0x10; /* integrated APIC */
- apic_version[0] |= 0x10;
-
- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-}
-
-#endif
-
-/*
- * - Intel MP Configuration Table
- * - or SGI Visual Workstation configuration
- */
-void __init find_smp_config (void)
-{
-#ifdef CONFIG_X86_LOCAL_APIC
- find_intel_smp();
-#endif
-#ifdef CONFIG_VISWS
- find_visws_smp();
-#endif
-}
-
-
-/* --------------------------------------------------------------------------
- ACPI-based MP Configuration
- -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI_BOOT
-
-void __init mp_register_lapic_address (
- u64 address)
-{
- mp_lapic_addr = (unsigned long) address;
-
- set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
-
- if (boot_cpu_physical_apicid == -1U)
- boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
-
- Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
-}
-
-
-void __init mp_register_lapic (
- u8 id,
- u8 enabled)
-{
- struct mpc_config_processor processor;
- int boot_cpu = 0;
-
- if (id >= MAX_APICS) {
- printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
- id, MAX_APICS);
- return;
- }
-
- if (id == boot_cpu_physical_apicid)
- boot_cpu = 1;
-
- processor.mpc_type = MP_PROCESSOR;
- processor.mpc_apicid = id;
-
- /*
- * mp_register_lapic_address(), which is called before this
- * function, sets up the fixmap of FIX_APIC_BASE, so the
- * correct APIC version can be read from there.
- */
- processor.mpc_apicver = apic_read(APIC_LVR);
-
- processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
- processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
- (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
- processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
- processor.mpc_reserved[0] = 0;
- processor.mpc_reserved[1] = 0;
-
- MP_processor_info(&processor);
-}
-
-#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
-
-#define MP_ISA_BUS 0
-#define MP_MAX_IOAPIC_PIN 127
-
-struct mp_ioapic_routing {
- int apic_id;
- int irq_start;
- int irq_end;
- u32 pin_programmed[4];
-} mp_ioapic_routing[MAX_IO_APICS];
-
-
-static int __init mp_find_ioapic (
- int irq)
-{
- int i = 0;
-
- /* Find the IOAPIC that manages this IRQ. */
- for (i = 0; i < nr_ioapics; i++) {
- if ((irq >= mp_ioapic_routing[i].irq_start)
- && (irq <= mp_ioapic_routing[i].irq_end))
- return i;
- }
-
- printk(KERN_ERR "ERROR: Unable to locate IOAPIC for IRQ %d\n", irq);
-
- return -1;
-}
-
-
-void __init mp_register_ioapic (
- u8 id,
- u32 address,
- u32 irq_base)
-{
- int idx = 0;
-
- if (nr_ioapics >= MAX_IO_APICS) {
- printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
- "(found %d)\n", MAX_IO_APICS, nr_ioapics);
- panic("Recompile kernel with bigger MAX_IO_APICS!\n");
- }
- if (!address) {
- printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
- " found in MADT table, skipping!\n");
- return;
- }
-
- idx = nr_ioapics++;
-
- mp_ioapics[idx].mpc_type = MP_IOAPIC;
- mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
- mp_ioapics[idx].mpc_apicaddr = address;
-
- set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
- mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id);
- mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
-
- /*
- * Build basic IRQ lookup table to facilitate irq->io_apic lookups
- * and to prevent reprogramming of IOAPIC pins (PCI IRQs).
- */
- mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
- mp_ioapic_routing[idx].irq_start = irq_base;
- mp_ioapic_routing[idx].irq_end = irq_base +
- io_apic_get_redir_entries(idx);
-
- printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
- "IRQ %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
- mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
- mp_ioapic_routing[idx].irq_start,
- mp_ioapic_routing[idx].irq_end);
-
- return;
-}
-
-
-void __init mp_override_legacy_irq (
- u8 bus_irq,
- u8 polarity,
- u8 trigger,
- u32 global_irq)
-{
- struct mpc_config_intsrc intsrc;
- int i = 0;
- int found = 0;
- int ioapic = -1;
- int pin = -1;
-
- /*
- * Convert 'global_irq' to 'ioapic.pin'.
- */
- ioapic = mp_find_ioapic(global_irq);
- if (ioapic < 0)
- return;
- pin = global_irq - mp_ioapic_routing[ioapic].irq_start;
-
- /*
- * TBD: This check is for faulty timer entries, where the override
- * erroneously sets the trigger to level, resulting in a HUGE
- * increase of timer interrupts!
- */
- if ((bus_irq == 0) && (global_irq == 2) && (trigger == 3))
- trigger = 1;
-
- intsrc.mpc_type = MP_INTSRC;
- intsrc.mpc_irqtype = mp_INT;
- intsrc.mpc_irqflag = (trigger << 2) | polarity;
- intsrc.mpc_srcbus = MP_ISA_BUS;
- intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
- intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
- intsrc.mpc_dstirq = pin; /* INTIN# */
-
- Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
- intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
- (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
- intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
-
- /*
- * If an [IOAPIC.PIN -> IRQ] routing entry already exists, we override it.
- * Otherwise create a new entry (e.g. global_irq == 2).
- */
- for (i = 0; i < mp_irq_entries; i++) {
- if ((mp_irqs[i].mpc_dstapic == intsrc.mpc_dstapic)
- && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) {
- mp_irqs[i] = intsrc;
- found = 1;
- break;
- }
- }
- if (!found) {
- mp_irqs[mp_irq_entries] = intsrc;
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!\n");
- }
-
- return;
-}
-
-
-void __init mp_config_acpi_legacy_irqs (void)
-{
- int i = 0;
- int ioapic = -1;
-
- /*
- * Initialize mp_irqs for IRQ configuration.
- */
- unsigned char *bus_data;
- int count;
-
- count = (MAX_MP_BUSSES * sizeof(int)) * 4;
- count += (MAX_IRQ_SOURCES * sizeof(int)) * 4;
- bus_data = (void *)__get_free_pages(GFP_KERNEL, get_order(count));
- if (!bus_data) {
- panic("Fatal: can't allocate bus memory for ACPI legacy IRQ!");
- }
- mp_bus_id_to_type = (int *)&bus_data[0];
- mp_bus_id_to_node = (int *)&bus_data[(MAX_MP_BUSSES * sizeof(int))];
- mp_bus_id_to_local = (int *)&bus_data[(MAX_MP_BUSSES * sizeof(int)) * 2];
- mp_bus_id_to_pci_bus = (int *)&bus_data[(MAX_MP_BUSSES * sizeof(int)) * 3];
- mp_irqs = (struct mpc_config_intsrc *)&bus_data[(MAX_MP_BUSSES * sizeof(int)) * 4];
- for (i = 0; i < MAX_MP_BUSSES; ++i)
- mp_bus_id_to_pci_bus[i] = -1;
-
- /*
- * Fabricate the legacy ISA bus (bus #31).
- */
- mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
- Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
-
- /*
- * Locate the IOAPIC that manages the ISA IRQs (0-15).
- */
- ioapic = mp_find_ioapic(0);
- if (ioapic < 0)
- return;
-
- /*
- * Use the default configuration for the IRQs 0-15. These may be
- * overridden by (MADT) interrupt source override entries.
- */
- for (i = 0; i < 16; i++) {
-
- if (i == 2) continue; /* Don't connect IRQ2 */
-
- mp_irqs[mp_irq_entries].mpc_type = MP_INTSRC;
- mp_irqs[mp_irq_entries].mpc_irqflag = 0; /* Conforming */
- mp_irqs[mp_irq_entries].mpc_srcbus = MP_ISA_BUS;
- mp_irqs[mp_irq_entries].mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
- mp_irqs[mp_irq_entries].mpc_irqtype = i ? mp_INT : mp_ExtINT; /* 8259A to #0 */
- mp_irqs[mp_irq_entries].mpc_srcbusirq = i; /* Identity mapped */
- mp_irqs[mp_irq_entries].mpc_dstirq = i;
-
- Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
- "%d-%d\n",
- mp_irqs[mp_irq_entries].mpc_irqtype,
- mp_irqs[mp_irq_entries].mpc_irqflag & 3,
- (mp_irqs[mp_irq_entries].mpc_irqflag >> 2) & 3,
- mp_irqs[mp_irq_entries].mpc_srcbus,
- mp_irqs[mp_irq_entries].mpc_srcbusirq,
- mp_irqs[mp_irq_entries].mpc_dstapic,
- mp_irqs[mp_irq_entries].mpc_dstirq);
-
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!\n");
- }
-}
-
-/*extern FADT_DESCRIPTOR acpi_fadt;*/
-
-void __init mp_config_ioapic_for_sci(int irq)
-{
- int ioapic;
- int ioapic_pin;
- struct acpi_table_madt* madt;
- struct acpi_table_int_src_ovr *entry = NULL;
- acpi_interrupt_flags flags;
- void *madt_end;
- acpi_status status;
-
- /*
- * Ensure that if there is an interrupt source override entry
- * for the ACPI SCI, we leave it as is. Unfortunately this involves
- * walking the MADT again.
- */
- status = acpi_get_firmware_table("APIC", 1, ACPI_LOGICAL_ADDRESSING,
- (struct acpi_table_header **) &madt);
- if (ACPI_SUCCESS(status)) {
- madt_end = (void *) (unsigned long)madt + madt->header.length;
-
- entry = (struct acpi_table_int_src_ovr *)
- ((unsigned long) madt + sizeof(struct acpi_table_madt));
-
- while ((void *) entry < madt_end) {
- if (entry->header.type == ACPI_MADT_INT_SRC_OVR &&
- acpi_fadt.sci_int == entry->bus_irq)
- goto found;
-
- entry = (struct acpi_table_int_src_ovr *)
- ((unsigned long) entry + entry->header.length);
- }
- }
- /*
- * Although the ACPI spec says that the SCI should be level/low
- * don't reprogram it unless there is an explicit MADT OVR entry
- * instructing us to do so -- otherwise we break Tyan boards which
- * have the SCI wired edge/high but no MADT OVR.
- */
- return;
-
-found:
- /*
- * See the note at the end of ACPI 2.0b section
- * 5.2.10.8 for what this is about.
- */
- flags = entry->flags;
- acpi_fadt.sci_int = entry->global_irq;
- irq = entry->global_irq;
-
- ioapic = mp_find_ioapic(irq);
-
- ioapic_pin = irq - mp_ioapic_routing[ioapic].irq_start;
-
- /*
- * MPS INTI flags:
- * trigger: 0=default, 1=edge, 3=level
- * polarity: 0=default, 1=high, 3=low
- * Per ACPI spec, default for SCI means level/low.
- */
- io_apic_set_pci_routing(ioapic, ioapic_pin, irq,
- (flags.trigger == 1 ? 0 : 1), (flags.polarity == 1 ? 0 : 1));
-}
-
-
-#ifdef CONFIG_ACPI_PCI
-
-void __init mp_parse_prt (void)
-{
- struct list_head *node = NULL;
- struct acpi_prt_entry *entry = NULL;
- int ioapic = -1;
- int ioapic_pin = 0;
- int irq = 0;
- int idx, bit = 0;
- int edge_level = 0;
- int active_high_low = 0;
-
- /*
- * Parsing through the PCI Interrupt Routing Table (PRT) and program
- * routing for all entries.
- */
- list_for_each(node, &acpi_prt.entries) {
- entry = list_entry(node, struct acpi_prt_entry, node);
-
- /* Need to get irq for dynamic entry */
- if (entry->link.handle) {
- irq = acpi_pci_link_get_irq(entry->link.handle, entry->link.index, &edge_level, &active_high_low);
- if (!irq)
- continue;
- }
- else {
- /* Hardwired IRQ. Assume PCI standard settings */
- irq = entry->link.index;
- edge_level = 1;
- active_high_low = 1;
- }
-
- /* Don't set up the ACPI SCI because it's already set up */
- if (acpi_fadt.sci_int == irq) {
- entry->irq = irq; /*we still need to set entry's irq*/
- continue;
- }
-
- ioapic = mp_find_ioapic(irq);
- if (ioapic < 0)
- continue;
- ioapic_pin = irq - mp_ioapic_routing[ioapic].irq_start;
-
- /*
- * Avoid pin reprogramming. PRTs typically include entries
- * with redundant pin->irq mappings (but unique PCI devices);
- * we only program the IOAPIC on the first.
- */
- bit = ioapic_pin % 32;
- idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
- if (idx > 3) {
- printk(KERN_ERR "Invalid reference to IOAPIC pin "
- "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
- ioapic_pin);
- continue;
- }
- if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
- printk(KERN_DEBUG "Pin %d-%d already programmed\n",
- mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
- entry->irq = irq;
- continue;
- }
-
- mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
-
- if (!io_apic_set_pci_routing(ioapic, ioapic_pin, irq, edge_level, active_high_low))
- entry->irq = irq;
-
- printk(KERN_DEBUG "%02x:%02x:%02x[%c] -> %d-%d -> IRQ %d\n",
- entry->id.segment, entry->id.bus,
- entry->id.device, ('A' + entry->pin),
- mp_ioapic_routing[ioapic].apic_id, ioapic_pin,
- entry->irq);
- }
-
- print_IO_APIC();
-
- return;
-}
-
-#endif /*CONFIG_ACPI_PCI*/
-
-#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/
-
-#endif /*CONFIG_ACPI*/
+++ /dev/null
-/*
- * linux/arch/i386/nmi.c
- *
- * NMI watchdog support on APIC systems
- *
- * Started by Ingo Molnar <mingo@redhat.com>
- *
- * Fixes:
- * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
- * Mikael Pettersson : Power Management for local APIC NMI watchdog.
- * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
- * Keir Fraser : Pentium 4 Hyperthreading support
- */
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/lib.h>
-#include <xen/mm.h>
-#include <xen/irq.h>
-#include <xen/delay.h>
-#include <xen/interrupt.h>
-#include <xen/time.h>
-#include <xen/timex.h>
-#include <xen/sched.h>
-
-#include <asm/mc146818rtc.h>
-#include <asm/smp.h>
-#include <asm/msr.h>
-#include <asm/mpspec.h>
-
-unsigned int nmi_watchdog = NMI_NONE;
-unsigned int watchdog_on = 0;
-static unsigned int nmi_hz = HZ;
-unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
-extern void show_registers(struct pt_regs *regs);
-
-extern int logical_proc_id[];
-
-#define K7_EVNTSEL_ENABLE (1 << 22)
-#define K7_EVNTSEL_INT (1 << 20)
-#define K7_EVNTSEL_OS (1 << 17)
-#define K7_EVNTSEL_USR (1 << 16)
-#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
-#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
-
-#define P6_EVNTSEL0_ENABLE (1 << 22)
-#define P6_EVNTSEL_INT (1 << 20)
-#define P6_EVNTSEL_OS (1 << 17)
-#define P6_EVNTSEL_USR (1 << 16)
-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
-#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
-
-#define MSR_P4_MISC_ENABLE 0x1A0
-#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
-#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
-#define MSR_P4_PERFCTR0 0x300
-#define MSR_P4_CCCR0 0x360
-#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
-#define P4_ESCR_OS0 (1<<3)
-#define P4_ESCR_USR0 (1<<2)
-#define P4_ESCR_OS1 (1<<1)
-#define P4_ESCR_USR1 (1<<0)
-#define P4_CCCR_OVF_PMI0 (1<<26)
-#define P4_CCCR_OVF_PMI1 (1<<27)
-#define P4_CCCR_THRESHOLD(N) ((N)<<20)
-#define P4_CCCR_COMPLEMENT (1<<19)
-#define P4_CCCR_COMPARE (1<<18)
-#define P4_CCCR_REQUIRED (3<<16)
-#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
-#define P4_CCCR_ENABLE (1<<12)
-/*
- * Set up IQ_COUNTER{0,1} to behave like a clock, by having IQ_CCCR{0,1} filter
- * CRU_ESCR0 (with any non-null event selector) through a complemented
- * max threshold. [IA32-Vol3, Section 14.9.9]
- */
-#define MSR_P4_IQ_COUNTER0 0x30C
-#define MSR_P4_IQ_COUNTER1 0x30D
-#define MSR_P4_IQ_CCCR0 0x36C
-#define MSR_P4_IQ_CCCR1 0x36D
-#define MSR_P4_CRU_ESCR0 0x3B8 /* ESCR no. 4 */
-#define P4_NMI_CRU_ESCR0 \
- (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS0|P4_ESCR_USR0| \
- P4_ESCR_OS1|P4_ESCR_USR1)
-#define P4_NMI_IQ_CCCR0 \
- (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
- P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
-#define P4_NMI_IQ_CCCR1 \
- (P4_CCCR_OVF_PMI1|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
- P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
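/*
 * Illustrative sketch (hypothetical helper, not in the original source):
 * the watchdog programs each perfctr with a negative count so that it
 * overflows -- raising an NMI -- after roughly cpu_khz*1000/nmi_hz
 * unhalted clock cycles, i.e. nmi_hz times per second.
 */
static inline unsigned int nmi_perfctr_reload(unsigned int khz, unsigned int hz)
{
    /* e.g. khz = 2000000 (2 GHz), hz = 1 -> reload wraps to -2000000000 */
    return 0U - (khz / hz * 1000U);
}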
-
-int __init check_nmi_watchdog (void)
-{
- unsigned int prev_nmi_count[NR_CPUS];
- int j, cpu;
-
- if ( !nmi_watchdog )
- return 0;
-
- printk("Testing NMI watchdog --- ");
-
- for ( j = 0; j < smp_num_cpus; j++ )
- {
- cpu = cpu_logical_map(j);
- prev_nmi_count[cpu] = irq_stat[cpu].__nmi_count;
- }
- sti();
- mdelay((10*1000)/nmi_hz); /* wait 10 ticks */
-
- for ( j = 0; j < smp_num_cpus; j++ )
- {
- cpu = cpu_logical_map(j);
- if ( nmi_count(cpu) - prev_nmi_count[cpu] <= 5 )
- printk("CPU#%d stuck. ", cpu);
- else
- printk("CPU#%d okay. ", cpu);
- }
-
- printk("\n");
-
- /* now that we know it works we can reduce NMI frequency to
- something more reasonable; makes a difference in some configs */
- if ( nmi_watchdog == NMI_LOCAL_APIC )
- nmi_hz = 1;
-
- return 0;
-}
-
-static inline void nmi_pm_init(void) { }
-#define __pminit __init
-
-/*
- * Activate the NMI watchdog via the local APIC.
- * Original code written by Keith Owens.
- */
-
-static void __pminit clear_msr_range(unsigned int base, unsigned int n)
-{
- unsigned int i;
- for ( i = 0; i < n; i++ )
- wrmsr(base+i, 0, 0);
-}
-
-static void __pminit setup_k7_watchdog(void)
-{
- unsigned int evntsel;
-
- nmi_perfctr_msr = MSR_K7_PERFCTR0;
-
- clear_msr_range(MSR_K7_EVNTSEL0, 4);
- clear_msr_range(MSR_K7_PERFCTR0, 4);
-
- evntsel = K7_EVNTSEL_INT
- | K7_EVNTSEL_OS
- | K7_EVNTSEL_USR
- | K7_NMI_EVENT;
-
- wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
- Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
- wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= K7_EVNTSEL_ENABLE;
- wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
-}
-
-static void __pminit setup_p6_watchdog(void)
-{
- unsigned int evntsel;
-
- nmi_perfctr_msr = MSR_P6_PERFCTR0;
-
- clear_msr_range(MSR_P6_EVNTSEL0, 2);
- clear_msr_range(MSR_P6_PERFCTR0, 2);
-
- evntsel = P6_EVNTSEL_INT
- | P6_EVNTSEL_OS
- | P6_EVNTSEL_USR
- | P6_NMI_EVENT;
-
- wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
- Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
- wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= P6_EVNTSEL0_ENABLE;
- wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
-}
-
-static int __pminit setup_p4_watchdog(void)
-{
- unsigned int misc_enable, dummy;
-
- rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
- if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
- return 0;
-
- nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
-
- if ( logical_proc_id[smp_processor_id()] == 0 )
- {
- if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
- clear_msr_range(0x3F1, 2);
-        /* MSR 0x3F0 seems to have a default value of 0xFC00, but the current
-           docs don't fully define it, so leave it alone for now. */
- clear_msr_range(0x3A0, 31);
- clear_msr_range(0x3C0, 6);
- clear_msr_range(0x3C8, 6);
- clear_msr_range(0x3E0, 2);
- clear_msr_range(MSR_P4_CCCR0, 18);
- clear_msr_range(MSR_P4_PERFCTR0, 18);
-
- wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
- wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
- Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
- wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
- }
- else if ( logical_proc_id[smp_processor_id()] == 1 )
- {
- wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1 & ~P4_CCCR_ENABLE, 0);
-        Dprintk("setting P4_IQ_COUNTER1 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
- wrmsr(MSR_P4_IQ_COUNTER1, -(cpu_khz/nmi_hz*1000), -1);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1, 0);
- }
- else
- {
- return 0;
- }
-
- return 1;
-}
-
-void __pminit setup_apic_nmi_watchdog(void)
-{
- if (!nmi_watchdog)
- return;
-
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
- return;
- setup_k7_watchdog();
- break;
- case X86_VENDOR_INTEL:
- switch (boot_cpu_data.x86) {
- case 6:
- setup_p6_watchdog();
- break;
- case 15:
- if (!setup_p4_watchdog())
- return;
- break;
- default:
- return;
- }
- break;
- default:
- return;
- }
- nmi_pm_init();
-}
-
-
-static unsigned int
-last_irq_sums [NR_CPUS],
- alert_counter [NR_CPUS];
-
-void touch_nmi_watchdog (void)
-{
- int i;
- for (i = 0; i < smp_num_cpus; i++)
- alert_counter[i] = 0;
-}
-
-void nmi_watchdog_tick (struct pt_regs * regs)
-{
- extern spinlock_t console_lock;
- extern void die(const char * str, struct pt_regs * regs, long err);
-
- int sum, cpu = smp_processor_id();
-
- sum = apic_timer_irqs[cpu];
-
- if ( (last_irq_sums[cpu] == sum) && watchdog_on )
- {
- /*
- * Ayiee, looks like this CPU is stuck ... wait a few IRQs (5 seconds)
- * before doing the oops ...
- */
- alert_counter[cpu]++;
- if (alert_counter[cpu] == 5*nmi_hz) {
- console_lock = SPIN_LOCK_UNLOCKED;
- die("NMI Watchdog detected LOCKUP on CPU", regs, cpu);
- }
- }
- else
- {
- last_irq_sums[cpu] = sum;
- alert_counter[cpu] = 0;
- }
-
- if ( nmi_perfctr_msr )
- {
- if ( nmi_perfctr_msr == MSR_P4_IQ_COUNTER0 )
- {
- if ( logical_proc_id[cpu] == 0 )
- {
- wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
- }
- else
- {
- wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1, 0);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- wrmsr(MSR_P4_IQ_COUNTER1, -(cpu_khz/nmi_hz*1000), -1);
- }
- }
- else
- {
- wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
- }
- }
-}
+++ /dev/null
-/*
- * Dynamic DMA mapping support.
- *
- * On i386 there is no hardware dynamic DMA address translation,
- * so consistent alloc/free are merely page allocation/freeing.
- * The rest of the dynamic DMA mapping interface is implemented
- * in asm/pci.h.
- */
-
-#include <xen/types.h>
-#include <xen/mm.h>
-#include <xen/lib.h>
-#include <xen/pci.h>
-#include <asm/io.h>
-
-void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
- dma_addr_t *dma_handle)
-{
- void *ret;
- int gfp = GFP_ATOMIC;
-
- if (hwdev == NULL || ((u32)hwdev->dma_mask < 0xffffffff))
- gfp |= GFP_DMA;
- ret = (void *)__get_free_pages(gfp, get_order(size));
-
- if (ret != NULL) {
- memset(ret, 0, size);
- *dma_handle = virt_to_bus(ret);
- }
- return ret;
-}
-
-void pci_free_consistent(struct pci_dev *hwdev, size_t size,
- void *vaddr, dma_addr_t dma_handle)
-{
- free_pages((unsigned long)vaddr, get_order(size));
-}
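/*
 * Usage sketch (illustrative; `pdev' is a hypothetical pci_dev pointer):
 *
 *   dma_addr_t handle;
 *   void *buf = pci_alloc_consistent(pdev, PAGE_SIZE, &handle);
 *   if (buf != NULL)
 *       pci_free_consistent(pdev, PAGE_SIZE, buf, handle);
 *
 * The returned buffer is zeroed and `handle' is its bus address.
 */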
+++ /dev/null
-/*
- * Low-Level PCI Access for i386 machines
- *
- * Copyright 1993, 1994 Drew Eckhardt
- * Visionary Computing
- * (Unix and Linux consulting and custom programming)
- * Drew@Colorado.EDU
- * +1 (303) 786-7975
- *
- * Drew's work was sponsored by:
- * iX Multiuser Multitasking Magazine
- * Hannover, Germany
- * hm@ix.de
- *
- * Copyright 1997--2000 Martin Mares <mj@ucw.cz>
- *
- * For more information, please consult the following manuals (look at
- * http://www.pcisig.com/ for how to get them):
- *
- * PCI BIOS Specification
- * PCI Local Bus Specification
- * PCI to PCI Bridge Specification
- * PCI System Design Guide
- *
- *
- * CHANGELOG :
- * Jun 17, 1994 : Modified to accommodate the broken pre-PCI BIOS SPECIFICATION
- * Revision 2.0 present on <thys@dennis.ee.up.ac.za>'s ASUS mainboard.
- *
- * Jan 5, 1995 : Modified to probe PCI hardware at boot time by Frederic
- * Potter, potter@cao-vlsi.ibp.fr
- *
- * Jan 10, 1995 : Modified to store the information about configured pci
- * devices into a list, which can be accessed via /proc/pci by
- * Curtis Varner, cvarner@cs.ucr.edu
- *
- * Jan 12, 1995 : CPU-PCI bridge optimization support by Frederic Potter.
- * Alpha version. Intel & UMC chipset support only.
- *
- * Apr 16, 1995 : Source merge with the DEC Alpha PCI support. Most of the code
- * moved to drivers/pci/pci.c.
- *
- * Dec 7, 1996 : Added support for direct configuration access of boards
- * with Intel compatible access schemes (tsbogend@alpha.franken.de)
- *
- * Feb 3, 1997 : Set internal functions to static, save/restore flags
- * avoid dead locks reading broken PCI BIOS, werner@suse.de
- *
- * Apr 26, 1997 : Fixed case when there is BIOS32, but not PCI BIOS
- * (mj@atrey.karlin.mff.cuni.cz)
- *
- * May 7, 1997 : Added some missing cli()'s. [mj]
- *
- * Jun 20, 1997 : Corrected problems in "conf1" type accesses.
- * (paubert@iram.es)
- *
- * Aug 2, 1997 : Split to PCI BIOS handling and direct PCI access parts
- * and cleaned it up... Martin Mares <mj@atrey.karlin.mff.cuni.cz>
- *
- * Feb 6, 1998 : No longer using BIOS to find devices and device classes. [mj]
- *
- * May 1, 1998 : Support for peer host bridges. [mj]
- *
- * Jun 19, 1998 : Changed to use spinlocks, so that PCI configuration space
- * can be accessed from interrupts even on SMP systems. [mj]
- *
- * August 1998 : Better support for peer host bridges and more paranoid
- * checks for direct hardware access. Ugh, this file starts to look as
- * a large gallery of common hardware bug workarounds (watch the comments)
- * -- the PCI specs themselves are sane, but most implementors should be
- * hit hard with \hammer scaled \magstep5. [mj]
- *
- * Jan 23, 1999 : More improvements to peer host bridge logic. i450NX fixup. [mj]
- *
- * Feb 8, 1999 : Added UM8886BF I/O address fixup. [mj]
- *
- * August 1999 : New resource management and configuration access stuff. [mj]
- *
- * Sep 19, 1999 : Use PCI IRQ routing tables for detection of peer host bridges.
- * Based on ideas by Chris Frantz and David Hinds. [mj]
- *
- * Sep 28, 1999 : Handle unreported/unassigned IRQs. Thanks to Shuu Yamaguchi
- * for a lot of patience during testing. [mj]
- *
- * Oct 8, 1999 : Split to pci-i386.c, pci-pc.c and pci-visws.c. [mj]
- */
-
-#include <xen/types.h>
-#include <xen/lib.h>
-#include <xen/pci.h>
-#include <xen/init.h>
-#include <xen/ioport.h>
-#include <xen/errno.h>
-
-#include "pci-i386.h"
-
-void
-pcibios_update_resource(struct pci_dev *dev, struct resource *root,
- struct resource *res, int resource)
-{
- u32 new, check;
- int reg;
-
- new = res->start | (res->flags & PCI_REGION_FLAG_MASK);
- if (resource < 6) {
- reg = PCI_BASE_ADDRESS_0 + 4*resource;
- } else if (resource == PCI_ROM_RESOURCE) {
- res->flags |= PCI_ROM_ADDRESS_ENABLE;
- new |= PCI_ROM_ADDRESS_ENABLE;
- reg = dev->rom_base_reg;
- } else {
- /* Somebody might have asked allocation of a non-standard resource */
- return;
- }
-
- pci_write_config_dword(dev, reg, new);
- pci_read_config_dword(dev, reg, &check);
- if ((new ^ check) & ((new & PCI_BASE_ADDRESS_SPACE_IO) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK)) {
- printk(KERN_ERR "PCI: Error while updating region "
- "%s/%d (%08x != %08x)\n", dev->slot_name, resource,
- new, check);
- }
-}
-
-/*
- * We need to avoid collisions with `mirrored' VGA ports
- * and other strange ISA hardware, so we always want the
- * addresses to be allocated in the 0x000-0x0ff region
- * modulo 0x400.
- *
- * Why? Because some silly external IO cards only decode
- * the low 10 bits of the IO address. The 0x00-0xff region
- * is reserved for motherboard devices that decode all 16
- * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
- * but we want to try to avoid allocating at 0x2900-0x2bff
- * which might be mirrored at 0x0100-0x03ff.
- */
-void
-pcibios_align_resource(void *data, struct resource *res,
- unsigned long size, unsigned long align)
-{
- if (res->flags & IORESOURCE_IO) {
- unsigned long start = res->start;
-
- if (start & 0x300) {
- start = (start + 0x3ff) & ~0x3ff;
- res->start = start;
- }
- }
-}
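/* Worked example (illustrative): an I/O request starting at 0x2900 has
   bits 0x300 set, so it is rounded up to the next 0x400 boundary:
   (0x2900 + 0x3ff) & ~0x3ff == 0x2c00, whose low 10 bits fall back in
   the safe 0x000-0x0ff range. */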
-
-
-/*
- * Handle resources of PCI devices. If the world were perfect, we could
- * just allocate all the resource regions and do nothing more. It isn't.
- * On the other hand, we cannot just re-allocate all devices, as it would
- * require us to know lots of host bridge internals. So we attempt to
- * keep as much of the original configuration as possible, but tweak it
- * when it's found to be wrong.
- *
- * Known BIOS problems we have to work around:
- * - I/O or memory regions not configured
- * - regions configured, but not enabled in the command register
- * - bogus I/O addresses above 64K used
- * - expansion ROMs left enabled (this may sound harmless, but given
- * the fact the PCI specs explicitly allow address decoders to be
- * shared between expansion ROMs and other resource regions, it's
- * at least dangerous)
- *
- * Our solution:
- * (1) Allocate resources for all buses behind PCI-to-PCI bridges.
- * This gives us fixed barriers on where we can allocate.
- * (2) Allocate resources for all enabled devices. If there is
- * a collision, just mark the resource as unallocated. Also
- * disable expansion ROMs during this step.
- * (3) Try to allocate resources for disabled devices. If the
- * resources were assigned correctly, everything goes well,
- * if they weren't, they won't disturb allocation of other
- * resources.
- * (4) Assign new addresses to resources which were either
- * not configured at all or misconfigured. If explicitly
- * requested by the user, configure expansion ROM address
- * as well.
- */
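/* Steps (1)-(4) above correspond to pcibios_allocate_bus_resources(),
   pcibios_allocate_resources(0), pcibios_allocate_resources(1) and
   pcibios_assign_resources(); see pcibios_resource_survey() below. */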
-
-static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
-{
- struct list_head *ln;
- struct pci_bus *bus;
- struct pci_dev *dev;
- int idx;
- struct resource *r, *pr;
-
- /* Depth-First Search on bus tree */
- for (ln=bus_list->next; ln != bus_list; ln=ln->next) {
- bus = pci_bus_b(ln);
- if ((dev = bus->self)) {
- for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) {
- r = &dev->resource[idx];
- if (!r->start)
- continue;
- pr = pci_find_parent_resource(dev, r);
- if (!pr || request_resource(pr, r) < 0)
- printk(KERN_ERR "PCI: Cannot allocate resource region %d of bridge %s\n", idx, dev->slot_name);
- }
- }
- pcibios_allocate_bus_resources(&bus->children);
- }
-}
-
-static void __init pcibios_allocate_resources(int pass)
-{
- struct pci_dev *dev;
- int idx, disabled;
- u16 command;
- struct resource *r, *pr;
-
- pci_for_each_dev(dev) {
- pci_read_config_word(dev, PCI_COMMAND, &command);
- for(idx = 0; idx < 6; idx++) {
- r = &dev->resource[idx];
- if (r->parent) /* Already allocated */
- continue;
- if (!r->start) /* Address not assigned at all */
- continue;
- if (r->flags & IORESOURCE_IO)
- disabled = !(command & PCI_COMMAND_IO);
- else
- disabled = !(command & PCI_COMMAND_MEMORY);
- if (pass == disabled) {
- DBG("PCI: Resource %08lx-%08lx (f=%lx, d=%d, p=%d)\n",
- r->start, r->end, r->flags, disabled, pass);
- pr = pci_find_parent_resource(dev, r);
- if (!pr || request_resource(pr, r) < 0) {
- printk(KERN_ERR "PCI: Cannot allocate resource region %d of device %s\n", idx, dev->slot_name);
- /* We'll assign a new address later */
- r->end -= r->start;
- r->start = 0;
- }
- }
- }
- if (!pass) {
- r = &dev->resource[PCI_ROM_RESOURCE];
- if (r->flags & PCI_ROM_ADDRESS_ENABLE) {
- /* Turn the ROM off, leave the resource region, but keep it unregistered. */
- u32 reg;
- DBG("PCI: Switching off ROM of %s\n", dev->slot_name);
- r->flags &= ~PCI_ROM_ADDRESS_ENABLE;
-				pci_read_config_dword(dev, dev->rom_base_reg, &reg);
- pci_write_config_dword(dev, dev->rom_base_reg, reg & ~PCI_ROM_ADDRESS_ENABLE);
- }
- }
- }
-}
-
-static void __init pcibios_assign_resources(void)
-{
- struct pci_dev *dev;
- int idx;
- struct resource *r;
-
- pci_for_each_dev(dev) {
- int class = dev->class >> 8;
-
- /* Don't touch classless devices and host bridges */
- if (!class || class == PCI_CLASS_BRIDGE_HOST)
- continue;
-
- for(idx=0; idx<6; idx++) {
- r = &dev->resource[idx];
-
- /*
- * Don't touch IDE controllers and I/O ports of video cards!
- */
- if ((class == PCI_CLASS_STORAGE_IDE && idx < 4) ||
- (class == PCI_CLASS_DISPLAY_VGA && (r->flags & IORESOURCE_IO)))
- continue;
-
- /*
- * We shall assign a new address to this resource, either because
- * the BIOS forgot to do so or because we have decided the old
- * address was unusable for some reason.
- */
- if (!r->start && r->end)
- pci_assign_resource(dev, idx);
- }
-
- if (pci_probe & PCI_ASSIGN_ROMS) {
- r = &dev->resource[PCI_ROM_RESOURCE];
- r->end -= r->start;
- r->start = 0;
- if (r->end)
- pci_assign_resource(dev, PCI_ROM_RESOURCE);
- }
- }
-}
-
-void __init pcibios_set_cacheline_size(void)
-{
- struct cpuinfo_x86 *c = &boot_cpu_data;
-
- pci_cache_line_size = 32 >> 2;
- if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD)
- pci_cache_line_size = 64 >> 2; /* K7 & K8 */
- else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL)
- pci_cache_line_size = 128 >> 2; /* P4 */
-}
-
-void __init pcibios_resource_survey(void)
-{
- DBG("PCI: Allocating resources\n");
- pcibios_allocate_bus_resources(&pci_root_buses);
- pcibios_allocate_resources(0);
- pcibios_allocate_resources(1);
- pcibios_assign_resources();
-}
-
-int pcibios_enable_resources(struct pci_dev *dev, int mask)
-{
- u16 cmd, old_cmd;
- int idx;
- struct resource *r;
-
- pci_read_config_word(dev, PCI_COMMAND, &cmd);
- old_cmd = cmd;
- for(idx=0; idx<6; idx++) {
- /* Only set up the requested stuff */
- if (!(mask & (1<<idx)))
- continue;
-
- r = &dev->resource[idx];
- if (!r->start && r->end) {
- printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", dev->slot_name);
- return -EINVAL;
- }
- if (r->flags & IORESOURCE_IO)
- cmd |= PCI_COMMAND_IO;
- if (r->flags & IORESOURCE_MEM)
- cmd |= PCI_COMMAND_MEMORY;
- }
- if (dev->resource[PCI_ROM_RESOURCE].start)
- cmd |= PCI_COMMAND_MEMORY;
- if (cmd != old_cmd) {
- printk("PCI: Enabling device %s (%04x -> %04x)\n", dev->slot_name, old_cmd, cmd);
- pci_write_config_word(dev, PCI_COMMAND, cmd);
- }
- return 0;
-}
-
-/*
- * If we set up a device for bus mastering, we need to check the latency
- * timer as certain crappy BIOSes forget to set it properly.
- */
-unsigned int pcibios_max_latency = 255;
-
-void pcibios_set_master(struct pci_dev *dev)
-{
- u8 lat;
- pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
- if (lat < 16)
- lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
- else if (lat > pcibios_max_latency)
- lat = pcibios_max_latency;
- else
- return;
- printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n", dev->slot_name, lat);
- pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
-}
-
-#if 0
-int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
- enum pci_mmap_state mmap_state, int write_combine)
-{
- unsigned long prot;
-
- /* I/O space cannot be accessed via normal processor loads and
- * stores on this platform.
- */
- if (mmap_state == pci_mmap_io)
- return -EINVAL;
-
- /* Leave vm_pgoff as-is, the PCI space address is the physical
- * address on this platform.
- */
- vma->vm_flags |= (VM_SHM | VM_LOCKED | VM_IO);
-
- prot = pgprot_val(vma->vm_page_prot);
- if (boot_cpu_data.x86 > 3)
- prot |= _PAGE_PCD | _PAGE_PWT;
- vma->vm_page_prot = __pgprot(prot);
-
- /* Write-combine setting is ignored, it is changed via the mtrr
- * interfaces on this platform.
- */
- if (remap_page_range(vma->vm_start, vma->vm_pgoff << PAGE_SHIFT,
- vma->vm_end - vma->vm_start,
- vma->vm_page_prot))
- return -EAGAIN;
-
- return 0;
-}
-#endif
+++ /dev/null
-/*
- * Low-Level PCI Access for i386 machines.
- *
- * (c) 1999 Martin Mares <mj@ucw.cz>
- */
-
-#undef DEBUG
-
-#ifdef DEBUG
-#define DBG(x...) printk(x)
-#else
-#define DBG(x...)
-#endif
-
-#define PCI_PROBE_BIOS 0x0001
-#define PCI_PROBE_CONF1 0x0002
-#define PCI_PROBE_CONF2 0x0004
-#define PCI_NO_SORT 0x0100
-#define PCI_BIOS_SORT 0x0200
-#define PCI_NO_CHECKS 0x0400
-#define PCI_ASSIGN_ROMS 0x1000
-#define PCI_BIOS_IRQ_SCAN 0x2000
-#define PCI_ASSIGN_ALL_BUSSES 0x4000
-
-extern unsigned int pci_probe;
-
-/* pci-i386.c */
-
-extern unsigned int pcibios_max_latency;
-extern u8 pci_cache_line_size;
-
-void pcibios_resource_survey(void);
-void pcibios_set_cacheline_size(void);
-int pcibios_enable_resources(struct pci_dev *, int);
-
-/* pci-pc.c */
-
-extern int pcibios_last_bus;
-extern struct pci_bus *pci_root_bus;
-extern struct pci_ops *pci_root_ops;
-
-/* pci-irq.c */
-
-struct irq_info {
- u8 bus, devfn; /* Bus, device and function */
- struct {
- u8 link; /* IRQ line ID, chipset dependent, 0=not routed */
- u16 bitmap; /* Available IRQs */
- } __attribute__((packed)) irq[4];
- u8 slot; /* Slot number, 0=onboard */
- u8 rfu;
-} __attribute__((packed));
-
-struct irq_routing_table {
- u32 signature; /* PIRQ_SIGNATURE should be here */
- u16 version; /* PIRQ_VERSION */
- u16 size; /* Table size in bytes */
- u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */
- u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */
- u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */
- u32 miniport_data; /* Crap */
- u8 rfu[11];
- u8 checksum; /* Modulo 256 checksum must give zero */
- struct irq_info slots[0];
-} __attribute__((packed));
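/* Illustrative: the slot count is implied by the size field, as used by
   pirq_get_info() in pci-irq.c:

     entries = (rt->size - sizeof(struct irq_routing_table))
               / sizeof(struct irq_info);
*/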
-
-extern unsigned int pcibios_irq_mask;
-
-void pcibios_irq_init(void);
-void pcibios_fixup_irqs(void);
-void pcibios_enable_irq(struct pci_dev *dev);
+++ /dev/null
-/*
- * Low-Level PCI Support for PC -- Routing of Interrupts
- *
- * (c) 1999--2000 Martin Mares <mj@ucw.cz>
- */
-
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/kernel.h>
-#include <xen/pci.h>
-#include <xen/init.h>
-#include <xen/slab.h>
-#include <xen/interrupt.h>
-#include <xen/irq.h>
-
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/io_apic.h>
-
-#include "pci-i386.h"
-
-#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
-#define PIRQ_VERSION 0x0100
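/* The signature reads "$PIR" as four little-endian bytes:
   '$'=0x24, 'P'=0x50, 'I'=0x49, 'R'=0x52 -> 0x52495024. */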
-
-int broken_hp_bios_irq9;
-
-static struct irq_routing_table *pirq_table;
-
-/*
- * Never use: 0, 1, 2 (timer, keyboard, and cascade)
- * Avoid using: 13, 14 and 15 (FP error and IDE).
- * Penalize: 3, 4, 6, 7, 12 (known ISA uses: serial, floppy, parallel and mouse)
- */
-unsigned int pcibios_irq_mask = 0xfff8;
-
-static int pirq_penalty[16] = {
- 1000000, 1000000, 1000000, 1000, 1000, 0, 1000, 1000,
- 0, 0, 0, 0, 1000, 100000, 100000, 100000
-};
-
-struct irq_router {
- char *name;
- u16 vendor, device;
- int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq);
- int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new);
-};
-
-struct irq_router_handler {
- u16 vendor;
- int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device);
-};
-
-/*
- * Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table.
- */
-
-static struct irq_routing_table * __init pirq_find_routing_table(void)
-{
- u8 *addr;
- struct irq_routing_table *rt;
- int i;
- u8 sum;
-
- for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) {
- rt = (struct irq_routing_table *) addr;
- if (rt->signature != PIRQ_SIGNATURE ||
- rt->version != PIRQ_VERSION ||
- rt->size % 16 ||
- rt->size < sizeof(struct irq_routing_table))
- continue;
- sum = 0;
- for(i=0; i<rt->size; i++)
- sum += addr[i];
- if (!sum) {
- DBG("PCI: Interrupt Routing Table found at 0x%p\n", rt);
- return rt;
- }
- }
- return NULL;
-}
-
-/*
- * If we have an IRQ routing table, use it to search for peer host
- * bridges. It's a gross hack, but since there is no other known
- * way to get a list of buses, we have to go this way.
- */
-
-static void __init pirq_peer_trick(void)
-{
- struct irq_routing_table *rt = pirq_table;
- u8 busmap[256];
- int i;
- struct irq_info *e;
-
- memset(busmap, 0, sizeof(busmap));
- for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) {
- e = &rt->slots[i];
-#ifdef DEBUG
- {
- int j;
- DBG("%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot);
- for(j=0; j<4; j++)
- DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap);
- DBG("\n");
- }
-#endif
- busmap[e->bus] = 1;
- }
- for(i=1; i<256; i++)
- /*
- * It might be a secondary bus, but in this case its parent is already
- * known (ascending bus order) and therefore pci_scan_bus returns immediately.
- */
- if (busmap[i] && pci_scan_bus(i, pci_root_bus->ops, NULL))
- printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i);
- pcibios_last_bus = -1;
-}
-
-/*
- * Code for querying and setting of IRQ routes on various interrupt routers.
- */
-
-void eisa_set_level_irq(unsigned int irq)
-{
- unsigned char mask = 1 << (irq & 7);
- unsigned int port = 0x4d0 + (irq >> 3);
- unsigned char val = inb(port);
-
- if (!(val & mask)) {
- DBG(" -> edge");
- outb(val | mask, port);
- }
-}
-
-/*
- * Common IRQ routing practice: nybbles in config space,
- * offset by some magic constant.
- */
-static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr)
-{
- u8 x;
- unsigned reg = offset + (nr >> 1);
-
- pci_read_config_byte(router, reg, &x);
- return (nr & 1) ? (x >> 4) : (x & 0xf);
-}
-
-static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val)
-{
- u8 x;
- unsigned reg = offset + (nr >> 1);
-
- pci_read_config_byte(router, reg, &x);
- x = (nr & 1) ? ((x & 0x0f) | (val << 4)) : ((x & 0xf0) | val);
- pci_write_config_byte(router, reg, x);
-}
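/* Example (illustrative): for the VIA router below, pirq 2 maps to
   read_config_nybble(router, 0x55, 2), i.e. config byte 0x55 + (2>>1)
   == 0x56, low nibble (since 2 is even). */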
-
-/*
- * ALI pirq entries are damn ugly, and completely undocumented.
- * This has been figured out from pirq tables, and it's not a pretty
- * picture.
- */
-static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- static unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
-
- return irqmap[read_config_nybble(router, 0x48, pirq-1)];
-}
-
-static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- static unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
- unsigned int val = irqmap[irq];
-
- if (val) {
- write_config_nybble(router, 0x48, pirq-1, val);
- return 1;
- }
- return 0;
-}
-
-/*
- * The Intel PIIX4 pirq rules are fairly simple: "pirq" is
- * just a pointer to the config space.
- */
-static int pirq_piix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- u8 x;
-
- pci_read_config_byte(router, pirq, &x);
- return (x < 16) ? x : 0;
-}
-
-static int pirq_piix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- pci_write_config_byte(router, pirq, irq);
- return 1;
-}
-
-/*
- * The VIA pirq rules are nibble-based, like ALI,
- * but without the ugly irq number munging.
- * However, PIRQD is in the upper instead of lower nibble.
- */
-static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- return read_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq);
-}
-
-static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- write_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq, irq);
- return 1;
-}
-
-/*
- * ITE 8330G pirq rules are nibble-based
- * FIXME: pirqmap may be { 1, 0, 3, 2 },
- * 2+3 are both mapped to irq 9 on my system
- */
-static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- static unsigned char pirqmap[4] = { 1, 0, 2, 3 };
-	return read_config_nybble(router, 0x43, pirqmap[pirq-1]);
-}
-
-static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- static unsigned char pirqmap[4] = { 1, 0, 2, 3 };
- write_config_nybble(router, 0x43, pirqmap[pirq-1], irq);
- return 1;
-}
-
-/*
- * OPTI: high four bits are nibble pointer..
- * I wonder what the low bits do?
- */
-static int pirq_opti_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- return read_config_nybble(router, 0xb8, pirq >> 4);
-}
-
-static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- write_config_nybble(router, 0xb8, pirq >> 4, irq);
- return 1;
-}
-
-/*
- * Cyrix: nibble offset 0x5C
- */
-static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- return read_config_nybble(router, 0x5C, (pirq-1)^1);
-}
-
-static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- write_config_nybble(router, 0x5C, (pirq-1)^1, irq);
- return 1;
-}
-
-/*
- * PIRQ routing for SiS 85C503 router used in several SiS chipsets.
- * We have to deal with the following issues here:
- * - vendors have different ideas about the meaning of link values
- * - some onboard devices (integrated in the chipset) have special
- * links and are thus routed differently (i.e. not via PCI INTA-INTD)
- * - different revisions of the router have a different layout for
- * the routing registers, particularly for the onchip devices
- *
- * For all routing registers the common thing is we have one byte
- * per routeable link which is defined as:
- * bit 7 IRQ mapping enabled (0) or disabled (1)
- * bits [6:4] reserved (sometimes used for onchip devices)
- * bits [3:0] IRQ to map to
- * allowed: 3-7, 9-12, 14-15
- * reserved: 0, 1, 2, 8, 13
- *
- * The config-space registers located at 0x41/0x42/0x43/0x44 are
- * always used to route the normal PCI INT A/B/C/D respectively.
- * Apparently there are systems implementing the PCI routing table using
- * link values 0x01-0x04 and others using 0x41-0x44 for PCI INTA..D.
- * We try our best to handle both link mappings.
- *
- * Currently (2003-05-21) it appears most SiS chipsets follow the
- * definition of routing registers from the SiS-5595 southbridge.
- * According to the SiS 5595 datasheets the revision id's of the
- * router (ISA-bridge) should be 0x01 or 0xb0.
- *
- * Furthermore we've also seen lspci dumps with revision 0x00 and 0xb1.
- * Looks like these are used in a number of SiS 5xx/6xx/7xx chipsets.
- * They seem to work with the current routing code. However there is
- * some concern because of the two USB-OHCI HCs (original SiS 5595
- * had only one). YMMV.
- *
- * Onchip routing for router rev-id 0x01/0xb0 and probably 0x00/0xb1:
- *
- * 0x61: IDEIRQ:
- * bits [6:5] must be written 01
- * bit 4 channel-select primary (0), secondary (1)
- *
- * 0x62: USBIRQ:
- * bit 6 OHCI function disabled (0), enabled (1)
- *
- * 0x6a: ACPI/SCI IRQ: bits 4-6 reserved
- *
- * 0x7e: Data Acq. Module IRQ - bits 4-6 reserved
- *
- * We support USBIRQ (in addition to INTA-INTD) and keep the
- * IDE, ACPI and DAQ routing untouched as set by the BIOS.
- *
- * Currently the only reported exception is the new SiS 65x chipset
- * which includes the SiS 69x southbridge. Here we have the 85C503
- * router revision 0x04 and there are changes in the register layout
- * mostly related to the different USB HCs with USB 2.0 support.
- *
- * Onchip routing for router rev-id 0x04 (trial-and-error observation)
- *
- * 0x60/0x61/0x62/0x63: 1xEHCI and 3xOHCI (companion) USB-HCs
- * bit 6-4 are probably unused, not like 5595
- */
-
-#define PIRQ_SIS_IRQ_MASK 0x0f
-#define PIRQ_SIS_IRQ_DISABLE 0x80
-#define PIRQ_SIS_USB_ENABLE 0x40
-#define PIRQ_SIS_DETECT_REGISTER 0x40
-
-/* return value:
- * -1 on error
- * 0 for PCI INTA-INTD
- * 0 or enable bit mask to check or set for onchip functions
- */
-static inline int pirq_sis5595_onchip(int pirq, int *reg)
-{
- int ret = -1;
-
- *reg = pirq;
- switch(pirq) {
- case 0x01:
- case 0x02:
- case 0x03:
- case 0x04:
- *reg += 0x40;
- case 0x41:
- case 0x42:
- case 0x43:
- case 0x44:
- ret = 0;
- break;
-
- case 0x62:
- ret = PIRQ_SIS_USB_ENABLE; /* documented for 5595 */
- break;
-
- case 0x61:
- case 0x6a:
- case 0x7e:
- printk(KERN_INFO "SiS pirq: IDE/ACPI/DAQ mapping not implemented: (%u)\n",
- (unsigned) pirq);
- /* fall thru */
- default:
- printk(KERN_INFO "SiS router unknown request: (%u)\n",
- (unsigned) pirq);
- break;
- }
- return ret;
-}
-
-/* return value:
- * -1 on error
- * 0 for PCI INTA-INTD
- * 0 or enable bit mask to check or set for onchip functions
- */
-static inline int pirq_sis96x_onchip(int pirq, int *reg)
-{
- int ret = -1;
-
- *reg = pirq;
- switch(pirq) {
- case 0x01:
- case 0x02:
- case 0x03:
- case 0x04:
- *reg += 0x40;
- case 0x41:
- case 0x42:
- case 0x43:
- case 0x44:
- case 0x60:
- case 0x61:
- case 0x62:
- case 0x63:
- ret = 0;
- break;
-
- default:
- printk(KERN_INFO "SiS router unknown request: (%u)\n",
- (unsigned) pirq);
- break;
- }
- return ret;
-}
-
-
-static int pirq_sis5595_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- u8 x;
- int reg, check;
-
-	check = pirq_sis5595_onchip(pirq, &reg);
- if (check < 0)
- return 0;
-
- pci_read_config_byte(router, reg, &x);
- if (check != 0 && !(x & check))
- return 0;
-
- return (x & PIRQ_SIS_IRQ_DISABLE) ? 0 : (x & PIRQ_SIS_IRQ_MASK);
-}
-
-static int pirq_sis96x_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- u8 x;
- int reg, check;
-
-	check = pirq_sis96x_onchip(pirq, &reg);
- if (check < 0)
- return 0;
-
- pci_read_config_byte(router, reg, &x);
- if (check != 0 && !(x & check))
- return 0;
-
- return (x & PIRQ_SIS_IRQ_DISABLE) ? 0 : (x & PIRQ_SIS_IRQ_MASK);
-}
-
-static int pirq_sis5595_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- u8 x;
- int reg, set;
-
-	set = pirq_sis5595_onchip(pirq, &reg);
- if (set < 0)
- return 0;
-
- x = (irq & PIRQ_SIS_IRQ_MASK);
- if (x == 0)
- x = PIRQ_SIS_IRQ_DISABLE;
- else
- x |= set;
-
- pci_write_config_byte(router, reg, x);
-
- return 1;
-}
-
-static int pirq_sis96x_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- u8 x;
- int reg, set;
-
-	set = pirq_sis96x_onchip(pirq, &reg);
- if (set < 0)
- return 0;
-
- x = (irq & PIRQ_SIS_IRQ_MASK);
- if (x == 0)
- x = PIRQ_SIS_IRQ_DISABLE;
- else
- x |= set;
-
- pci_write_config_byte(router, reg, x);
-
- return 1;
-}
-
-
-/*
- * VLSI: nibble offset 0x74 - educated guess due to routing table and
- * config space of VLSI 82C534 PCI-bridge/router (1004:0102)
- * Tested on HP OmniBook 800 covering PIRQ 1, 2, 4, 8 for onboard
- * devices, PIRQ 3 for non-pci(!) soundchip and (untested) PIRQ 6
- * for the busbridge to the docking station.
- */
-
-static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- if (pirq > 8) {
- printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
- return 0;
- }
- return read_config_nybble(router, 0x74, pirq-1);
-}
-
-static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- if (pirq > 8) {
- printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
- return 0;
- }
- write_config_nybble(router, 0x74, pirq-1, irq);
- return 1;
-}
-
-/*
- * ServerWorks: PCI interrupts mapped to system IRQ lines through Index
- * and Redirect I/O registers (0x0c00 and 0x0c01). The Index register
- * format is (PCIIRQ## | 0x10), e.g.: PCIIRQ10=0x1a. The Redirect
- * register is a straight binary coding of desired PIC IRQ (low nibble).
- *
- * The 'link' value in the PIRQ table is already in the correct format
- * for the Index register. There are some special index values:
- * 0x00 for ACPI (SCI), 0x01 for USB, 0x02 for IDE0, 0x04 for IDE1,
- * and 0x03 for SMBus.
- */
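/* Example (illustrative): querying PCIIRQ10 means index 0x1a, so
   outb_p(0x1a, 0xc00); irq = inb(0xc01) & 0xf; */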
-static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- outb_p(pirq, 0xc00);
- return inb(0xc01) & 0xf;
-}
-
-static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- outb_p(pirq, 0xc00);
- outb_p(irq, 0xc01);
- return 1;
-}
-
-/* Support for AMD756 PCI IRQ Routing
- * Jhon H. Caicedo <jhcaiced@osso.org.co>
- * Jun/21/2001 0.2.0 Release, fixed to use "nybble" functions... (jhcaiced)
- * Jun/19/2001 Alpha Release 0.1.0 (jhcaiced)
- * The AMD756 pirq rules are nibble-based
- * offset 0x56 0-3 PIRQA 4-7 PIRQB
- * offset 0x57 0-3 PIRQC 4-7 PIRQD
- */
-static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- u8 irq;
- irq = 0;
- if (pirq <= 4)
- {
- irq = read_config_nybble(router, 0x56, pirq - 1);
- }
- printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n",
- dev->vendor, dev->device, pirq, irq);
- return irq;
-}
-
-static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n",
- dev->vendor, dev->device, pirq, irq);
- if (pirq <= 4)
- {
- write_config_nybble(router, 0x56, pirq - 1, irq);
- }
- return 1;
-}
-
-#ifdef CONFIG_PCI_BIOS
-
-static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- struct pci_dev *bridge;
- int pin = pci_get_interrupt_pin(dev, &bridge);
- return pcibios_set_irq_routing(bridge, pin, irq);
-}
-
-#endif
-
-
-static __init int intel_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
-{
- /* We must not touch 440GX even if we have tables. 440GX has
- different IRQ routing weirdness */
- if(pci_find_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0, NULL) ||
- pci_find_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_2, NULL))
- return 0;
- switch(device)
- {
- case PCI_DEVICE_ID_INTEL_82371FB_0:
- case PCI_DEVICE_ID_INTEL_82371SB_0:
- case PCI_DEVICE_ID_INTEL_82371AB_0:
- case PCI_DEVICE_ID_INTEL_82371MX:
- case PCI_DEVICE_ID_INTEL_82443MX_0:
- case PCI_DEVICE_ID_INTEL_82801AA_0:
- case PCI_DEVICE_ID_INTEL_82801AB_0:
- case PCI_DEVICE_ID_INTEL_82801BA_0:
- case PCI_DEVICE_ID_INTEL_82801BA_10:
- case PCI_DEVICE_ID_INTEL_82801CA_0:
- case PCI_DEVICE_ID_INTEL_82801CA_12:
- case PCI_DEVICE_ID_INTEL_82801DB_0:
- case PCI_DEVICE_ID_INTEL_82801E_0:
- case PCI_DEVICE_ID_INTEL_82801EB_0:
- case PCI_DEVICE_ID_INTEL_ESB_0:
- r->name = "PIIX/ICH";
- r->get = pirq_piix_get;
- r->set = pirq_piix_set;
- return 1;
- }
- return 0;
-}
-
-static __init int via_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
-{
- /* FIXME: We should move some of the quirk fixup stuff here */
- switch(device)
- {
- case PCI_DEVICE_ID_VIA_82C586_0:
- case PCI_DEVICE_ID_VIA_82C596:
- case PCI_DEVICE_ID_VIA_82C686:
- case PCI_DEVICE_ID_VIA_8231:
- /* FIXME: add new ones for 8233/5 */
- r->name = "VIA";
- r->get = pirq_via_get;
- r->set = pirq_via_set;
- return 1;
- }
- return 0;
-}
-
-static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
-{
- switch(device)
- {
- case PCI_DEVICE_ID_VLSI_82C534:
- r->name = "VLSI 82C534";
- r->get = pirq_vlsi_get;
- r->set = pirq_vlsi_set;
- return 1;
- }
- return 0;
-}
-
-
-static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
-{
- switch(device)
- {
- case PCI_DEVICE_ID_SERVERWORKS_OSB4:
- case PCI_DEVICE_ID_SERVERWORKS_CSB5:
- r->name = "ServerWorks";
- r->get = pirq_serverworks_get;
- r->set = pirq_serverworks_set;
- return 1;
- }
- return 0;
-}
-
-static __init int sis_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
-{
- u8 reg;
- u16 devid;
-
- if (device != PCI_DEVICE_ID_SI_503)
- return 0;
-
- /*
- * In the case of a SiS south bridge, we need to detect which of the
- * two kinds of routing tables seen so far (5595 and 96x) is in use.
- * Since they maintain the same device ID, we need to poke the PCI
- * configuration space to find the router type we are dealing with.
- */
-
- /*
- * Factoid: writing bit 6 of register 0x40 in the router config space
- * makes the south bridge report 0x096x as its device ID. Note that
- * we need to restore register 0x40 after the device ID poke.
- */
-
-	pci_read_config_byte(router, PIRQ_SIS_DETECT_REGISTER, &reg);
- pci_write_config_byte(router, PIRQ_SIS_DETECT_REGISTER, reg | (1 << 6));
- pci_read_config_word(router, PCI_DEVICE_ID, &devid);
- pci_write_config_byte(router, PIRQ_SIS_DETECT_REGISTER, reg);
-
- if ((devid & 0xfff0) == 0x0960) {
- r->name = "SIS96x";
- r->get = pirq_sis96x_get;
- r->set = pirq_sis96x_set;
-		DBG("PCI: Detecting SiS router at %02x:%02x : SiS96x detected\n",
-		    router->bus->number, router->devfn);
- } else {
- r->name = "SIS5595";
- r->get = pirq_sis5595_get;
- r->set = pirq_sis5595_set;
-		DBG("PCI: Detecting SiS router at %02x:%02x : SiS5595 detected\n",
-		    router->bus->number, router->devfn);
- }
- return 1;
-}
-
-static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
-{
- switch(device)
- {
- case PCI_DEVICE_ID_CYRIX_5520:
- r->name = "NatSemi";
- r->get = pirq_cyrix_get;
- r->set = pirq_cyrix_set;
- return 1;
- }
- return 0;
-}
-
-static __init int opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
-{
- switch(device)
- {
- case PCI_DEVICE_ID_OPTI_82C700:
- r->name = "OPTI";
- r->get = pirq_opti_get;
- r->set = pirq_opti_set;
- return 1;
- }
- return 0;
-}
-
-static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
-{
- switch(device)
- {
- case PCI_DEVICE_ID_ITE_IT8330G_0:
- r->name = "ITE";
- r->get = pirq_ite_get;
- r->set = pirq_ite_set;
- return 1;
- }
- return 0;
-}
-
-static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
-{
- switch(device)
- {
- case PCI_DEVICE_ID_AL_M1533:
- r->name = "ALI";
- r->get = pirq_ali_get;
- r->set = pirq_ali_set;
- return 1;
- /* Should add 156x some day */
- }
- return 0;
-}
-
-static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
-{
- switch(device)
- {
- case PCI_DEVICE_ID_AMD_VIPER_740B:
- r->name = "AMD756";
- break;
- case PCI_DEVICE_ID_AMD_VIPER_7413:
- r->name = "AMD766";
- break;
- case PCI_DEVICE_ID_AMD_VIPER_7443:
- r->name = "AMD768";
- break;
- default:
- return 0;
- }
- r->get = pirq_amd756_get;
- r->set = pirq_amd756_set;
- return 1;
-}
-
-static __initdata struct irq_router_handler pirq_routers[] = {
- { PCI_VENDOR_ID_INTEL, intel_router_probe },
- { PCI_VENDOR_ID_AL, ali_router_probe },
- { PCI_VENDOR_ID_ITE, ite_router_probe },
- { PCI_VENDOR_ID_VIA, via_router_probe },
- { PCI_VENDOR_ID_OPTI, opti_router_probe },
- { PCI_VENDOR_ID_SI, sis_router_probe },
- { PCI_VENDOR_ID_CYRIX, cyrix_router_probe },
- { PCI_VENDOR_ID_VLSI, vlsi_router_probe },
- { PCI_VENDOR_ID_SERVERWORKS, serverworks_router_probe },
- { PCI_VENDOR_ID_AMD, amd_router_probe },
- /* Someone with docs needs to add the ATI Radeon IGP */
- { 0, NULL }
-};
-static struct irq_router pirq_router;
-static struct pci_dev *pirq_router_dev;
-
-/*
- * FIXME: should we have an option to say "generic for
- * chipset" ?
- */
-
-static void __init pirq_find_router(struct irq_router *r)
-{
- struct irq_routing_table *rt = pirq_table;
- struct irq_router_handler *h;
-
-#ifdef CONFIG_PCI_BIOS
- if (!rt->signature) {
- printk(KERN_INFO "PCI: Using BIOS for IRQ routing\n");
- r->set = pirq_bios_set;
- r->name = "BIOS";
- return;
- }
-#endif
-
- /* Default unless a driver reloads it */
- r->name = "default";
- r->get = NULL;
- r->set = NULL;
-
- DBG("PCI: Attempting to find IRQ router for %04x:%04x\n",
- rt->rtr_vendor, rt->rtr_device);
-
- pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn);
- if (!pirq_router_dev) {
- DBG("PCI: Interrupt router not found at %02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
- return;
- }
-
- for( h = pirq_routers; h->vendor; h++) {
- /* First look for a router match */
- if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device))
- break;
- /* Fall back to a device match */
- if (pirq_router_dev->vendor == h->vendor && h->probe(r, pirq_router_dev, pirq_router_dev->device))
- break;
- }
- printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n",
- pirq_router.name,
- pirq_router_dev->vendor,
- pirq_router_dev->device,
- pirq_router_dev->slot_name);
-}
-
-static struct irq_info *pirq_get_info(struct pci_dev *dev)
-{
- struct irq_routing_table *rt = pirq_table;
- int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info);
- struct irq_info *info;
-
- for (info = rt->slots; entries--; info++)
- if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
- return info;
- return NULL;
-}
-
-static void pcibios_test_irq_handler(int irq, void *dev_id, struct pt_regs *regs)
-{
-}
-
-static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
-{
- u8 pin;
- struct irq_info *info;
- int i, pirq, newirq;
- int irq = 0;
- u32 mask;
- struct irq_router *r = &pirq_router;
- struct pci_dev *dev2;
- char *msg = NULL;
-
- if (!pirq_table)
- return 0;
-
- /* Find IRQ routing entry */
- pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
- if (!pin) {
- DBG(" -> no interrupt pin\n");
- return 0;
- }
- pin = pin - 1;
-
- DBG("IRQ for %s:%d", dev->slot_name, pin);
- info = pirq_get_info(dev);
- if (!info) {
- DBG(" -> not found in routing table\n");
- return 0;
- }
- pirq = info->irq[pin].link;
- mask = info->irq[pin].bitmap;
- if (!pirq) {
- DBG(" -> not routed\n");
- return 0;
- }
- DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs);
- mask &= pcibios_irq_mask;
-
- /* Work around broken HP Pavilion Notebooks which assign USB to
- IRQ 9 even though it is actually wired to IRQ 11 */
-
- if (broken_hp_bios_irq9 && pirq == 0x59 && dev->irq == 9) {
- dev->irq = 11;
- pci_write_config_byte(dev, PCI_INTERRUPT_LINE, 11);
- r->set(pirq_router_dev, dev, pirq, 11);
- }
-
- /*
- * Find the best IRQ to assign: use the one
- * reported by the device if possible.
- */
- newirq = dev->irq;
- if (!newirq && assign) {
- for (i = 0; i < 16; i++) {
- if (!(mask & (1 << i)))
- continue;
- if (pirq_penalty[i] < pirq_penalty[newirq] &&
- !request_irq(i, pcibios_test_irq_handler, SA_SHIRQ, "pci-test", dev)) {
- free_irq(i, dev);
- newirq = i;
- }
- }
- }
- DBG(" -> newirq=%d", newirq);
-
- /* Check if it is hardcoded */
- if ((pirq & 0xf0) == 0xf0) {
- irq = pirq & 0xf;
- DBG(" -> hardcoded IRQ %d\n", irq);
- msg = "Hardcoded";
- } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq))) {
- DBG(" -> got IRQ %d\n", irq);
- msg = "Found";
- } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
- DBG(" -> assigning IRQ %d", newirq);
- if (r->set(pirq_router_dev, dev, pirq, newirq)) {
- eisa_set_level_irq(newirq);
- DBG(" ... OK\n");
- msg = "Assigned";
- irq = newirq;
- }
- }
-
- if (!irq) {
- DBG(" ... failed\n");
- if (newirq && mask == (1 << newirq)) {
- msg = "Guessed";
- irq = newirq;
- } else
- return 0;
- }
- printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, dev->slot_name);
-
- /* Update IRQ for all devices with the same pirq value */
- pci_for_each_dev(dev2) {
- pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin);
- if (!pin)
- continue;
- pin--;
- info = pirq_get_info(dev2);
- if (!info)
- continue;
- if (info->irq[pin].link == pirq) {
- /* We refuse to override the dev->irq information. Give a warning! */
- if (dev2->irq && dev2->irq != irq) {
- printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n",
- dev2->slot_name, dev2->irq, irq);
- continue;
- }
- dev2->irq = irq;
- pirq_penalty[irq]++;
- if (dev != dev2)
- printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, dev2->slot_name);
- }
- }
- return 1;
-}
-
-void __init pcibios_irq_init(void)
-{
- DBG("PCI: IRQ init\n");
- pirq_table = pirq_find_routing_table();
-#ifdef CONFIG_PCI_BIOS
- if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN))
- pirq_table = pcibios_get_irq_routing_table();
-#endif
- if (pirq_table) {
- pirq_peer_trick();
- pirq_find_router(&pirq_router);
- if (pirq_table->exclusive_irqs) {
- int i;
- for (i=0; i<16; i++)
- if (!(pirq_table->exclusive_irqs & (1 << i)))
- pirq_penalty[i] += 100;
- }
- /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */
- if (io_apic_assign_pci_irqs)
- pirq_table = NULL;
- }
-}
-
-void __init pcibios_fixup_irqs(void)
-{
- struct pci_dev *dev;
- u8 pin;
-
- DBG("PCI: IRQ fixup\n");
- pci_for_each_dev(dev) {
- /*
-		 * If the BIOS has set an out-of-range IRQ number, just ignore it.
-		 * Also keep track of which IRQs are already in use.
- */
- if (dev->irq >= 16) {
- DBG("%s: ignoring bogus IRQ %d\n", dev->slot_name, dev->irq);
- dev->irq = 0;
- }
- /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */
- if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000)
- pirq_penalty[dev->irq] = 0;
- pirq_penalty[dev->irq]++;
- }
-
- pci_for_each_dev(dev) {
- pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
-#ifdef CONFIG_X86_IO_APIC
- /*
- * Recalculate IRQ numbers if we use the I/O APIC.
- */
- if (io_apic_assign_pci_irqs)
- {
- int irq;
-
- if (pin) {
- pin--; /* interrupt pins are numbered starting from 1 */
- irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
- /*
- * Busses behind bridges are typically not listed in the MP-table.
- * In this case we have to look up the IRQ based on the parent bus,
- * parent slot, and pin number. The SMP code detects such bridged
- * busses itself so we should get into this branch reliably.
- */
- if (irq < 0 && dev->bus->parent) { /* go back to the bridge */
- struct pci_dev * bridge = dev->bus->self;
-
- pin = (pin + PCI_SLOT(dev->devfn)) % 4;
- irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
- PCI_SLOT(bridge->devfn), pin);
- if (irq >= 0)
- printk(KERN_WARNING "PCI: using PPB(B%d,I%d,P%d) to get irq %d\n",
- bridge->bus->number, PCI_SLOT(bridge->devfn), pin, irq);
- }
- if (irq >= 0) {
- printk(KERN_INFO "PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %d\n",
- dev->bus->number, PCI_SLOT(dev->devfn), pin, irq);
- dev->irq = irq;
- }
- }
- }
-#endif
- /*
- * Still no IRQ? Try to lookup one...
- */
- if (pin && !dev->irq)
- pcibios_lookup_irq(dev, 0);
- }
-}
-
-void pcibios_penalize_isa_irq(int irq)
-{
- /*
- * If any ISAPnP device reports an IRQ in its list of possible
- * IRQ's, we try to avoid assigning it to PCI devices.
- */
- pirq_penalty[irq] += 100;
-}
-
-void pcibios_enable_irq(struct pci_dev *dev)
-{
- u8 pin;
- extern int interrupt_line_quirk;
-
- pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
- if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
- char *msg;
-
- /* With IDE legacy devices the IRQ lookup failure is not a problem.. */
- if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5))
- return;
-
- if (io_apic_assign_pci_irqs)
- msg = " Probably buggy MP table.";
- else if (pci_probe & PCI_BIOS_IRQ_SCAN)
- msg = "";
- else
- msg = " Please try using pci=biosirq.";
- printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
- 'A' + pin - 1, dev->slot_name, msg);
- }
- /* VIA bridges use interrupt line for apic/pci steering across
- the V-Link */
- else if (interrupt_line_quirk)
- pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
-
-}
+++ /dev/null
-/*
- * Low-Level PCI Support for PC
- *
- * (c) 1999--2000 Martin Mares <mj@ucw.cz>
- */
-
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/kernel.h>
-#include <xen/sched.h>
-#include <xen/pci.h>
-#include <xen/init.h>
-#include <xen/ioport.h>
-#include <xen/acpi.h>
-
-/*#include <asm/segment.h>*/
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/smpboot.h>
-
-#include "pci-i386.h"
-
-extern int numnodes;
-#define __KERNEL_CS __HYPERVISOR_CS
-#define __KERNEL_DS __HYPERVISOR_DS
-
-unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2;
-
-int pcibios_last_bus = -1;
-struct pci_bus *pci_root_bus = NULL;
-struct pci_ops *pci_root_ops = NULL;
-
-int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value) = NULL;
-int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value) = NULL;
-
-static int pci_using_acpi_prt = 0;
-
-#ifdef CONFIG_MULTIQUAD
-#define BUS2QUAD(global) (mp_bus_id_to_node[global])
-#define BUS2LOCAL(global) (mp_bus_id_to_local[global])
-#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
-#else
-#define BUS2QUAD(global) (0)
-#define BUS2LOCAL(global) (global)
-#define QUADLOCAL2BUS(quad,local) (local)
-#endif
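/* On non-MULTIQUAD builds the macros above collapse to the identity:
   BUS2LOCAL(bus) == bus and every access targets quad 0. */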
-
-/*
- * This interrupt-safe spinlock protects all accesses to PCI
- * configuration space.
- */
-static spinlock_t pci_config_lock = SPIN_LOCK_UNLOCKED;
-
-
-/*
- * Functions for accessing PCI configuration space with type 1 accesses
- */
-
-#ifdef CONFIG_PCI_DIRECT
-
-#ifdef CONFIG_MULTIQUAD
-#define PCI_CONF1_ADDRESS(bus, dev, fn, reg) \
- (0x80000000 | (BUS2LOCAL(bus) << 16) | (dev << 11) | (fn << 8) | (reg & ~3))
-
-static int pci_conf1_mq_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) /* CONFIG_MULTIQUAD */
-{
- unsigned long flags;
-
- if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
- return -EINVAL;
-
- spin_lock_irqsave(&pci_config_lock, flags);
-
- outl_quad(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8, BUS2QUAD(bus));
-
- switch (len) {
- case 1:
- *value = inb_quad(0xCFC + (reg & 3), BUS2QUAD(bus));
- break;
- case 2:
- *value = inw_quad(0xCFC + (reg & 2), BUS2QUAD(bus));
- break;
- case 4:
- *value = inl_quad(0xCFC, BUS2QUAD(bus));
- break;
- }
-
- spin_unlock_irqrestore(&pci_config_lock, flags);
-
- return 0;
-}
-
-static int pci_conf1_mq_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) /* CONFIG_MULTIQUAD */
-{
- unsigned long flags;
-
- if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
- return -EINVAL;
-
- spin_lock_irqsave(&pci_config_lock, flags);
-
- outl_quad(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8, BUS2QUAD(bus));
-
- switch (len) {
- case 1:
- outb_quad((u8)value, 0xCFC + (reg & 3), BUS2QUAD(bus));
- break;
- case 2:
- outw_quad((u16)value, 0xCFC + (reg & 2), BUS2QUAD(bus));
- break;
- case 4:
- outl_quad((u32)value, 0xCFC, BUS2QUAD(bus));
- break;
- }
-
- spin_unlock_irqrestore(&pci_config_lock, flags);
-
- return 0;
-}
-
-static int pci_conf1_read_mq_config_byte(struct pci_dev *dev, int where, u8 *value)
-{
- int result;
- u32 data;
-
- result = pci_conf1_mq_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 1, &data);
-
- *value = (u8)data;
-
- return result;
-}
-
-static int pci_conf1_read_mq_config_word(struct pci_dev *dev, int where, u16 *value)
-{
- int result;
- u32 data;
-
- result = pci_conf1_mq_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 2, &data);
-
- *value = (u16)data;
-
- return result;
-}
-
-static int pci_conf1_read_mq_config_dword(struct pci_dev *dev, int where, u32 *value)
-{
- if (!value)
- return -EINVAL;
-
- return pci_conf1_mq_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 4, value);
-}
-
-static int pci_conf1_write_mq_config_byte(struct pci_dev *dev, int where, u8 value)
-{
- return pci_conf1_mq_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 1, value);
-}
-
-static int pci_conf1_write_mq_config_word(struct pci_dev *dev, int where, u16 value)
-{
- return pci_conf1_mq_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 2, value);
-}
-
-static int pci_conf1_write_mq_config_dword(struct pci_dev *dev, int where, u32 value)
-{
- return pci_conf1_mq_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 4, value);
-}
-
-static struct pci_ops pci_direct_mq_conf1 = {
- pci_conf1_read_mq_config_byte,
- pci_conf1_read_mq_config_word,
- pci_conf1_read_mq_config_dword,
- pci_conf1_write_mq_config_byte,
- pci_conf1_write_mq_config_word,
- pci_conf1_write_mq_config_dword
-};
-
-#endif /* CONFIG_MULTIQUAD */
-#define PCI_CONF1_ADDRESS(bus, dev, fn, reg) \
- (0x80000000 | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3))
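/* Worked example (illustrative): bus 0, dev 3, fn 0, reg 0x10 encodes as
   0x80000000 | (0<<16) | (3<<11) | (0<<8) | 0x10 == 0x80001810, written
   to port 0xCF8; the data is then read or written at 0xCFC + (reg & 3). */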
-
-static int pci_conf1_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) /* !CONFIG_MULTIQUAD */
-{
- unsigned long flags;
-
- if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
- return -EINVAL;
-
- spin_lock_irqsave(&pci_config_lock, flags);
-
- outl(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8);
-
- switch (len) {
- case 1:
- *value = inb(0xCFC + (reg & 3));
- break;
- case 2:
- *value = inw(0xCFC + (reg & 2));
- break;
- case 4:
- *value = inl(0xCFC);
- break;
- }
-
- spin_unlock_irqrestore(&pci_config_lock, flags);
-
- return 0;
-}
-
-static int pci_conf1_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) /* !CONFIG_MULTIQUAD */
-{
- unsigned long flags;
-
- if ((bus > 255 || dev > 31 || fn > 7 || reg > 255))
- return -EINVAL;
-
- spin_lock_irqsave(&pci_config_lock, flags);
-
- outl(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8);
-
- switch (len) {
- case 1:
- outb((u8)value, 0xCFC + (reg & 3));
- break;
- case 2:
- outw((u16)value, 0xCFC + (reg & 2));
- break;
- case 4:
- outl((u32)value, 0xCFC);
- break;
- }
-
- spin_unlock_irqrestore(&pci_config_lock, flags);
-
- return 0;
-}
-
-#undef PCI_CONF1_ADDRESS
-
-static int pci_conf1_read_config_byte(struct pci_dev *dev, int where, u8 *value)
-{
- int result;
- u32 data;
-
- result = pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 1, &data);
-
- *value = (u8)data;
-
- return result;
-}
-
-static int pci_conf1_read_config_word(struct pci_dev *dev, int where, u16 *value)
-{
- int result;
- u32 data;
-
- result = pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 2, &data);
-
- *value = (u16)data;
-
- return result;
-}
-
-static int pci_conf1_read_config_dword(struct pci_dev *dev, int where, u32 *value)
-{
- return pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 4, value);
-}
-
-static int pci_conf1_write_config_byte(struct pci_dev *dev, int where, u8 value)
-{
- return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 1, value);
-}
-
-static int pci_conf1_write_config_word(struct pci_dev *dev, int where, u16 value)
-{
- return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 2, value);
-}
-
-static int pci_conf1_write_config_dword(struct pci_dev *dev, int where, u32 value)
-{
- return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 4, value);
-}
-
-static struct pci_ops pci_direct_conf1 = {
- pci_conf1_read_config_byte,
- pci_conf1_read_config_word,
- pci_conf1_read_config_dword,
- pci_conf1_write_config_byte,
- pci_conf1_write_config_word,
- pci_conf1_write_config_dword
-};
-
-
-/*
- * Functions for accessing PCI configuration space with type 2 accesses
- */
-
-#define PCI_CONF2_ADDRESS(dev, reg) (u16)(0xC000 | (dev << 8) | reg)
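Under mechanism 2, the device and register select an I/O port inside the
0xC000-0xCFFF window, while the function number is programmed into port
0xCF8 and the bus number into 0xCFA; only 16 devices fit in the encoding,
hence the (dev & 0x10) rejection in the accessors below. A standalone
sketch of the port computation, with arbitrary example values:

#include <stdio.h>

static unsigned short conf2_address(int dev, int reg)
{
    /* Mirrors the PCI_CONF2_ADDRESS macro above. */
    return (unsigned short)(0xC000 | (dev << 8) | reg);
}

int main(void)
{
    printf("0x%04x\n", conf2_address(3, 0x40));  /* prints 0xc340 */
    return 0;
}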
-
-static int pci_conf2_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value)
-{
- unsigned long flags;
-
- if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
- return -EINVAL;
-
- if (dev & 0x10)
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- spin_lock_irqsave(&pci_config_lock, flags);
-
- outb((u8)(0xF0 | (fn << 1)), 0xCF8);
- outb((u8)bus, 0xCFA);
-
- switch (len) {
- case 1:
- *value = inb(PCI_CONF2_ADDRESS(dev, reg));
- break;
- case 2:
- *value = inw(PCI_CONF2_ADDRESS(dev, reg));
- break;
- case 4:
- *value = inl(PCI_CONF2_ADDRESS(dev, reg));
- break;
- }
-
- outb (0, 0xCF8);
-
- spin_unlock_irqrestore(&pci_config_lock, flags);
-
- return 0;
-}
-
-static int pci_conf2_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value)
-{
- unsigned long flags;
-
- if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
- return -EINVAL;
-
- if (dev & 0x10)
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- spin_lock_irqsave(&pci_config_lock, flags);
-
- outb((u8)(0xF0 | (fn << 1)), 0xCF8);
- outb((u8)bus, 0xCFA);
-
- switch (len) {
- case 1:
- outb ((u8)value, PCI_CONF2_ADDRESS(dev, reg));
- break;
- case 2:
- outw ((u16)value, PCI_CONF2_ADDRESS(dev, reg));
- break;
- case 4:
- outl ((u32)value, PCI_CONF2_ADDRESS(dev, reg));
- break;
- }
-
- outb (0, 0xCF8);
-
- spin_unlock_irqrestore(&pci_config_lock, flags);
-
- return 0;
-}
-
-#undef PCI_CONF2_ADDRESS
-
-static int pci_conf2_read_config_byte(struct pci_dev *dev, int where, u8 *value)
-{
- int result;
- u32 data;
- result = pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 1, &data);
- *value = (u8)data;
- return result;
-}
-
-static int pci_conf2_read_config_word(struct pci_dev *dev, int where, u16 *value)
-{
- int result;
- u32 data;
- result = pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 2, &data);
- *value = (u16)data;
- return result;
-}
-
-static int pci_conf2_read_config_dword(struct pci_dev *dev, int where, u32 *value)
-{
- return pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 4, value);
-}
-
-static int pci_conf2_write_config_byte(struct pci_dev *dev, int where, u8 value)
-{
- return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 1, value);
-}
-
-static int pci_conf2_write_config_word(struct pci_dev *dev, int where, u16 value)
-{
- return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 2, value);
-}
-
-static int pci_conf2_write_config_dword(struct pci_dev *dev, int where, u32 value)
-{
- return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 4, value);
-}
-
-static struct pci_ops pci_direct_conf2 = {
- pci_conf2_read_config_byte,
- pci_conf2_read_config_word,
- pci_conf2_read_config_dword,
- pci_conf2_write_config_byte,
- pci_conf2_write_config_word,
- pci_conf2_write_config_dword
-};
-
-
-/*
- * Before we decide to use direct hardware access mechanisms, we try to do some
- * trivial checks to ensure it at least _seems_ to be working -- we just test
- * whether bus 00 contains a host bridge (this is similar to checking
- * techniques used in XFree86, but ours should be more reliable since we
- * attempt to make use of direct access hints provided by the PCI BIOS).
- *
- * This should be close to trivial, but it isn't, because there are buggy
- * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID.
- */
-static int __devinit pci_sanity_check(struct pci_ops *o)
-{
- u16 x;
- struct pci_bus bus; /* Fake bus and device */
- struct pci_dev dev;
-
- if (pci_probe & PCI_NO_CHECKS)
- return 1;
- bus.number = 0;
- dev.bus = &bus;
- for(dev.devfn=0; dev.devfn < 0x100; dev.devfn++)
- if ((!o->read_word(&dev, PCI_CLASS_DEVICE, &x) &&
- (x == PCI_CLASS_BRIDGE_HOST || x == PCI_CLASS_DISPLAY_VGA)) ||
- (!o->read_word(&dev, PCI_VENDOR_ID, &x) &&
- (x == PCI_VENDOR_ID_INTEL || x == PCI_VENDOR_ID_COMPAQ)))
- return 1;
- DBG("PCI: Sanity check failed\n");
- return 0;
-}
-
-static struct pci_ops * __devinit pci_check_direct(void)
-{
- unsigned int tmp;
- unsigned long flags;
-
- __save_flags(flags); __cli();
-
- /*
- * Check if configuration type 1 works.
- */
- if (pci_probe & PCI_PROBE_CONF1) {
- outb (0x01, 0xCFB);
- tmp = inl (0xCF8);
- outl (0x80000000, 0xCF8);
- if (inl (0xCF8) == 0x80000000 &&
- pci_sanity_check(&pci_direct_conf1)) {
- outl (tmp, 0xCF8);
- __restore_flags(flags);
- printk(KERN_INFO "PCI: Using configuration type 1\n");
- request_region(0xCF8, 8, "PCI conf1");
-
-#ifdef CONFIG_MULTIQUAD
- /* Multi-Quad has an extended PCI Conf1 */
- if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
- return &pci_direct_mq_conf1;
-#endif
- return &pci_direct_conf1;
- }
- outl (tmp, 0xCF8);
- }
-
- /*
- * Check if configuration type 2 works.
- */
- if (pci_probe & PCI_PROBE_CONF2) {
- outb (0x00, 0xCFB);
- outb (0x00, 0xCF8);
- outb (0x00, 0xCFA);
- if (inb (0xCF8) == 0x00 && inb (0xCFA) == 0x00 &&
- pci_sanity_check(&pci_direct_conf2)) {
- __restore_flags(flags);
- printk(KERN_INFO "PCI: Using configuration type 2\n");
- request_region(0xCF8, 4, "PCI conf2");
- return &pci_direct_conf2;
- }
- }
-
- __restore_flags(flags);
- return NULL;
-}
-
-#endif
-
-/*
- * BIOS32 and PCI BIOS handling.
- */
-
-#ifdef CONFIG_PCI_BIOS
-
-#define PCIBIOS_PCI_FUNCTION_ID 0xb1XX
-#define PCIBIOS_PCI_BIOS_PRESENT 0xb101
-#define PCIBIOS_FIND_PCI_DEVICE 0xb102
-#define PCIBIOS_FIND_PCI_CLASS_CODE 0xb103
-#define PCIBIOS_GENERATE_SPECIAL_CYCLE 0xb106
-#define PCIBIOS_READ_CONFIG_BYTE 0xb108
-#define PCIBIOS_READ_CONFIG_WORD 0xb109
-#define PCIBIOS_READ_CONFIG_DWORD 0xb10a
-#define PCIBIOS_WRITE_CONFIG_BYTE 0xb10b
-#define PCIBIOS_WRITE_CONFIG_WORD 0xb10c
-#define PCIBIOS_WRITE_CONFIG_DWORD 0xb10d
-#define PCIBIOS_GET_ROUTING_OPTIONS 0xb10e
-#define PCIBIOS_SET_PCI_HW_INT 0xb10f
-
-/* BIOS32 signature: "_32_" */
-#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
-
-/* PCI signature: "PCI " */
-#define PCI_SIGNATURE (('P' << 0) + ('C' << 8) + ('I' << 16) + (' ' << 24))
-
-/* PCI service signature: "$PCI" */
-#define PCI_SERVICE (('$' << 0) + ('P' << 8) + ('C' << 16) + ('I' << 24))
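Each of the signatures above packs four ASCII characters into a
little-endian 32-bit word, so a candidate structure can be checked with a
single load and compare. A small standalone illustration (the memcpy-based
check is illustrative, not code from the patch, and assumes a little-endian
host, the only case this scan runs on):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define PCI_SERVICE (('$' << 0) + ('P' << 8) + ('C' << 16) + ('I' << 24))

int main(void)
{
    uint32_t sig;
    memcpy(&sig, "$PCI", 4);  /* the four bytes as they sit in memory */
    printf("%s\n", sig == PCI_SERVICE ? "match" : "no match");
    return 0;
}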
-
-/* PCI BIOS hardware mechanism flags */
-#define PCIBIOS_HW_TYPE1 0x01
-#define PCIBIOS_HW_TYPE2 0x02
-#define PCIBIOS_HW_TYPE1_SPEC 0x10
-#define PCIBIOS_HW_TYPE2_SPEC 0x20
-
-/*
- * This is the standard structure used to identify the entry point
- * to the BIOS32 Service Directory, as documented in
- * Standard BIOS 32-bit Service Directory Proposal
- * Revision 0.4 May 24, 1993
- * Phoenix Technologies Ltd.
- * Norwood, MA
- * and the PCI BIOS specification.
- */
-
-union bios32 {
- struct {
- unsigned long signature; /* _32_ */
- unsigned long entry; /* 32 bit physical address */
- unsigned char revision; /* Revision level, 0 */
- unsigned char length; /* Length in paragraphs should be 01 */
- unsigned char checksum; /* All bytes must add up to zero */
- unsigned char reserved[5]; /* Must be zero */
- } fields;
- char chars[16];
-};
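As the field comments state, a candidate structure is valid only when all
of its length * 16 bytes, checksum byte included, sum to zero modulo 256.
A hedged sketch of that rule, reusing the union defined above (the bounds
guard is an addition for safety, not part of the original scan loop in
pci_find_bios further below):

static int bios32_checksum_ok(const union bios32 *c)
{
    unsigned char sum = 0;
    int i, len = c->fields.length * 16; /* length is in 16-byte paragraphs */

    if (len == 0 || len > (int)sizeof(c->chars))
        return 0;
    for (i = 0; i < len; i++)
        sum += c->chars[i];
    return sum == 0; /* the checksum byte makes the total wrap to zero */
}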
-
-/*
- * Physical address of the service directory. I don't know if we're
- * allowed to have more than one of these or not, so just in case
- * we'll make pcibios_present() take a memory start parameter and store
- * the array there.
- */
-
-static struct {
- unsigned long address;
- unsigned short segment;
-} bios32_indirect = { 0, __KERNEL_CS };
-
-/*
- * Returns the entry point for the given service, NULL on error
- */
-
-static unsigned long bios32_service(unsigned long service)
-{
- unsigned char return_code; /* %al */
- unsigned long address; /* %ebx */
- unsigned long length; /* %ecx */
- unsigned long entry; /* %edx */
- unsigned long flags;
-
- __save_flags(flags); __cli();
- __asm__("lcall *(%%edi); cld"
- : "=a" (return_code),
- "=b" (address),
- "=c" (length),
- "=d" (entry)
- : "0" (service),
- "1" (0),
- "D" (&bios32_indirect));
- __restore_flags(flags);
-
- switch (return_code) {
- case 0:
- return address + entry;
- case 0x80: /* Not present */
- printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service);
- return 0;
- default: /* Shouldn't happen */
- printk(KERN_WARNING "bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n",
- service, return_code);
- return 0;
- }
-}
-
-static struct {
- unsigned long address;
- unsigned short segment;
-} pci_indirect = { 0, __KERNEL_CS };
-
-static int pci_bios_present;
-
-static int __devinit check_pcibios(void)
-{
- u32 signature, eax, ebx, ecx;
- u8 status, major_ver, minor_ver, hw_mech;
- unsigned long flags, pcibios_entry;
-
- if ((pcibios_entry = bios32_service(PCI_SERVICE))) {
- pci_indirect.address = pcibios_entry + PAGE_OFFSET;
-
- __save_flags(flags); __cli();
- __asm__(
- "lcall *(%%edi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=d" (signature),
- "=a" (eax),
- "=b" (ebx),
- "=c" (ecx)
- : "1" (PCIBIOS_PCI_BIOS_PRESENT),
- "D" (&pci_indirect)
- : "memory");
- __restore_flags(flags);
-
- status = (eax >> 8) & 0xff;
- hw_mech = eax & 0xff;
- major_ver = (ebx >> 8) & 0xff;
- minor_ver = ebx & 0xff;
- if (pcibios_last_bus < 0)
- pcibios_last_bus = ecx & 0xff;
- DBG("PCI: BIOS probe returned s=%02x hw=%02x ver=%02x.%02x l=%02x\n",
- status, hw_mech, major_ver, minor_ver, pcibios_last_bus);
- if (status || signature != PCI_SIGNATURE) {
- printk (KERN_ERR "PCI: BIOS BUG #%x[%08x] found\n",
- status, signature);
- return 0;
- }
- printk(KERN_INFO "PCI: PCI BIOS revision %x.%02x entry at 0x%lx, last bus=%d\n",
- major_ver, minor_ver, pcibios_entry, pcibios_last_bus);
-#ifdef CONFIG_PCI_DIRECT
- if (!(hw_mech & PCIBIOS_HW_TYPE1))
- pci_probe &= ~PCI_PROBE_CONF1;
- if (!(hw_mech & PCIBIOS_HW_TYPE2))
- pci_probe &= ~PCI_PROBE_CONF2;
-#endif
- return 1;
- }
- return 0;
-}
-
-static int __devinit pci_bios_find_device (unsigned short vendor, unsigned short device_id,
- unsigned short index, unsigned char *bus, unsigned char *device_fn)
-{
- unsigned short bx;
- unsigned short ret;
-
- __asm__("lcall *(%%edi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=b" (bx),
- "=a" (ret)
- : "1" (PCIBIOS_FIND_PCI_DEVICE),
- "c" (device_id),
- "d" (vendor),
- "S" ((int) index),
- "D" (&pci_indirect));
- *bus = (bx >> 8) & 0xff;
- *device_fn = bx & 0xff;
- return (int)((ret & 0xff00) >> 8);
-}
-
-static int pci_bios_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value)
-{
- unsigned long result = 0;
- unsigned long flags;
- unsigned long bx = ((bus << 8) | (dev << 3) | fn);
-
- if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
- return -EINVAL;
-
- spin_lock_irqsave(&pci_config_lock, flags);
-
- switch (len) {
- case 1:
- __asm__("lcall *(%%esi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=c" (*value),
- "=a" (result)
- : "1" (PCIBIOS_READ_CONFIG_BYTE),
- "b" (bx),
- "D" ((long)reg),
- "S" (&pci_indirect));
- break;
- case 2:
- __asm__("lcall *(%%esi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=c" (*value),
- "=a" (result)
- : "1" (PCIBIOS_READ_CONFIG_WORD),
- "b" (bx),
- "D" ((long)reg),
- "S" (&pci_indirect));
- break;
- case 4:
- __asm__("lcall *(%%esi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=c" (*value),
- "=a" (result)
- : "1" (PCIBIOS_READ_CONFIG_DWORD),
- "b" (bx),
- "D" ((long)reg),
- "S" (&pci_indirect));
- break;
- }
-
- spin_unlock_irqrestore(&pci_config_lock, flags);
-
- return (int)((result & 0xff00) >> 8);
-}
-
-static int pci_bios_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value)
-{
- unsigned long result = 0;
- unsigned long flags;
- unsigned long bx = ((bus << 8) | (dev << 3) | fn);
-
- if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
- return -EINVAL;
-
- spin_lock_irqsave(&pci_config_lock, flags);
-
- switch (len) {
- case 1:
- __asm__("lcall *(%%esi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=a" (result)
- : "0" (PCIBIOS_WRITE_CONFIG_BYTE),
- "c" (value),
- "b" (bx),
- "D" ((long)reg),
- "S" (&pci_indirect));
- break;
- case 2:
- __asm__("lcall *(%%esi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=a" (result)
- : "0" (PCIBIOS_WRITE_CONFIG_WORD),
- "c" (value),
- "b" (bx),
- "D" ((long)reg),
- "S" (&pci_indirect));
- break;
- case 4:
- __asm__("lcall *(%%esi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=a" (result)
- : "0" (PCIBIOS_WRITE_CONFIG_DWORD),
- "c" (value),
- "b" (bx),
- "D" ((long)reg),
- "S" (&pci_indirect));
- break;
- }
-
- spin_unlock_irqrestore(&pci_config_lock, flags);
-
- return (int)((result & 0xff00) >> 8);
-}
-
-static int pci_bios_read_config_byte(struct pci_dev *dev, int where, u8 *value)
-{
- int result;
- u32 data;
-
- if (!value)
- BUG();
-
- result = pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 1, &data);
-
- *value = (u8)data;
-
- return result;
-}
-
-static int pci_bios_read_config_word(struct pci_dev *dev, int where, u16 *value)
-{
- int result;
- u32 data;
-
- if (!value)
- BUG();
-
- result = pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 2, &data);
-
- *value = (u16)data;
-
- return result;
-}
-
-static int pci_bios_read_config_dword(struct pci_dev *dev, int where, u32 *value)
-{
- if (!value)
- BUG();
-
- return pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 4, value);
-}
-
-static int pci_bios_write_config_byte(struct pci_dev *dev, int where, u8 value)
-{
- return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 1, value);
-}
-
-static int pci_bios_write_config_word(struct pci_dev *dev, int where, u16 value)
-{
- return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 2, value);
-}
-
-static int pci_bios_write_config_dword(struct pci_dev *dev, int where, u32 value)
-{
- return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 4, value);
-}
-
-
-/*
- * Function table for BIOS32 access
- */
-
-static struct pci_ops pci_bios_access = {
- pci_bios_read_config_byte,
- pci_bios_read_config_word,
- pci_bios_read_config_dword,
- pci_bios_write_config_byte,
- pci_bios_write_config_word,
- pci_bios_write_config_dword
-};
-
-/*
- * Try to find PCI BIOS.
- */
-
-static struct pci_ops * __devinit pci_find_bios(void)
-{
- union bios32 *check;
- unsigned char sum;
- int i, length;
-
- /*
- * Follow the standard procedure for locating the BIOS32 Service
- * directory by scanning the permissible address range from
- * 0xe0000 through 0xfffff for a valid BIOS32 structure.
- */
-
- for (check = (union bios32 *) __va(0xe0000);
- check <= (union bios32 *) __va(0xffff0);
- ++check) {
- if (check->fields.signature != BIOS32_SIGNATURE)
- continue;
- length = check->fields.length * 16;
- if (!length)
- continue;
- sum = 0;
- for (i = 0; i < length ; ++i)
- sum += check->chars[i];
- if (sum != 0)
- continue;
- if (check->fields.revision != 0) {
- printk("PCI: unsupported BIOS32 revision %d at 0x%p\n",
- check->fields.revision, check);
- continue;
- }
- DBG("PCI: BIOS32 Service Directory structure at 0x%p\n", check);
- if (check->fields.entry >= 0x100000) {
- printk("PCI: BIOS32 entry (0x%p) in high memory, cannot use.\n", check);
- return NULL;
- } else {
- unsigned long bios32_entry = check->fields.entry;
- DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", bios32_entry);
- bios32_indirect.address = bios32_entry + PAGE_OFFSET;
- if (check_pcibios())
- return &pci_bios_access;
- }
- break; /* Hopefully there cannot be more than one BIOS32 structure... */
- }
-
- return NULL;
-}
-
-/*
- * Sort the device list according to PCI BIOS. Nasty hack, but since some
- * fool forgot to define the `correct' device order in the PCI BIOS specs
- * and we want to be (possibly bug-to-bug ;-]) compatible with older kernels
- * which used BIOS ordering, we are bound to do this...
- */
-
-static void __devinit pcibios_sort(void)
-{
- LIST_HEAD(sorted_devices);
- struct list_head *ln;
- struct pci_dev *dev, *d;
- int idx, found;
- unsigned char bus, devfn;
-
- DBG("PCI: Sorting device list...\n");
- while (!list_empty(&pci_devices)) {
- ln = pci_devices.next;
- dev = pci_dev_g(ln);
- idx = found = 0;
- while (pci_bios_find_device(dev->vendor, dev->device, idx, &bus, &devfn) == PCIBIOS_SUCCESSFUL) {
- idx++;
- for (ln=pci_devices.next; ln != &pci_devices; ln=ln->next) {
- d = pci_dev_g(ln);
- if (d->bus->number == bus && d->devfn == devfn) {
- list_del(&d->global_list);
- list_add_tail(&d->global_list, &sorted_devices);
- if (d == dev)
- found = 1;
- break;
- }
- }
- if (ln == &pci_devices) {
- printk(KERN_WARNING "PCI: BIOS reporting unknown device %02x:%02x\n", bus, devfn);
- /*
- * We must not continue scanning as several buggy BIOSes
- * return garbage after the last device. Grr.
- */
- break;
- }
- }
- if (!found) {
- printk(KERN_WARNING "PCI: Device %02x:%02x not found by BIOS\n",
- dev->bus->number, dev->devfn);
- list_del(&dev->global_list);
- list_add_tail(&dev->global_list, &sorted_devices);
- }
- }
- list_splice(&sorted_devices, &pci_devices);
-}
-
-/*
- * BIOS Functions for IRQ Routing
- */
-
-struct irq_routing_options {
- u16 size;
- struct irq_info *table;
- u16 segment;
-} __attribute__((packed));
-
-struct irq_routing_table * __devinit pcibios_get_irq_routing_table(void)
-{
- struct irq_routing_options opt;
- struct irq_routing_table *rt = NULL;
- int ret, map;
- unsigned long page;
-
- if (!pci_bios_present)
- return NULL;
- page = __get_free_page(GFP_KERNEL);
- if (!page)
- return NULL;
- opt.table = (struct irq_info *) page;
- opt.size = PAGE_SIZE;
- opt.segment = __KERNEL_DS;
-
- DBG("PCI: Fetching IRQ routing table... ");
- __asm__("push %%es\n\t"
- "push %%ds\n\t"
- "pop %%es\n\t"
- "lcall *(%%esi); cld\n\t"
- "pop %%es\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=a" (ret),
- "=b" (map),
- "+m" (opt)
- : "0" (PCIBIOS_GET_ROUTING_OPTIONS),
- "1" (0),
- "D" ((long) &opt),
- "S" (&pci_indirect));
- DBG("OK ret=%d, size=%d, map=%x\n", ret, opt.size, map);
- if (ret & 0xff00)
- printk(KERN_ERR "PCI: Error %02x when fetching IRQ routing table.\n", (ret >> 8) & 0xff);
- else if (opt.size) {
- rt = kmalloc(sizeof(struct irq_routing_table) + opt.size, GFP_KERNEL);
- if (rt) {
- memset(rt, 0, sizeof(struct irq_routing_table));
- rt->size = opt.size + sizeof(struct irq_routing_table);
- rt->exclusive_irqs = map;
- memcpy(rt->slots, (void *) page, opt.size);
- printk(KERN_INFO "PCI: Using BIOS Interrupt Routing Table\n");
- }
- }
- free_page(page);
- return rt;
-}
-
-
-int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq)
-{
- int ret;
-
- __asm__("lcall *(%%esi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=a" (ret)
- : "0" (PCIBIOS_SET_PCI_HW_INT),
- "b" ((dev->bus->number << 8) | dev->devfn),
- "c" ((irq << 8) | (pin + 10)),
- "S" (&pci_indirect));
- return !(ret & 0xff00);
-}
-
-#endif
-
-/*
- * Several buggy motherboards address only 16 devices and mirror
- * them to the next 16 IDs. We try to detect this `feature' on all
- * primary buses (those containing host bridges, as these are
- * expected to be unique) and remove the ghost devices.
- */
-
-static void __devinit pcibios_fixup_ghosts(struct pci_bus *b)
-{
- struct list_head *ln, *mn;
- struct pci_dev *d, *e;
- int mirror = PCI_DEVFN(16,0);
- int seen_host_bridge = 0;
- int i;
-
- DBG("PCI: Scanning for ghost devices on bus %d\n", b->number);
- for (ln=b->devices.next; ln != &b->devices; ln=ln->next) {
- d = pci_dev_b(ln);
- if ((d->class >> 8) == PCI_CLASS_BRIDGE_HOST)
- seen_host_bridge++;
- for (mn=ln->next; mn != &b->devices; mn=mn->next) {
- e = pci_dev_b(mn);
- if (e->devfn != d->devfn + mirror ||
- e->vendor != d->vendor ||
- e->device != d->device ||
- e->class != d->class)
- continue;
- for(i=0; i<PCI_NUM_RESOURCES; i++)
- if (e->resource[i].start != d->resource[i].start ||
- e->resource[i].end != d->resource[i].end ||
- e->resource[i].flags != d->resource[i].flags)
- continue;
- break;
- }
- if (mn == &b->devices)
- return;
- }
- if (!seen_host_bridge)
- return;
- printk(KERN_WARNING "PCI: Ignoring ghost devices on bus %02x\n", b->number);
-
- ln = &b->devices;
- while (ln->next != &b->devices) {
- d = pci_dev_b(ln->next);
- if (d->devfn >= mirror) {
- list_del(&d->global_list);
- list_del(&d->bus_list);
- kfree(d);
- } else
- ln = ln->next;
- }
-}
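The mirror offset used above follows from the devfn encoding: the slot
lives in bits 7:3 and the function in bits 2:0, so a decoder that only
sees 16 slots aliases slot s to slot s + 16, i.e. devfn + 0x80. A
standalone illustration, with the macros restated locally so it is
self-contained:

#include <stdio.h>

#define PCI_DEVFN(slot, fn) ((((slot) & 0x1f) << 3) | ((fn) & 0x07))
#define PCI_SLOT(devfn)     (((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn)     ((devfn) & 0x07)

int main(void)
{
    int devfn = PCI_DEVFN(2, 1);           /* device 2, function 1 */
    int ghost = devfn + PCI_DEVFN(16, 0);  /* its mirror on a 16-slot bus */
    printf("%02x.%d -> ghost %02x.%d\n",
           PCI_SLOT(devfn), PCI_FUNC(devfn),
           PCI_SLOT(ghost), PCI_FUNC(ghost)); /* 02.1 -> ghost 12.1 */
    return 0;
}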
-
-/*
- * Discover remaining PCI buses in case there are peer host bridges.
- * We use the number of last PCI bus provided by the PCI BIOS.
- */
-static void __devinit pcibios_fixup_peer_bridges(void)
-{
- int n;
- struct pci_bus bus;
- struct pci_dev dev;
- u16 l;
-
- if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff)
- return;
- DBG("PCI: Peer bridge fixup\n");
- for (n=0; n <= pcibios_last_bus; n++) {
- if (pci_bus_exists(&pci_root_buses, n))
- continue;
- bus.number = n;
- bus.ops = pci_root_ops;
- dev.bus = &bus;
- for(dev.devfn=0; dev.devfn<256; dev.devfn += 8)
- if (!pci_read_config_word(&dev, PCI_VENDOR_ID, &l) &&
- l != 0x0000 && l != 0xffff) {
- DBG("Found device at %02x:%02x [%04x]\n", n, dev.devfn, l);
- printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n);
- pci_scan_bus(n, pci_root_ops, NULL);
- break;
- }
- }
-}
-
-/*
- * Exceptions for specific devices. Usually work-arounds for fatal design flaws.
- */
-
-static void __devinit pci_fixup_i450nx(struct pci_dev *d)
-{
- /*
- * i450NX -- Find and scan all secondary buses on all PXB's.
- */
- int pxb, reg;
- u8 busno, suba, subb;
-#ifdef CONFIG_MULTIQUAD
- int quad = BUS2QUAD(d->bus->number);
-#endif
- printk("PCI: Searching for i450NX host bridges on %s\n", d->slot_name);
- reg = 0xd0;
- for(pxb=0; pxb<2; pxb++) {
- pci_read_config_byte(d, reg++, &busno);
- pci_read_config_byte(d, reg++, &suba);
- pci_read_config_byte(d, reg++, &subb);
- DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb);
- if (busno)
- pci_scan_bus(QUADLOCAL2BUS(quad,busno), pci_root_ops, NULL); /* Bus A */
- if (suba < subb)
- pci_scan_bus(QUADLOCAL2BUS(quad,suba+1), pci_root_ops, NULL); /* Bus B */
- }
- pcibios_last_bus = -1;
-}
-
-static void __devinit pci_fixup_i450gx(struct pci_dev *d)
-{
- /*
- * i450GX and i450KX -- Find and scan all secondary buses.
- * (called separately for each PCI bridge found)
- */
- u8 busno;
- pci_read_config_byte(d, 0x4a, &busno);
- printk(KERN_INFO "PCI: i450KX/GX host bridge %s: secondary bus %02x\n", d->slot_name, busno);
- pci_scan_bus(busno, pci_root_ops, NULL);
- pcibios_last_bus = -1;
-}
-
-static void __devinit pci_fixup_umc_ide(struct pci_dev *d)
-{
- /*
- * The UM8886BF IDE controller sets its region type bits incorrectly,
- * so the regions look like memory despite being I/O.
- */
- int i;
-
- printk(KERN_WARNING "PCI: Fixing base address flags for device %s\n", d->slot_name);
- for(i=0; i<4; i++)
- d->resource[i].flags |= PCI_BASE_ADDRESS_SPACE_IO;
-}
-
-static void __devinit pci_fixup_ncr53c810(struct pci_dev *d)
-{
- /*
- * NCR 53C810 returns class code 0 (at least on some systems).
- * Fix class to be PCI_CLASS_STORAGE_SCSI
- */
- if (!d->class) {
- printk("PCI: fixing NCR 53C810 class code for %s\n", d->slot_name);
- d->class = PCI_CLASS_STORAGE_SCSI << 8;
- }
-}
-
-static void __devinit pci_fixup_ide_bases(struct pci_dev *d)
-{
- int i;
-
- /*
- * PCI IDE controllers use non-standard I/O port decoding, respect it.
- */
- if ((d->class >> 8) != PCI_CLASS_STORAGE_IDE)
- return;
- DBG("PCI: IDE base address fixup for %s\n", d->slot_name);
- for(i=0; i<4; i++) {
- struct resource *r = &d->resource[i];
- if ((r->start & ~0x80) == 0x374) {
- r->start |= 2;
- r->end = r->start;
- }
- }
-}
-
-static void __devinit pci_fixup_ide_trash(struct pci_dev *d)
-{
- int i;
-
- /*
- * There exist PCI IDE controllers which have utter garbage
- * in the first four base registers. Ignore them.
- */
- DBG("PCI: IDE base address trash cleared for %s\n", d->slot_name);
- for(i=0; i<4; i++)
- d->resource[i].start = d->resource[i].end = d->resource[i].flags = 0;
-}
-
-static void __devinit pci_fixup_latency(struct pci_dev *d)
-{
- /*
- * The SiS 5597 and 5598 chipsets require the latency timer to be
- * set to at most 32 to avoid lockups.
- */
- DBG("PCI: Setting max latency to 32\n");
- pcibios_max_latency = 32;
-}
-
-static void __devinit pci_fixup_piix4_acpi(struct pci_dev *d)
-{
- /*
- * PIIX4 ACPI device: hardwired IRQ9
- */
- d->irq = 9;
-}
-
-/*
- * Works around problems with the memory write queue timer in
- * certain VIA Northbridges. This bugfix is per VIA's specifications,
- * except for the KL133/KM133: clearing bit 5 on those Northbridges seems
- * to trigger a bug in its integrated ProSavage video card, which
- * causes screen corruption. We only clear bits 6 and 7 for that chipset,
- * until VIA can provide us with definitive information on why screen
- * corruption occurs, and what exactly those bits do.
- *
- * VIA 8363,8622,8361 Northbridges:
- * - bits 5, 6, 7 at offset 0x55 need to be turned off
- * VIA 8367 (KT266x) Northbridges:
- * - bits 5, 6, 7 at offset 0x95 need to be turned off
- * VIA 8363 rev 0x81/0x84 (KL133/KM133) Northbridges:
- * - bits 6, 7 at offset 0x55 need to be turned off
- */
-
-#define VIA_8363_KL133_REVISION_ID 0x81
-#define VIA_8363_KM133_REVISION_ID 0x84
-
-static void __init pci_fixup_via_northbridge_bug(struct pci_dev *d)
-{
- u8 v;
- u8 revision;
- int where = 0x55;
- int mask = 0x1f; /* clear bits 5, 6, 7 by default */
-
- pci_read_config_byte(d, PCI_REVISION_ID, &revision);
-
- if (d->device == PCI_DEVICE_ID_VIA_8367_0) {
- /* Fix PCI bus latency problems caused by a northbridge BIOS error:
- the "bug free"^W"bug reduced" KT266x BIOSes force the NB
- latency timer to zero */
- pci_write_config_byte(d, PCI_LATENCY_TIMER, 0);
-
- where = 0x95; /* the memory write queue timer register is
- different for the KT266x's: 0x95 not 0x55 */
- } else if (d->device == PCI_DEVICE_ID_VIA_8363_0 &&
- (revision == VIA_8363_KL133_REVISION_ID ||
- revision == VIA_8363_KM133_REVISION_ID)) {
- mask = 0x3f; /* clear only bits 6 and 7; clearing bit 5
- causes screen corruption on the KL133/KM133 */
- }
-
- pci_read_config_byte(d, where, &v);
- if (v & ~mask) {
- printk("Disabling VIA memory write queue (PCI ID %04x, rev %02x): [%02x] %02x & %02x -> %02x\n", \
- d->device, revision, where, v, mask, v & mask);
- v &= mask;
- pci_write_config_byte(d, where, v);
- }
-}
-
-/*
- * For some reason Intel decided that certain parts of their
- * 815, 845 and some other chipsets must look like PCI-to-PCI bridges
- * while they obviously are not. The 82801 family (AA, AB, BAM/CAM,
- * BA/CA/DB and E) PCI bridges are actually HUB-to-PCI ones, according
- * to Intel terminology. These devices forward all addresses from the
- * system to the PCI bus no matter what their window settings are, so
- * they are "transparent" (subtractive decoding) from the programmer's
- * point of view.
- */
-static void __init pci_fixup_transparent_bridge(struct pci_dev *dev)
-{
- if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI &&
- (dev->device & 0xff00) == 0x2400)
- dev->transparent = 1;
-}
-
-struct pci_fixup pcibios_fixups[] = {
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5513, pci_fixup_ide_trash },
- { PCI_FIXUP_HEADER, PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, pci_fixup_latency },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5598, pci_fixup_latency },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, pci_fixup_piix4_acpi },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, pci_fixup_via_northbridge_bug },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8622, pci_fixup_via_northbridge_bug },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8361, pci_fixup_via_northbridge_bug },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_via_northbridge_bug },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810 },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixup_transparent_bridge },
- { 0 }
-};
-
-/*
- * Called after each bus is probed, but before its children
- * are examined.
- */
-
-void __devinit pcibios_fixup_bus(struct pci_bus *b)
-{
- pcibios_fixup_ghosts(b);
- pci_read_bridge_bases(b);
-}
-
-struct pci_bus * __devinit pcibios_scan_root(int busnum)
-{
- struct list_head *list;
- struct pci_bus *bus;
-
- list_for_each(list, &pci_root_buses) {
- bus = pci_bus_b(list);
- if (bus->number == busnum) {
- /* Already scanned */
- return bus;
- }
- }
-
- printk("PCI: Probing PCI hardware (bus %02x)\n", busnum);
-
- return pci_scan_bus(busnum, pci_root_ops, NULL);
-}
-
-void __devinit pcibios_config_init(void)
-{
- /*
- * Try all known PCI access methods. Note that we support using
- * both PCI BIOS and direct access, with a preference for direct.
- */
-
-#ifdef CONFIG_PCI_DIRECT
- struct pci_ops *tmp = NULL;
-#endif
-
-
-#ifdef CONFIG_PCI_BIOS
- if ((pci_probe & PCI_PROBE_BIOS)
- && ((pci_root_ops = pci_find_bios()))) {
- pci_probe |= PCI_BIOS_SORT;
- pci_bios_present = 1;
- pci_config_read = pci_bios_read;
- pci_config_write = pci_bios_write;
- }
-#endif
-
-#ifdef CONFIG_PCI_DIRECT
- if ((pci_probe & (PCI_PROBE_CONF1 | PCI_PROBE_CONF2))
- && (tmp = pci_check_direct())) {
- pci_root_ops = tmp;
- if (pci_root_ops == &pci_direct_conf1) {
- pci_config_read = pci_conf1_read;
- pci_config_write = pci_conf1_write;
- }
- else {
- pci_config_read = pci_conf2_read;
- pci_config_write = pci_conf2_write;
- }
- }
-#endif
-
- return;
-}
-
-void __init pcibios_init(void)
-{
- int quad;
-
- if (!pci_root_ops)
- pcibios_config_init();
- if (!pci_root_ops) {
- printk(KERN_WARNING "PCI: System does not support PCI\n");
- return;
- }
-
- pcibios_set_cacheline_size();
-
- printk(KERN_INFO "PCI: Probing PCI hardware\n");
-#ifdef CONFIG_ACPI_PCI
- if (!acpi_noirq && !acpi_pci_irq_init()) {
- pci_using_acpi_prt = 1;
- printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n");
- printk(KERN_INFO "PCI: if you experience problems, try using option 'pci=noacpi' or even 'acpi=off'\n");
- }
-#endif
- if (!pci_using_acpi_prt) {
- pci_root_bus = pcibios_scan_root(0);
- pcibios_irq_init();
- pcibios_fixup_peer_bridges();
- pcibios_fixup_irqs();
- }
- if (clustered_apic_mode && (numnodes > 1)) {
- for (quad = 1; quad < numnodes; ++quad) {
- printk("Scanning PCI bus %d for quad %d\n",
- QUADLOCAL2BUS(quad,0), quad);
- pci_scan_bus(QUADLOCAL2BUS(quad,0),
- pci_root_ops, NULL);
- }
- }
-
- pcibios_resource_survey();
-
-#ifdef CONFIG_PCI_BIOS
- if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT))
- pcibios_sort();
-#endif
-}
-
-char * __devinit pcibios_setup(char *str)
-{
- if (!strcmp(str, "off")) {
- pci_probe = 0;
- return NULL;
- }
-#ifdef CONFIG_PCI_BIOS
- else if (!strcmp(str, "bios")) {
- pci_probe = PCI_PROBE_BIOS;
- return NULL;
- } else if (!strcmp(str, "nobios")) {
- pci_probe &= ~PCI_PROBE_BIOS;
- return NULL;
- } else if (!strcmp(str, "nosort")) {
- pci_probe |= PCI_NO_SORT;
- return NULL;
- } else if (!strcmp(str, "biosirq")) {
- pci_probe |= PCI_BIOS_IRQ_SCAN;
- return NULL;
- }
-#endif
-#ifdef CONFIG_PCI_DIRECT
- else if (!strcmp(str, "conf1")) {
- pci_probe = PCI_PROBE_CONF1 | PCI_NO_CHECKS;
- return NULL;
- }
- else if (!strcmp(str, "conf2")) {
- pci_probe = PCI_PROBE_CONF2 | PCI_NO_CHECKS;
- return NULL;
- }
-#endif
- else if (!strcmp(str, "rom")) {
- pci_probe |= PCI_ASSIGN_ROMS;
- return NULL;
- } else if (!strcmp(str, "assign-busses")) {
- pci_probe |= PCI_ASSIGN_ALL_BUSSES;
- return NULL;
- } else if (!strncmp(str, "irqmask=", 8)) {
- pcibios_irq_mask = simple_strtol(str+8, NULL, 0);
- return NULL;
- } else if (!strncmp(str, "lastbus=", 8)) {
- pcibios_last_bus = simple_strtol(str+8, NULL, 0);
- return NULL;
- } else if (!strncmp(str, "noacpi", 6)) {
- acpi_noirq_set();
- return NULL;
- }
- return str;
-}
-
-unsigned int pcibios_assign_all_busses(void)
-{
- return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0;
-}
-
-int pcibios_enable_device(struct pci_dev *dev, int mask)
-{
- int err;
-
- if ((err = pcibios_enable_resources(dev, mask)) < 0)
- return err;
-
-#ifdef CONFIG_ACPI_PCI
- if (pci_using_acpi_prt) {
- acpi_pci_irq_enable(dev);
- return 0;
- }
-#endif
-
- pcibios_enable_irq(dev);
-
- return 0;
-}
+++ /dev/null
-
-/*
- * pervasive debugger
- * www.cl.cam.ac.uk/netos/pdb
- *
- * alex ho
- * 2004
- * university of cambridge computer laboratory
- *
- * linux & i386 dependent code. bleech.
- */
-
-#include <asm/pdb.h>
-
-/* offset to the first instruction in the linux system call code
- where we can safely set a breakpoint */
-unsigned int pdb_linux_syscall_enter_bkpt_offset = 20;
-
-/* offset to eflags saved on the stack after an int 80 */
-unsigned int pdb_linux_syscall_eflags_offset = 48;
-
-/* offset to the instruction pointer saved on the stack after an int 80 */
-unsigned int pdb_linux_syscall_eip_offset = 40;
-
-unsigned char
-pdb_linux_set_bkpt (unsigned long addr)
-{
- unsigned char old_instruction = *(unsigned char *)addr;
- *(unsigned char *)addr = 0xcc;
- return old_instruction;
-}
-
-void
-pdb_linux_clr_bkpt (unsigned long addr, unsigned char value)
-{
- *(unsigned char *)addr = value;
-}
-
-void
-pdb_linux_syscall_enter_bkpt (struct pt_regs *regs, long error_code,
- trap_info_t *ti)
-{
- /* set a breakpoint at the beginning of the
- system call in the target domain */
-
- pdb_system_call_enter_instr = pdb_linux_set_bkpt(ti->address +
- pdb_linux_syscall_enter_bkpt_offset);
- pdb_system_call = 1;
-}
-
-void
-pdb_linux_syscall_exit_bkpt (struct pt_regs *regs, struct pdb_context *pdb_ctx)
-{
- /*
- we've hit an int 0x80 in a user's program, jumped into xen
- (traps.c::do_general_protection()) which re-wrote the next
- instruction in the os kernel to 0xcc, and then hit that
- exception.
-
- we need to re-write the return instruction in the user's
- program so that we know when we have finished the system call
- and are back in the user's program.
-
- at this point our stack should look something like this:
-
- esp = 0x80a59f0
- esp + 4 = 0x0
- esp + 8 = 0x80485a0
- esp + 12 = 0x2d
- esp + 16 = 0x80485f4
- esp + 20 = 0xbffffa48
- esp + 24 = 0xd
- esp + 28 = 0xc00a0833
- esp + 32 = 0x833
- esp + 36 = 0xd
- esp + 40 = 0x804dcdd saved eip
- esp + 44 = 0x82b saved cs
- esp + 48 = 0x213392 saved eflags
- esp + 52 = 0xbffffa2c saved esp
- esp + 56 = 0x833 saved ss
- esp + 60 = 0x1000000
- */
-
- /* restore the entry instruction for the system call */
- pdb_linux_clr_bkpt(regs->eip - 1, pdb_system_call_enter_instr);
-
- /* save the address of eflags that was saved on the stack */
- pdb_system_call_eflags_addr = (regs->esp +
- pdb_linux_syscall_eflags_offset);
-
- /* muck with the return instruction so that we trap back into the
- debugger when re-entering user space */
- pdb_system_call_next_addr = *(unsigned long *)(regs->esp +
- pdb_linux_syscall_eip_offset);
- pdb_linux_get_values (&pdb_system_call_leave_instr, 1,
- pdb_system_call_next_addr,
- pdb_ctx->process, pdb_ctx->ptbr);
- pdb_linux_set_values ("cc", 1, pdb_system_call_next_addr,
- pdb_ctx->process, pdb_ctx->ptbr);
-}
+++ /dev/null
-
-/*
- * pervasive debugger
- * www.cl.cam.ac.uk/netos/pdb
- *
- * alex ho
- * 2004
- * university of cambridge computer laboratory
- *
- * code adapted originally from kgdb, nemesis, & gdbserver
- */
-
-#include <xen/lib.h>
-#include <xen/sched.h>
-#include <asm-i386/ptrace.h>
-#include <xen/keyhandler.h>
-#include <asm/apic.h>
-#include <asm/domain_page.h> /* [un]map_domain_mem */
-#include <asm/processor.h>
-#include <asm/pdb.h>
-#include <xen/list.h>
-#include <xen/serial.h>
-
-#undef PDB_DEBUG_TRACE
-#ifdef PDB_DEBUG_TRACE
-#define TRC(_x) _x
-#else
-#define TRC(_x)
-#endif
-
-#define DEBUG_EXCEPTION 0x01
-#define BREAKPT_EXCEPTION 0x03
-#define PDB_LIVE_EXCEPTION 0x58
-#define KEYPRESS_EXCEPTION 0x88
-
-#define BUFMAX 400
-
-static const char hexchars[] = "0123456789abcdef";
-
-static int remote_debug;
-
-#define PDB_BUFMAX 1024
-static char pdb_in_buffer[PDB_BUFMAX];
-static char pdb_out_buffer[PDB_BUFMAX];
-static char pdb_buffer[PDB_BUFMAX];
-static int pdb_in_buffer_ptr;
-static unsigned char pdb_in_checksum;
-static unsigned char pdb_xmit_checksum;
-
-struct pdb_context pdb_ctx;
-int pdb_continue_thread = 0;
-int pdb_general_thread = 0;
-
-void pdb_put_packet (unsigned char *buffer, int ack);
-void pdb_bkpt_check (u_char *buffer, int length,
- unsigned long cr3, unsigned long addr);
-
-int pdb_initialized = 0;
-int pdb_page_fault_possible = 0;
-int pdb_page_fault_scratch = 0; /* just a handy variable */
-int pdb_page_fault = 0;
-static int pdb_serhnd = -1;
-static int pdb_stepping = 0;
-
-int pdb_system_call = 0;
-unsigned char pdb_system_call_enter_instr = 0; /* original enter instr */
-unsigned char pdb_system_call_leave_instr = 0; /* original next instr */
-unsigned long pdb_system_call_next_addr = 0; /* instr after int 0x80 */
-unsigned long pdb_system_call_eflags_addr = 0; /* saved eflags on stack */
-
-static inline void pdb_put_char(unsigned char c)
-{
- serial_putc(pdb_serhnd, c);
-}
-
-static inline unsigned char pdb_get_char(void)
-{
- return serial_getc(pdb_serhnd);
-}
-
-int
-get_char (char *addr)
-{
- return *addr;
-}
-
-void
-set_char (char *addr, int val)
-{
- *addr = val;
-}
-
-void
-pdb_process_query (char *ptr)
-{
- if (strcmp(ptr, "C") == 0)
- {
- /* empty string */
- }
- else if (strcmp(ptr, "fThreadInfo") == 0)
- {
-#ifdef PDB_PAST
- struct task_struct *p;
- u_long flags;
-#endif /* PDB_PAST */
-
- int buf_idx = 0;
-
- pdb_out_buffer[buf_idx++] = 'l';
- pdb_out_buffer[buf_idx++] = 0;
-
-#ifdef PDB_PAST
- switch (pdb_level)
- {
- case PDB_LVL_XEN: /* return a list of domains */
- {
- int count = 0;
-
- read_lock_irqsave (&tasklist_lock, flags);
-
- pdb_out_buffer[buf_idx++] = 'm';
- for_each_domain ( p )
- {
- domid_t domain = p->domain + PDB_ID_OFFSET;
-
- if (count > 0)
- {
- pdb_out_buffer[buf_idx++] = ',';
- }
- if (domain > 15)
- {
- pdb_out_buffer[buf_idx++] = hexchars[domain >> 4];
- }
- pdb_out_buffer[buf_idx++] = hexchars[domain % 16];
- count++;
- }
- pdb_out_buffer[buf_idx++] = 0;
-
- read_unlock_irqrestore(&tasklist_lock, flags);
- break;
- }
- case PDB_LVL_GUESTOS: /* return a list of processes */
- {
- int foobar[20];
- int loop, total;
-
- /* this cr3 is wrong! */
- total = pdb_linux_process_list(pdb_ctx[pdb_level].info_cr3,
- foobar, 20);
-
- pdb_out_buffer[buf_idx++] = 'm';
- pdb_out_buffer[buf_idx++] = '1'; /* 1 is to go back */
- for (loop = 0; loop < total; loop++)
- {
- int pid = foobar[loop] + PDB_ID_OFFSET;
-
- pdb_out_buffer[buf_idx++] = ',';
- if (pid > 15)
- {
- pdb_out_buffer[buf_idx++] = hexchars[pid >> 4];
- }
- pdb_out_buffer[buf_idx++] = hexchars[pid % 16];
- }
- pdb_out_buffer[buf_idx++] = 0;
- break;
- }
- case PDB_LVL_PROCESS: /* hmmm... */
- {
- pdb_out_buffer[buf_idx++] = 'm';
- pdb_out_buffer[buf_idx++] = '1'; /* 1 is to go back */
- break;
- }
- default:
- break;
- }
-#endif /* PDB_PAST */
-
- }
- else if (strcmp(ptr, "sThreadInfo") == 0)
- {
- int buf_idx = 0;
-
- pdb_out_buffer[buf_idx++] = 'l';
- pdb_out_buffer[buf_idx++] = 0;
- }
- else if (strncmp(ptr, "ThreadExtraInfo,", 16) == 0)
- {
- int thread = 0;
- char *message = "foobar ?";
-
- ptr += 16;
- if (hexToInt (&ptr, &thread))
- {
- mem2hex (message, pdb_out_buffer, strlen(message) + 1);
- }
-
-#ifdef PDB_PAST
- int thread = 0;
- char message[16];
- struct task_struct *p;
-
- p = find_domain_by_id(pdb_ctx[pdb_level].info);
- strncpy (message, p->name, 16);
- put_task_struct(p);
-
- ptr += 16;
- if (hexToInt (&ptr, &thread))
- {
- mem2hex ((char *)message, pdb_out_buffer, strlen(message) + 1);
- }
-#endif /* PDB_PAST */
-
-#ifdef PDB_FUTURE
- {
- char string[task_struct_comm_length];
-
- string[0] = 0;
- pdb_linux_process_details (cr3, pid, string);
- printk (" (%s)", string);
- }
-#endif /* PDB_FUTURE*/
-
- }
- else if (strcmp(ptr, "Offsets") == 0)
- {
- /* empty string */
- }
- else if (strncmp(ptr, "Symbol", 6) == 0)
- {
- strcpy (pdb_out_buffer, "OK");
- }
- else
- {
- printk("pdb: error, unknown query [%s]\n", ptr);
- }
-}
-
-void
-pdb_x86_to_gdb_regs (char *buffer, struct pt_regs *regs)
-{
- int idx = 0;
-
- mem2hex ((char *)&regs->eax, &buffer[idx], sizeof(regs->eax));
- idx += sizeof(regs->eax) * 2;
- mem2hex ((char *)&regs->ecx, &buffer[idx], sizeof(regs->ecx));
- idx += sizeof(regs->ecx) * 2;
- mem2hex ((char *)&regs->edx, &buffer[idx], sizeof(regs->edx));
- idx += sizeof(regs->edx) * 2;
- mem2hex ((char *)&regs->ebx, &buffer[idx], sizeof(regs->ebx));
- idx += sizeof(regs->ebx) * 2;
- mem2hex ((char *)&regs->esp, &buffer[idx], sizeof(regs->esp));
- idx += sizeof(regs->esp) * 2;
- mem2hex ((char *)&regs->ebp, &buffer[idx], sizeof(regs->ebp));
- idx += sizeof(regs->ebp) * 2;
- mem2hex ((char *)&regs->esi, &buffer[idx], sizeof(regs->esi));
- idx += sizeof(regs->esi) * 2;
- mem2hex ((char *)&regs->edi, &buffer[idx], sizeof(regs->edi));
- idx += sizeof(regs->edi) * 2;
- mem2hex ((char *)&regs->eip, &buffer[idx], sizeof(regs->eip));
- idx += sizeof(regs->eip) * 2;
- mem2hex ((char *)&regs->eflags, &buffer[idx], sizeof(regs->eflags));
- idx += sizeof(regs->eflags) * 2;
- mem2hex ((char *)&regs->xcs, &buffer[idx], sizeof(regs->xcs));
- idx += sizeof(regs->xcs) * 2;
- mem2hex ((char *)&regs->xss, &buffer[idx], sizeof(regs->xss));
- idx += sizeof(regs->xss) * 2;
- mem2hex ((char *)&regs->xds, &buffer[idx], sizeof(regs->xds));
- idx += sizeof(regs->xds) * 2;
- mem2hex ((char *)&regs->xes, &buffer[idx], sizeof(regs->xes));
- idx += sizeof(regs->xes) * 2;
- mem2hex ((char *)&regs->xfs, &buffer[idx], sizeof(regs->xfs));
- idx += sizeof(regs->xfs) * 2;
- mem2hex ((char *)&regs->xgs, &buffer[idx], sizeof(regs->xgs));
-}
-
-/* at this point we allow any register to be changed, caveat emptor */
-void
-pdb_gdb_to_x86_regs (struct pt_regs *regs, char *buffer)
-{
- hex2mem(buffer, (char *)&regs->eax, sizeof(regs->eax));
- buffer += sizeof(regs->eax) * 2;
- hex2mem(buffer, (char *)&regs->ecx, sizeof(regs->ecx));
- buffer += sizeof(regs->ecx) * 2;
- hex2mem(buffer, (char *)&regs->edx, sizeof(regs->edx));
- buffer += sizeof(regs->edx) * 2;
- hex2mem(buffer, (char *)&regs->ebx, sizeof(regs->ebx));
- buffer += sizeof(regs->ebx) * 2;
- hex2mem(buffer, (char *)&regs->esp, sizeof(regs->esp));
- buffer += sizeof(regs->esp) * 2;
- hex2mem(buffer, (char *)&regs->ebp, sizeof(regs->ebp));
- buffer += sizeof(regs->ebp) * 2;
- hex2mem(buffer, (char *)&regs->esi, sizeof(regs->esi));
- buffer += sizeof(regs->esi) * 2;
- hex2mem(buffer, (char *)&regs->edi, sizeof(regs->edi));
- buffer += sizeof(regs->edi) * 2;
- hex2mem(buffer, (char *)&regs->eip, sizeof(regs->eip));
- buffer += sizeof(regs->eip) * 2;
- hex2mem(buffer, (char *)&regs->eflags, sizeof(regs->eflags));
- buffer += sizeof(regs->eflags) * 2;
- hex2mem(buffer, (char *)&regs->xcs, sizeof(regs->xcs));
- buffer += sizeof(regs->xcs) * 2;
- hex2mem(buffer, (char *)&regs->xss, sizeof(regs->xss));
- buffer += sizeof(regs->xss) * 2;
- hex2mem(buffer, (char *)&regs->xds, sizeof(regs->xds));
- buffer += sizeof(regs->xds) * 2;
- hex2mem(buffer, (char *)&regs->xes, sizeof(regs->xes));
- buffer += sizeof(regs->xes) * 2;
- hex2mem(buffer, (char *)&regs->xfs, sizeof(regs->xfs));
- buffer += sizeof(regs->xfs) * 2;
- hex2mem(buffer, (char *)&regs->xgs, sizeof(regs->xgs));
-}
-
-int
-pdb_process_command (char *ptr, struct pt_regs *regs, unsigned long cr3,
- int sigval)
-{
- int length;
- unsigned long addr;
- int ack = 1; /* wait for ack in pdb_put_packet */
- int go = 0;
-
- TRC(printk("pdb: [%s]\n", ptr));
-
- pdb_out_buffer[0] = 0;
-
- if (pdb_ctx.valid == 1)
- {
- if (pdb_ctx.domain == -1) /* pdb context: xen */
- {
- struct task_struct *p;
-
- p = &idle0_task;
- if (p->mm.shadow_mode)
- pdb_ctx.ptbr = pagetable_val(p->mm.shadow_table);
- else
- pdb_ctx.ptbr = pagetable_val(p->mm.pagetable);
- }
- else if (pdb_ctx.process == -1) /* pdb context: guest os */
- {
- struct task_struct *p;
-
- if (pdb_ctx.domain == -2)
- {
- p = find_last_domain();
- }
- else
- {
- p = find_domain_by_id(pdb_ctx.domain);
- }
- if (p == NULL)
- {
- printk ("pdb error: unknown domain [0x%x]\n", pdb_ctx.domain);
- strcpy (pdb_out_buffer, "E01");
- pdb_ctx.domain = -1;
- goto exit;
- }
- if (p->mm.shadow_mode)
- pdb_ctx.ptbr = pagetable_val(p->mm.shadow_table);
- else
- pdb_ctx.ptbr = pagetable_val(p->mm.pagetable);
- put_task_struct(p);
- }
- else /* pdb context: process */
- {
- struct task_struct *p;
- unsigned long domain_ptbr;
-
- p = find_domain_by_id(pdb_ctx.domain);
- if (p == NULL)
- {
- printk ("pdb error: unknown domain [0x%x][0x%x]\n",
- pdb_ctx.domain, pdb_ctx.process);
- strcpy (pdb_out_buffer, "E01");
- pdb_ctx.domain = -1;
- goto exit;
- }
- if (p->mm.shadow_mode)
- domain_ptbr = pagetable_val(p->mm.shadow_table);
- else
- domain_ptbr = pagetable_val(p->mm.pagetable);
- put_task_struct(p);
-
- pdb_ctx.ptbr = domain_ptbr;
- /*pdb_ctx.ptbr=pdb_linux_pid_ptbr(domain_ptbr, pdb_ctx.process);*/
- }
-
- pdb_ctx.valid = 0;
- TRC(printk ("pdb change context (dom:%d, proc:%d) now 0x%lx\n",
- pdb_ctx.domain, pdb_ctx.process, pdb_ctx.ptbr));
- }
-
- switch (*ptr++)
- {
- case '?':
- pdb_out_buffer[0] = 'S';
- pdb_out_buffer[1] = hexchars[sigval >> 4];
- pdb_out_buffer[2] = hexchars[sigval % 16];
- pdb_out_buffer[3] = 0;
- break;
- case 'S': /* step with signal */
- case 's': /* step */
- {
- if ( pdb_system_call_eflags_addr != 0 )
- {
- unsigned long eflags;
- char eflags_buf[sizeof(eflags)*2]; /* STUPID STUPID STUPID */
-
- pdb_linux_get_values((u_char*)&eflags, sizeof(eflags),
- pdb_system_call_eflags_addr,
- pdb_ctx.process, pdb_ctx.ptbr);
- eflags |= X86_EFLAGS_TF;
- mem2hex ((u_char *)&eflags, eflags_buf, sizeof(eflags));
- pdb_linux_set_values(eflags_buf, sizeof(eflags),
- pdb_system_call_eflags_addr,
- pdb_ctx.process, pdb_ctx.ptbr);
- }
-
- regs->eflags |= X86_EFLAGS_TF;
- pdb_stepping = 1;
- return 1;
- /* not reached */
- }
- case 'C': /* continue with signal */
- case 'c': /* continue */
- {
- if ( pdb_system_call_eflags_addr != 0 )
- {
- unsigned long eflags;
- char eflags_buf[sizeof(eflags)*2]; /* STUPID STUPID STUPID */
-
- pdb_linux_get_values((u_char*)&eflags, sizeof(eflags),
- pdb_system_call_eflags_addr,
- pdb_ctx.process, pdb_ctx.ptbr);
- eflags &= ~X86_EFLAGS_TF;
- mem2hex ((u_char *)&eflags, eflags_buf, sizeof(eflags));
- pdb_linux_set_values(eflags_buf, sizeof(eflags),
- pdb_system_call_eflags_addr,
- pdb_ctx.process, pdb_ctx.ptbr);
- }
-
- regs->eflags &= ~X86_EFLAGS_TF;
- return 1; /* jump out before replying to gdb */
- /* not reached */
- }
- case 'd':
- remote_debug = !(remote_debug); /* toggle debug flag */
- break;
- case 'D': /* detach */
- return go;
- /* not reached */
- case 'g': /* return the value of the CPU registers */
- {
- pdb_x86_to_gdb_regs (pdb_out_buffer, regs);
- break;
- }
- case 'G': /* set the value of the CPU registers - return OK */
- {
- pdb_gdb_to_x86_regs (regs, ptr);
- break;
- }
- case 'H':
- {
- int thread;
- char *next = &ptr[1];
-
- if (hexToInt (&next, &thread))
- {
- if (*ptr == 'c')
- {
- pdb_continue_thread = thread;
- }
- else if (*ptr == 'g')
- {
- pdb_general_thread = thread;
- }
- else
- {
- printk ("pdb error: unknown set thread command %c (%d)\n",
- *ptr, thread);
- strcpy (pdb_out_buffer, "E00");
- break;
- }
- }
- strcpy (pdb_out_buffer, "OK");
- break;
- }
- case 'k': /* kill request */
- {
- strcpy (pdb_out_buffer, "OK"); /* ack for fun */
- printk ("don't kill bill...\n");
- ack = 0;
- break;
- }
-
- case 'q':
- {
- pdb_process_query(ptr);
- break;
- }
-
- /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */
- case 'm':
- {
- /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */
- if (hexToInt (&ptr, (int *)&addr))
- if (*(ptr++) == ',')
- if (hexToInt (&ptr, &length))
- {
- ptr = 0;
-
- pdb_page_fault_possible = 1;
- pdb_page_fault = 0;
- if (addr >= PAGE_OFFSET)
- {
- mem2hex ((char *) addr, pdb_out_buffer, length);
- }
- else if (pdb_ctx.process != -1)
- {
- pdb_linux_get_values(pdb_buffer, length, addr,
- pdb_ctx.process, pdb_ctx.ptbr);
- mem2hex (pdb_buffer, pdb_out_buffer, length);
- }
- else
- {
- pdb_get_values (pdb_buffer, length,
- pdb_ctx.ptbr, addr);
- mem2hex (pdb_buffer, pdb_out_buffer, length);
- }
-
- pdb_page_fault_possible = 0;
- if (pdb_page_fault)
- {
- strcpy (pdb_out_buffer, "E03");
- }
- }
-
- if (ptr)
- {
- strcpy (pdb_out_buffer, "E01");
- }
- break;
- }
-
- /* MAA..AA,LLLL: Write LLLL bytes at address AA..AA, return OK */
- case 'M':
- {
- /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */
- if (hexToInt (&ptr, (int *)&addr))
- if (*(ptr++) == ',')
- if (hexToInt (&ptr, &length))
- if (*(ptr++) == ':')
- {
-
- pdb_page_fault_possible = 1;
- pdb_page_fault = 0;
- if (addr >= PAGE_OFFSET)
- {
- hex2mem (ptr, (char *)addr, length);
- pdb_bkpt_check(ptr, length, pdb_ctx.ptbr, addr);
- }
- else if (pdb_ctx.process != -1)
- {
- pdb_linux_set_values(ptr, length, addr,
- pdb_ctx.process,
- pdb_ctx.ptbr);
- pdb_bkpt_check(ptr, length, pdb_ctx.ptbr, addr);
- }
- else
- {
- pdb_set_values (ptr, length,
- pdb_ctx.ptbr, addr);
- pdb_bkpt_check(ptr, length, pdb_ctx.ptbr, addr);
- }
- pdb_page_fault_possible = 0;
- if (pdb_page_fault)
- {
- strcpy (pdb_out_buffer, "E03");
- }
- else
- {
- strcpy (pdb_out_buffer, "OK");
- }
-
- ptr = 0;
- }
- if (ptr)
- {
- strcpy (pdb_out_buffer, "E02");
- }
- break;
- }
- case 'T':
- {
- int id;
-
- if (hexToInt (&ptr, &id))
- {
- strcpy (pdb_out_buffer, "E00");
-
-#ifdef PDB_PAST
-
- switch (pdb_level) /* previous level */
- {
- case PDB_LVL_XEN:
- {
- struct task_struct *p;
- id -= PDB_ID_OFFSET;
- if ( (p = find_domain_by_id(id)) == NULL)
- strcpy (pdb_out_buffer, "E00");
- else
- strcpy (pdb_out_buffer, "OK");
- put_task_struct(p);
-
- pdb_level = PDB_LVL_GUESTOS;
- pdb_ctx[pdb_level].ctrl = id;
- pdb_ctx[pdb_level].info = id;
- break;
- }
- case PDB_LVL_GUESTOS:
- {
- if (pdb_level == -1)
- {
- pdb_level = PDB_LVL_XEN;
- }
- else
- {
- pdb_level = PDB_LVL_PROCESS;
- pdb_ctx[pdb_level].ctrl = id;
- pdb_ctx[pdb_level].info = id;
- }
- break;
- }
- case PDB_LVL_PROCESS:
- {
- if (pdb_level == -1)
- {
- pdb_level = PDB_LVL_GUESTOS;
- }
- break;
- }
- default:
- {
- printk ("pdb internal error: invalid level [%d]\n",
- pdb_level);
- }
- }
-
-#endif /* PDB_PAST */
- }
- break;
- }
- }
-
-exit:
- /* reply to the request */
- pdb_put_packet (pdb_out_buffer, ack);
-
- return go;
-}
-
-/*
- * process an input character from the serial line.
- *
- * return "1" if the character is a gdb debug string
- * (and hence shouldn't be further processed).
- */
-
-int pdb_debug_state = 0; /* small parser state machine */
-
-int pdb_serial_input(u_char c, struct pt_regs *regs)
-{
- int out = 1;
- int loop, count;
- unsigned long cr3;
-
- __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : );
-
- switch (pdb_debug_state)
- {
- case 0: /* not currently processing debug string */
- if ( c == '$' ) /* start token */
- {
- pdb_debug_state = 1;
- pdb_in_buffer_ptr = 0;
- pdb_in_checksum = 0;
- pdb_xmit_checksum = 0;
- }
- else
- {
- out = 0;
- }
- break;
- case 1: /* saw '$' */
- if ( c == '#' ) /* checksum token */
- {
- pdb_debug_state = 2;
- pdb_in_buffer[pdb_in_buffer_ptr] = 0;
- }
- else
- {
- pdb_in_checksum += c;
- pdb_in_buffer[pdb_in_buffer_ptr++] = c;
- }
- break;
- case 2: /* 1st checksum digit */
- pdb_xmit_checksum = hex(c) << 4;
- pdb_debug_state = 3;
- break;
- case 3: /* 2nd checksum digit */
- pdb_xmit_checksum += hex(c);
- if (pdb_in_checksum != pdb_xmit_checksum)
- {
- pdb_put_char('-'); /* checksum failure */
- printk ("checksum failure [%s.%02x.%02x]\n", pdb_in_buffer,
- pdb_in_checksum, pdb_xmit_checksum);
- }
- else
- {
- pdb_put_char('+'); /* checksum okay */
- if ( pdb_in_buffer_ptr > 1 && pdb_in_buffer[2] == ':' )
- {
- pdb_put_char(pdb_in_buffer[0]);
- pdb_put_char(pdb_in_buffer[1]);
- /* remove sequence chars from buffer */
- count = strlen(pdb_in_buffer);
- for (loop = 3; loop < count; loop++)
- pdb_in_buffer[loop - 3] = pdb_in_buffer[loop];
- }
-
- pdb_process_command (pdb_in_buffer, regs, cr3,
- PDB_LIVE_EXCEPTION);
- }
- pdb_debug_state = 0;
- break;
- }
-
- return out;
-}
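The state machine above parses GDB remote serial protocol frames of the
form $<payload>#<ck>, where <ck> is the sum of the payload bytes modulo
256, sent as two hex digits. A minimal standalone sketch of building such
a frame (gdb_frame is a hypothetical helper, not part of pdb):

#include <stdio.h>

static void gdb_frame(const char *payload, char *out)
{
    unsigned char ck = 0;
    const char *p;

    for (p = payload; *p; p++)  /* checksum covers only the payload */
        ck += (unsigned char)*p;
    sprintf(out, "$%s#%02x", payload, ck);
}

int main(void)
{
    char frame[64];
    gdb_frame("g", frame);  /* "g" is gdb's read-registers request */
    printf("%s\n", frame);  /* prints "$g#67" */
    return 0;
}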
-
-int hex(char ch)
-{
- if ((ch >= 'a') && (ch <= 'f')) return (ch-'a'+10);
- if ((ch >= '0') && (ch <= '9')) return (ch-'0');
- if ((ch >= 'A') && (ch <= 'F')) return (ch-'A'+10);
- return (-1);
-}
-
-/* convert the memory pointed to by mem into hex, placing result in buf */
-/* return a pointer to the last char put in buf (null) */
-char *
-mem2hex (char *mem, char *buf, int count)
-{
- int i;
- unsigned char ch;
-
- for (i = 0; i < count; i++)
- {
- ch = get_char (mem++);
- *buf++ = hexchars[ch >> 4];
- *buf++ = hexchars[ch % 16];
- }
- *buf = 0;
- return (buf);
-}
-
-/* convert the hex array pointed to by buf into binary to be placed in mem */
-/* return a pointer to the character AFTER the last byte written */
-char *
-hex2mem (char *buf, char *mem, int count)
-{
- int i;
- unsigned char ch;
-
- for (i = 0; i < count; i++)
- {
- ch = hex (*buf++) << 4;
- ch = ch + hex (*buf++);
- set_char (mem++, ch);
- }
- return (mem);
-}
-
-int
-hexToInt (char **ptr, int *intValue)
-{
- int numChars = 0;
- int hexValue;
- int negative = 0;
-
- *intValue = 0;
-
- if (**ptr == '-')
- {
- negative = 1;
- numChars++;
- (*ptr)++;
- }
-
- while (**ptr)
- {
- hexValue = hex (**ptr);
- if (hexValue >= 0)
- {
- *intValue = (*intValue << 4) | hexValue;
- numChars++;
- }
- else
- break;
-
- (*ptr)++;
- }
-
- if ( negative )
- *intValue *= -1;
-
- return (numChars);
-}
-
-/***********************************************************************/
-/***********************************************************************/
-
-
-/*
- * Add a breakpoint to the list of known breakpoints.
- * For now there should only be two or three breakpoints so
- * we use a simple linked list. In the future, maybe a red-black tree?
- */
-struct pdb_breakpoint breakpoints;
-
-void pdb_bkpt_add (unsigned long cr3, unsigned long address)
-{
- struct pdb_breakpoint *bkpt = kmalloc(sizeof(*bkpt), GFP_KERNEL);
- bkpt->cr3 = cr3;
- bkpt->address = address;
- list_add(&bkpt->list, &breakpoints.list);
-}
-
-/*
- * Check to see if a breakpoint is in the list of known breakpoints.
- * Return a pointer to the breakpoint if it has been set, NULL otherwise.
- */
-struct pdb_breakpoint* pdb_bkpt_search (unsigned long cr3,
- unsigned long address)
-{
- struct list_head *list_entry;
- struct pdb_breakpoint *bkpt;
-
- list_for_each(list_entry, &breakpoints.list)
- {
- bkpt = list_entry(list_entry, struct pdb_breakpoint, list);
- if ( bkpt->cr3 == cr3 && bkpt->address == address )
- return bkpt;
- }
-
- return NULL;
-}
-
-/*
- * Remove a breakpoint from the list of known breakpoints.
- * Return 1 if the element was not found, otherwise 0.
- */
-int pdb_bkpt_remove (unsigned long cr3, unsigned long address)
-{
- struct list_head *list_entry;
- struct pdb_breakpoint *bkpt;
-
- list_for_each(list_entry, &breakpoints.list)
- {
- bkpt = list_entry(list_entry, struct pdb_breakpoint, list);
- if ( bkpt->cr3 == cr3 && bkpt->address == address )
- {
- list_del(&bkpt->list);
- kfree(bkpt);
- return 0;
- }
- }
-
- return 1;
-}
-
-/*
- * Check to see if a memory write is really gdb setting a breakpoint
- */
-void pdb_bkpt_check (u_char *buffer, int length,
- unsigned long cr3, unsigned long addr)
-{
- if (length == 1 && buffer[0] == 'c' && buffer[1] == 'c')
- {
- /* inserting a new breakpoint */
- pdb_bkpt_add(cr3, addr);
- TRC(printk("pdb breakpoint detected at 0x%lx:0x%lx\n", cr3, addr));
- }
- else if ( pdb_bkpt_remove(cr3, addr) == 0 )
- {
- /* removing a breakpoint */
- TRC(printk("pdb breakpoint cleared at 0x%lx:0x%lx\n", cr3, addr));
- }
-}
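/*
 * Illustrative sketch (user-space; not from the patch): gdb transmits
 * memory writes as hex text, so inserting an int3 (opcode 0xcc) arrives as
 * a one-byte write whose payload is the two characters "cc". That is
 * exactly the pattern pdb_bkpt_check() keys on.
 */
#include <stdio.h>

int main(void)
{
    const char buffer[] = "cc";   /* hex text for a single 0xcc byte     */
    int length = 1;               /* length counts raw bytes, not digits */

    if (length == 1 && buffer[0] == 'c' && buffer[1] == 'c')
        printf("looks like a breakpoint insertion\n");
    return 0;
}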
-
-/***********************************************************************/
-
-int pdb_change_values(u_char *buffer, int length,
- unsigned long cr3, unsigned long addr, int rw);
-int pdb_change_values_one_page(u_char *buffer, int length,
- unsigned long cr3, unsigned long addr, int rw);
-
-#define __PDB_GET_VAL 1
-#define __PDB_SET_VAL 2
-
-/*
- * Set memory in a domain's address space.
- * Set "length" bytes at "addr" in the address space referenced by "cr3"
- * to the values in "buffer".
- * Return the number of bytes set, 0 if there was a problem.
- */
-
-int pdb_set_values(u_char *buffer, int length,
- unsigned long cr3, unsigned long addr)
-{
- int count = pdb_change_values(buffer, length, cr3, addr, __PDB_SET_VAL);
- return count;
-}
-
-/*
- * Read memory from a domain's address space.
- * Fetch "length" bytes at "address" from "domain" into "buffer".
- * Return the number of bytes read, 0 if there was a problem.
- */
-
-int pdb_get_values(u_char *buffer, int length,
- unsigned long cr3, unsigned long addr)
-{
- return pdb_change_values(buffer, length, cr3, addr, __PDB_GET_VAL);
-}
-
-/*
- * Read or write memory in an address space
- */
-int pdb_change_values(u_char *buffer, int length,
- unsigned long cr3, unsigned long addr, int rw)
-{
- int remaining; /* number of bytes to touch past this page */
- int bytes = 0;
-
- while ( (remaining = (addr + length - 1) - (addr | (PAGE_SIZE - 1))) > 0)
- {
- bytes += pdb_change_values_one_page(buffer, length - remaining,
- cr3, addr, rw);
- buffer = buffer + (2 * (length - remaining));
- length = remaining;
- addr = (addr | (PAGE_SIZE - 1)) + 1;
- }
-
- bytes += pdb_change_values_one_page(buffer, length, cr3, addr, rw);
- return bytes;
-}
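/*
 * Sketch of the page-splitting arithmetic above (user-space, illustrative
 * only): (addr | (PAGE_SIZE-1)) is the last byte of addr's page, so
 * "remaining" is how much of the range spills past it. A 20-byte access
 * starting 8 bytes before a 4KB boundary splits into an 8- and a 12-byte
 * chunk:
 */
#include <stdio.h>
#define PAGE_SIZE 4096UL

int main(void)
{
    unsigned long addr = 0x1ff8;
    long length = 20, remaining;

    while ((remaining = (long)((addr + length - 1)
                               - (addr | (PAGE_SIZE - 1)))) > 0)
    {
        printf("chunk at 0x%lx, %ld bytes\n", addr, length - remaining);
        length = remaining;
        addr = (addr | (PAGE_SIZE - 1)) + 1;
    }
    printf("chunk at 0x%lx, %ld bytes\n", addr, length);
    return 0;
}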
-
-/*
- * Change memory in a process' address space within one page.
- * Read or write "length" bytes at "addr" into/from "buffer"
- * from the virtual address space referenced by "cr3".
- * Return the number of bytes read or written, 0 if there was a problem.
- */
-
-int pdb_change_values_one_page(u_char *buffer, int length,
- unsigned long cr3, unsigned long addr, int rw)
-{
- l2_pgentry_t* l2_table = NULL;
- l1_pgentry_t* l1_table = NULL;
- u_char *page;
- int bytes = 0;
-
- l2_table = map_domain_mem(cr3);
- l2_table += l2_table_offset(addr);
- if (!(l2_pgentry_val(*l2_table) & _PAGE_PRESENT))
- {
- if (pdb_page_fault_possible == 1)
- {
- pdb_page_fault = 1;
- TRC(printk("pdb: L2 error (0x%lx)\n", addr));
- }
- else
- {
- struct task_struct *p = find_domain_by_id(0);
- printk ("pdb error: cr3: 0x%lx dom0cr3: 0x%lx\n", cr3,
- p->mm.shadow_mode ? pagetable_val(p->mm.shadow_table)
- : pagetable_val(p->mm.pagetable));
- put_task_struct(p);
- printk ("pdb error: L2:0x%p (0x%lx)\n",
- l2_table, l2_pgentry_val(*l2_table));
- }
- goto exit2;
- }
-
- if (l2_pgentry_val(*l2_table) & _PAGE_PSE)
- {
-#define PSE_PAGE_SHIFT L2_PAGETABLE_SHIFT
-#define PSE_PAGE_SIZE (1UL << PSE_PAGE_SHIFT)
-#define PSE_PAGE_MASK (~(PSE_PAGE_SIZE-1))
-
-#define L1_PAGE_BITS ( (ENTRIES_PER_L1_PAGETABLE - 1) << L1_PAGETABLE_SHIFT )
-
-#define pse_pgentry_to_phys(_x) (l2_pgentry_val(_x) & PSE_PAGE_MASK)
-
- page = map_domain_mem(pse_pgentry_to_phys(*l2_table) + /* 10 bits */
- (addr & L1_PAGE_BITS)); /* 10 bits */
- page += addr & (PAGE_SIZE - 1); /* 12 bits */
- }
- else
- {
- l1_table = map_domain_mem(l2_pgentry_to_phys(*l2_table));
- l1_table += l1_table_offset(addr);
- if (!(l1_pgentry_val(*l1_table) & _PAGE_PRESENT))
- {
- if (pdb_page_fault_possible == 1)
- {
- pdb_page_fault = 1;
- TRC(printk ("pdb: L1 error (0x%lx)\n", addr));
- }
- else
- {
- printk ("L2:0x%p (0x%lx) L1:0x%p (0x%lx)\n",
- l2_table, l2_pgentry_val(*l2_table),
- l1_table, l1_pgentry_val(*l1_table));
- }
- goto exit1;
- }
-
- page = map_domain_mem(l1_pgentry_to_phys(*l1_table));
- page += addr & (PAGE_SIZE - 1);
- }
-
- switch (rw)
- {
- case __PDB_GET_VAL: /* read */
- memcpy (buffer, page, length);
- bytes = length;
- break;
- case __PDB_SET_VAL: /* write */
- hex2mem (buffer, page, length);
- bytes = length;
- break;
- default: /* unknown */
- printk ("error: unknown RW flag: %d\n", rw);
- break; /* still unmap the mapped page and tables below; bytes stays 0 */
- }
-
- unmap_domain_mem((void *)page);
-exit1:
- if (l1_table != NULL)
- unmap_domain_mem((void *)l1_table);
-exit2:
- unmap_domain_mem((void *)l2_table);
-
- return bytes;
-}
-
-/***********************************************************************/
-
-void breakpoint(void);
-
-/* send the packet in buffer. */
-void pdb_put_packet (unsigned char *buffer, int ack)
-{
- unsigned char checksum;
- int count;
- char ch;
-
- /* $<packet info>#<checksum> */
- /* do */
- {
- pdb_put_char ('$');
- checksum = 0;
- count = 0;
-
- while ((ch = buffer[count]))
- {
- pdb_put_char (ch);
- checksum += ch;
- count += 1;
- }
-
- pdb_put_char('#');
- pdb_put_char(hexchars[checksum >> 4]);
- pdb_put_char(hexchars[checksum % 16]);
- }
-
- if (ack)
- {
- if ((ch = pdb_get_char()) != '+')
- {
- printk(" pdb return error: %c 0x%x [%s]\n", ch, ch, buffer);
- }
- }
-}
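/*
 * Sketch of the GDB remote-protocol framing used above (user-space,
 * illustrative only): a packet is "$<payload>#<checksum>", where the
 * checksum is the 8-bit sum of the payload characters, appended as two
 * hex digits.
 */
#include <stdio.h>

int main(void)
{
    const char *payload = "S05";        /* "stopped with signal 5" reply */
    unsigned char checksum = 0;
    const char *p;

    for (p = payload; *p != '\0'; p++)
        checksum += (unsigned char)*p;

    printf("$%s#%02x\n", payload, checksum);    /* prints "$S05#b8" */
    return 0;
}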
-
-void pdb_get_packet(char *buffer)
-{
- int count;
- char ch;
- unsigned char checksum = 0;
- unsigned char xmitcsum = 0;
-
- do
- {
- while ((ch = pdb_get_char()) != '$');
-
- count = 0;
- checksum = 0;
-
- while (count < BUFMAX)
- {
- ch = pdb_get_char();
- if (ch == '#') break;
- checksum += ch;
- buffer[count] = ch;
- count++;
- }
- buffer[count] = 0;
-
- if (ch == '#')
- {
- xmitcsum = hex(pdb_get_char()) << 4;
- xmitcsum += hex(pdb_get_char());
-
- if (xmitcsum == checksum)
- {
- pdb_put_char('+');
- if (buffer[2] == ':')
- {
- printk ("pdb: obsolete gdb packet (sequence ID)\n");
- }
- }
- else
- {
- pdb_put_char('-');
- }
- }
- } while (checksum != xmitcsum);
-
- return;
-}
-
-/*
- * process a machine interrupt or exception
- * Return 1 if pdb is not interested in the exception; it should
- * be propagated to the guest os.
- */
-
-int pdb_handle_exception(int exceptionVector,
- struct pt_regs *xen_regs)
-{
- int signal = 0;
- struct pdb_breakpoint* bkpt;
- int watchdog_save;
- unsigned long cr3;
-
- __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : );
-
- /* If the exception is an int3 from user space then pdb is only
-    interested if it re-wrote an instruction to set the breakpoint.
- This occurs when leaving a system call from a domain.
- */
- if ( exceptionVector == 3 &&
- (xen_regs->xcs & 3) == 3 &&
- xen_regs->eip != pdb_system_call_next_addr + 1)
- {
- TRC(printf("pdb: user bkpt (0x%x) at 0x%x:0x%lx:0x%lx\n",
- exceptionVector, xen_regs->xcs & 3, cr3, xen_regs->eip));
- return 1;
- }
-
- /*
- * If PDB didn't set the breakpoint, isn't single-stepping,
- * isn't entering a system call in a domain,
- * and the user didn't press the magic debug key,
- * then we don't handle the exception.
- */
- bkpt = pdb_bkpt_search(cr3, xen_regs->eip - 1);
- if ( (bkpt == NULL) &&
- !pdb_stepping &&
- !pdb_system_call &&
- xen_regs->eip != pdb_system_call_next_addr + 1 &&
- (exceptionVector != KEYPRESS_EXCEPTION) &&
- xen_regs->eip < 0xc0000000) /* Linux-specific for now! */
- {
- TRC(printf("pdb: user bkpt (0x%x) at 0x%lx:0x%lx\n",
- exceptionVector, cr3, xen_regs->eip));
- return 1;
- }
-
- printk("pdb_handle_exception [0x%x][0x%lx:0x%lx]\n",
- exceptionVector, cr3, xen_regs->eip);
-
- if ( pdb_stepping )
- {
- /* Stepped one instruction; now return to normal execution. */
- xen_regs->eflags &= ~X86_EFLAGS_TF;
- pdb_stepping = 0;
- }
-
- if ( pdb_system_call )
- {
- pdb_system_call = 0;
-
- pdb_linux_syscall_exit_bkpt (xen_regs, &pdb_ctx);
-
- /* we don't have a saved breakpoint so we need to rewind eip */
- xen_regs->eip--;
-
- /* if the user doesn't care about breaking when entering a
- system call then we'll just ignore the exception */
- if ( (pdb_ctx.system_call & 0x01) == 0 )
- {
- return 0;
- }
- }
-
- if ( exceptionVector == BREAKPT_EXCEPTION && bkpt != NULL)
- {
- /* Executed Int3: replace breakpoint byte with real program byte. */
- xen_regs->eip--;
- }
-
- /* returning to user space after a system call */
- if ( xen_regs->eip == pdb_system_call_next_addr + 1)
- {
- u_char instr[4]; /* 2 hex digits + NUL; [2] was overrun by mem2hex */
-
- mem2hex (&pdb_system_call_leave_instr, instr, 1);
-
- pdb_linux_set_values (instr, 1, pdb_system_call_next_addr,
- pdb_ctx.process, pdb_ctx.ptbr);
-
- pdb_system_call_next_addr = 0;
- pdb_system_call_leave_instr = 0;
-
- /* manually rewind eip */
- xen_regs->eip--;
-
- /* if the user doesn't care about breaking when returning
- to user space after a system call then we'll just ignore
- the exception */
- if ( (pdb_ctx.system_call & 0x02) == 0 )
- {
- return 0;
- }
- }
-
- /* Generate a signal for GDB. */
- switch ( exceptionVector )
- {
- case KEYPRESS_EXCEPTION:
- signal = 2; break; /* SIGINT */
- case DEBUG_EXCEPTION:
- signal = 5; break; /* SIGTRAP */
- case BREAKPT_EXCEPTION:
- signal = 5; break; /* SIGTRAP */
- default:
- printk("pdb: can't generate signal for unknown exception vector %d\n",
- exceptionVector);
- break;
- }
-
- pdb_out_buffer[0] = 'S';
- pdb_out_buffer[1] = hexchars[signal >> 4];
- pdb_out_buffer[2] = hexchars[signal % 16];
- pdb_out_buffer[3] = 0;
- pdb_put_packet(pdb_out_buffer, 1);
-
- watchdog_save = watchdog_on;
- watchdog_on = 0;
-
- do {
- pdb_out_buffer[0] = 0;
- pdb_get_packet(pdb_in_buffer);
- }
- while ( pdb_process_command(pdb_in_buffer, xen_regs, cr3, signal) == 0 );
-
- watchdog_on = watchdog_save;
-
- return 0;
-}
-
-void pdb_key_pressed(u_char key, void *dev_id, struct pt_regs *regs)
-{
- pdb_handle_exception(KEYPRESS_EXCEPTION, regs);
- return;
-}
-
-void initialize_pdb()
-{
- extern char opt_pdb[];
-
- /* Certain state must be initialised even when PDB will not be used. */
- memset((void *) &breakpoints, 0, sizeof(breakpoints));
- INIT_LIST_HEAD(&breakpoints.list);
- pdb_stepping = 0;
-
- if ( strcmp(opt_pdb, "none") == 0 )
- return;
-
- if ( (pdb_serhnd = parse_serial_handle(opt_pdb)) == -1 )
- {
- printk("error: failed to initialize PDB on port %s\n", opt_pdb);
- return;
- }
-
- pdb_ctx.valid = 1;
- pdb_ctx.domain = -1;
- pdb_ctx.process = -1;
- pdb_ctx.system_call = 0;
- pdb_ctx.ptbr = 0;
-
- printk("pdb: pervasive debugger (%s) www.cl.cam.ac.uk/netos/pdb\n",
- opt_pdb);
-
- /* Acknowledge any spurious GDB packets. */
- pdb_put_char('+');
-
- add_key_handler('D', pdb_key_pressed, "enter pervasive debugger");
-
- pdb_initialized = 1;
-}
-
-void breakpoint(void)
-{
- if ( pdb_initialized )
- asm("int $3");
-}
+++ /dev/null
-/*
- * linux/arch/i386/kernel/process.c
- *
- * Copyright (C) 1995 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * Gareth Hughes <gareth@valinux.com>, May 2000
- */
-
-/*
- * This file handles the architecture-dependent parts of process handling..
- */
-
-#define __KERNEL_SYSCALLS__
-#include <xen/config.h>
-#include <xen/lib.h>
-#include <xen/errno.h>
-#include <xen/sched.h>
-#include <xen/smp.h>
-#include <asm/ptrace.h>
-#include <xen/delay.h>
-#include <xen/interrupt.h>
-#include <asm/mc146818rtc.h>
-
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/processor.h>
-#include <asm/desc.h>
-#include <asm/i387.h>
-#include <asm/mpspec.h>
-#include <asm/ldt.h>
-#include <xen/irq.h>
-#include <xen/event.h>
-#include <xen/shadow.h>
-
-int hlt_counter;
-
-void disable_hlt(void)
-{
- hlt_counter++;
-}
-
-void enable_hlt(void)
-{
- hlt_counter--;
-}
-
-/*
- * We use this if we don't have any better
- * idle routine..
- */
-static void default_idle(void)
-{
- if (!hlt_counter) {
- __cli();
- if (!current->hyp_events && !softirq_pending(smp_processor_id()))
- safe_halt();
- else
- __sti();
- }
-}
-
-void continue_cpu_idle_loop(void)
-{
- int cpu = smp_processor_id();
- for ( ; ; )
- {
- irq_stat[cpu].idle_timestamp = jiffies;
- while (!current->hyp_events && !softirq_pending(cpu))
- default_idle();
- do_hyp_events();
- do_softirq();
- }
-}
-
-void startup_cpu_idle_loop(void)
-{
- /* Just some sanity to ensure that the scheduler is set up okay. */
- ASSERT(current->domain == IDLE_DOMAIN_ID);
- (void)wake_up(current);
- __enter_scheduler();
-
- /*
- * Declares CPU setup done to the boot processor.
-     * Hence the memory barrier, to ensure the state is visible.
- */
- smp_mb();
- init_idle();
-
- continue_cpu_idle_loop();
-}
-
-static long no_idt[2];
-static int reboot_mode;
-int reboot_thru_bios = 0;
-
-#ifdef CONFIG_SMP
-int reboot_smp = 0;
-static int reboot_cpu = -1;
-/* shamelessly grabbed from lib/vsprintf.c for readability */
-#define is_digit(c) ((c) >= '0' && (c) <= '9')
-#endif
-
-
-static inline void kb_wait(void)
-{
- int i;
-
- for (i=0; i<0x10000; i++)
- if ((inb_p(0x64) & 0x02) == 0)
- break;
-}
-
-
-void machine_restart(char * __unused)
-{
- extern int opt_noreboot;
-#ifdef CONFIG_SMP
- int cpuid;
-#endif
-
- if ( opt_noreboot )
- {
- printk("Reboot disabled on cmdline: require manual reset\n");
- for ( ; ; ) __asm__ __volatile__ ("hlt");
- }
-
-#ifdef CONFIG_SMP
- cpuid = GET_APIC_ID(apic_read(APIC_ID));
-
- /* KAF: Need interrupts enabled for safe IPI. */
- __sti();
-
- if (reboot_smp) {
-
- /* check to see if reboot_cpu is valid;
- if it's not, default to the BSP */
- if ((reboot_cpu == -1) ||
- (reboot_cpu > (NR_CPUS -1)) ||
- !(phys_cpu_present_map & (1<<cpuid)))
- reboot_cpu = boot_cpu_physical_apicid;
-
- reboot_smp = 0; /* use this as a flag to only go through this once */
- /* re-run this function on the other CPUs;
- it will fall through this section since we have
- cleared reboot_smp, and do the reboot if it is the
- correct CPU, otherwise it halts. */
- if (reboot_cpu != cpuid)
- smp_call_function((void *)machine_restart , NULL, 1, 0);
- }
-
- /* if reboot_cpu is still -1, then we want a traditional reboot,
- and if we are not running on the reboot_cpu, halt */
- if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) {
- for (;;)
- __asm__ __volatile__ ("hlt");
- }
- /*
- * Stop all CPUs and turn off local APICs and the IO-APIC, so
- * other OSs see a clean IRQ state.
- */
- smp_send_stop();
- disable_IO_APIC();
-#endif
-
- if(!reboot_thru_bios) {
- /* rebooting needs to touch the page at absolute addr 0 */
- *((unsigned short *)__va(0x472)) = reboot_mode;
- for (;;) {
- int i;
- for (i=0; i<100; i++) {
- kb_wait();
- udelay(50);
- outb(0xfe,0x64); /* pulse reset low */
- udelay(50);
- }
- /* That didn't work - force a triple fault.. */
- __asm__ __volatile__("lidt %0": "=m" (no_idt));
- __asm__ __volatile__("int3");
- }
- }
-
- panic("Need to reinclude BIOS reboot code\n");
-}
-
-void machine_halt(void)
-{
- machine_restart(0);
-}
-
-void machine_power_off(void)
-{
- machine_restart(0);
-}
-
-void new_thread(struct task_struct *p,
- unsigned long start_pc,
- unsigned long start_stack,
- unsigned long start_info)
-{
- execution_context_t *ec = &p->shared_info->execution_context;
-
- /*
- * Initial register values:
- * DS,ES,FS,GS = FLAT_RING1_DS
- * CS:EIP = FLAT_RING1_CS:start_pc
- * SS:ESP = FLAT_RING1_DS:start_stack
- * ESI = start_info
- * [EAX,EBX,ECX,EDX,EDI,EBP are zero]
- */
- ec->ds = ec->es = ec->fs = ec->gs = ec->ss = FLAT_RING1_DS;
- ec->cs = FLAT_RING1_CS;
- ec->eip = start_pc;
- ec->esp = start_stack;
- ec->esi = start_info;
-
- __save_flags(ec->eflags);
- ec->eflags |= X86_EFLAGS_IF;
-
- /* No fast trap at start of day. */
- SET_DEFAULT_FAST_TRAP(&p->thread);
-}
-
-
-/*
- * This special macro can be used to load a debugging register
- */
-#define loaddebug(thread,register) \
- __asm__("movl %0,%%db" #register \
- : /* no output */ \
- :"r" (thread->debugreg[register]))
-
-
-void switch_to(struct task_struct *prev_p, struct task_struct *next_p)
-{
- struct thread_struct *next = &next_p->thread;
- struct tss_struct *tss = init_tss + smp_processor_id();
- execution_context_t *stack_ec = get_execution_context();
- int i;
-
- __cli();
-
- /* Switch guest general-register state. */
- if ( !is_idle_task(prev_p) )
- {
- memcpy(&prev_p->shared_info->execution_context,
- stack_ec,
- sizeof(*stack_ec));
- unlazy_fpu(prev_p);
- CLEAR_FAST_TRAP(&prev_p->thread);
- }
-
- if ( !is_idle_task(next_p) )
- {
- memcpy(stack_ec,
- &next_p->shared_info->execution_context,
- sizeof(*stack_ec));
-
- /*
- * This is sufficient! If the descriptor DPL differs from CS RPL then
- * we'll #GP. If DS, ES, FS, GS are DPL 0 then they'll be cleared
- * automatically. If SS RPL or DPL differs from CS RPL then we'll #GP.
- */
- if ( (stack_ec->cs & 3) == 0 )
- stack_ec->cs = FLAT_RING1_CS;
- if ( (stack_ec->ss & 3) == 0 )
- stack_ec->ss = FLAT_RING1_DS;
-
- SET_FAST_TRAP(&next_p->thread);
-
- /* Switch the guest OS ring-1 stack. */
- tss->esp1 = next->guestos_sp;
- tss->ss1 = next->guestos_ss;
-
- /* Maybe switch the debug registers. */
- if ( unlikely(next->debugreg[7]) )
- {
- loaddebug(next, 0);
- loaddebug(next, 1);
- loaddebug(next, 2);
- loaddebug(next, 3);
- /* no 4 and 5 */
- loaddebug(next, 6);
- loaddebug(next, 7);
- }
-
- /* Switch page tables. */
- write_ptbase(&next_p->mm);
- tlb_clocktick();
- }
-
- if ( unlikely(prev_p->io_bitmap != NULL) ||
- unlikely(next_p->io_bitmap != NULL) )
- {
- if ( next_p->io_bitmap != NULL )
- {
- /* Copy in the appropriate parts of the IO bitmap. We use the
- * selector to copy only the interesting parts of the bitmap. */
-
- u64 old_sel = ~0ULL; /* IO bitmap selector for previous task. */
-
- if ( prev_p->io_bitmap != NULL)
- {
- old_sel = prev_p->io_bitmap_sel;
-
- /* Replace any areas of the IO bitmap that had bits cleared. */
- for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ )
- if ( !test_bit(i, &prev_p->io_bitmap_sel) )
- memcpy(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS],
- &next_p->io_bitmap[i * IOBMP_SELBIT_LWORDS],
- IOBMP_SELBIT_LWORDS * sizeof(unsigned long));
- }
-
- /* Copy in any regions of the new task's bitmap that have bits
- * clear and we haven't already dealt with. */
- for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ )
- {
- if ( test_bit(i, &old_sel)
- && !test_bit(i, &next_p->io_bitmap_sel) )
- memcpy(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS],
- &next_p->io_bitmap[i * IOBMP_SELBIT_LWORDS],
- IOBMP_SELBIT_LWORDS * sizeof(unsigned long));
- }
-
- tss->bitmap = IO_BITMAP_OFFSET;
-
- }
- else
- {
- /* In this case, we're switching FROM a task with IO port access,
- * to a task that doesn't use the IO bitmap. We set any TSS bits
- * that might have been cleared, ready for future use. */
- for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ )
- if ( !test_bit(i, &prev_p->io_bitmap_sel) )
- memset(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS],
- 0xFF, IOBMP_SELBIT_LWORDS * sizeof(unsigned long));
-
- /*
- * a bitmap offset pointing outside of the TSS limit
- * causes a nicely controllable SIGSEGV if a process
- * tries to use a port IO instruction. The first
- * sys_ioperm() call sets up the bitmap properly.
- */
- tss->bitmap = INVALID_IO_BITMAP_OFFSET;
- }
- }
-
- set_current(next_p);
-
- /* Switch GDT and LDT. */
- __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->mm.gdt));
- load_LDT(next_p);
-
- __sti();
-}
-
-
-/* XXX Currently the 'domain' field is ignored! XXX */
-long do_iopl(domid_t domain, unsigned int new_io_pl)
-{
- execution_context_t *ec = get_execution_context();
- ec->eflags = (ec->eflags & 0xffffcfff) | ((new_io_pl&3) << 12);
- return 0;
-}
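/*
 * Illustrative sketch (user-space): IOPL lives in EFLAGS bits 12-13, so the
 * expression above clears the old field with the 0xffffcfff mask and ORs in
 * the new privilege level shifted into place.
 */
#include <stdio.h>

int main(void)
{
    unsigned long eflags = 0x00000246;          /* an arbitrary flags value */
    unsigned int new_io_pl = 1;

    eflags = (eflags & 0xffffcfff) | ((new_io_pl & 3) << 12);
    printf("eflags=0x%08lx, iopl=%lu\n", eflags, (eflags >> 12) & 3);
    return 0;
}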
+++ /dev/null
-#include <asm/atomic.h>
-#include <asm/rwlock.h>
-
-#if defined(CONFIG_SMP)
-asm(
-".align 4\n"
-".globl __write_lock_failed\n"
-"__write_lock_failed:\n"
-" " LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n"
-"1: rep; nop\n"
-" cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n"
-" jne 1b\n"
-" " LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n"
-" jnz __write_lock_failed\n"
-" ret\n"
-
-".align 4\n"
-".globl __read_lock_failed\n"
-"__read_lock_failed:\n"
-" lock ; incl (%eax)\n"
-"1: rep; nop\n"
-" cmpl $1,(%eax)\n"
-" js 1b\n"
-" lock ; decl (%eax)\n"
-" js __read_lock_failed\n"
-" ret\n"
-);
-#endif
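/*
 * A single-threaded model (illustrative only) of the RW_LOCK_BIAS
 * convention that the failure stubs above rely on: the lock word starts at
 * the bias, each reader subtracts one, and a writer subtracts the whole
 * bias, so the word is zero only while a writer holds the lock.
 */
#include <stdio.h>
#define RW_LOCK_BIAS 0x01000000

int main(void)
{
    int lock = RW_LOCK_BIAS;

    lock--;                                        /* read_lock    */
    printf("readers in: %d\n", RW_LOCK_BIAS - lock);
    lock++;                                        /* read_unlock  */

    lock -= RW_LOCK_BIAS;                          /* write_lock   */
    printf("writer in: lock word = %d\n", lock);
    lock += RW_LOCK_BIAS;                          /* write_unlock */
    return 0;
}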
+++ /dev/null
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/interrupt.h>
-#include <xen/lib.h>
-#include <xen/sched.h>
-#include <xen/pci.h>
-#include <xen/serial.h>
-#include <xen/acpi.h>
-#include <xen/module.h>
-#include <asm/bitops.h>
-#include <asm/smp.h>
-#include <asm/processor.h>
-#include <asm/mpspec.h>
-#include <asm/apic.h>
-#include <asm/desc.h>
-#include <asm/domain_page.h>
-#include <asm/pdb.h>
-
-char ignore_irq13; /* set if exception 16 works */
-struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
-
-/* Lots of nice things, since we only target PPro+. */
-unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE;
-EXPORT_SYMBOL(mmu_cr4_features);
-
-unsigned long wait_init_idle;
-
-struct task_struct *idle_task[NR_CPUS] = { &idle0_task };
-
-#ifdef CONFIG_ACPI_INTERPRETER
-int acpi_disabled = 0;
-#else
-int acpi_disabled = 1;
-#endif
-EXPORT_SYMBOL(acpi_disabled);
-
-#ifdef CONFIG_ACPI_BOOT
-extern int __initdata acpi_ht;
-int acpi_force __initdata = 0;
-#endif
-
-int phys_proc_id[NR_CPUS];
-int logical_proc_id[NR_CPUS];
-
-/* Standard macro to see if a specific flag is changeable */
-static inline int flag_is_changeable_p(u32 flag)
-{
- u32 f1, f2;
-
- asm("pushfl\n\t"
- "pushfl\n\t"
- "popl %0\n\t"
- "movl %0,%1\n\t"
- "xorl %2,%0\n\t"
- "pushl %0\n\t"
- "popfl\n\t"
- "pushfl\n\t"
- "popl %0\n\t"
- "popfl\n\t"
- : "=&r" (f1), "=&r" (f2)
- : "ir" (flag));
-
- return ((f1^f2) & flag) != 0;
-}
-
-/* Probe for the CPUID instruction */
-static int __init have_cpuid_p(void)
-{
- return flag_is_changeable_p(X86_EFLAGS_ID);
-}
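/*
 * The probe above toggles EFLAGS.ID (bit 21, 0x00200000) and tests whether
 * the change sticks; pre-CPUID parts hard-wire the bit. A sketch of just
 * the flag comparison, with the two pushfl reads modelled as plain values
 * (illustrative, not from the patch):
 */
#include <stdio.h>
#define X86_EFLAGS_ID 0x00200000u

int main(void)
{
    unsigned int f1 = 0x00000246u;            /* first EFLAGS read         */
    unsigned int f2 = f1 ^ X86_EFLAGS_ID;     /* second read, on a CPU that
                                                 latched the toggled bit   */
    printf("CPUID present: %d\n", ((f1 ^ f2) & X86_EFLAGS_ID) != 0);
    return 0;
}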
-
-void __init get_cpu_vendor(struct cpuinfo_x86 *c)
-{
- char *v = c->x86_vendor_id;
-
- if (!strcmp(v, "GenuineIntel"))
- c->x86_vendor = X86_VENDOR_INTEL;
- else if (!strcmp(v, "AuthenticAMD"))
- c->x86_vendor = X86_VENDOR_AMD;
- else if (!strcmp(v, "CyrixInstead"))
- c->x86_vendor = X86_VENDOR_CYRIX;
- else if (!strcmp(v, "UMC UMC UMC "))
- c->x86_vendor = X86_VENDOR_UMC;
- else if (!strcmp(v, "CentaurHauls"))
- c->x86_vendor = X86_VENDOR_CENTAUR;
- else if (!strcmp(v, "NexGenDriven"))
- c->x86_vendor = X86_VENDOR_NEXGEN;
- else if (!strcmp(v, "RiseRiseRise"))
- c->x86_vendor = X86_VENDOR_RISE;
- else if (!strcmp(v, "GenuineTMx86") ||
- !strcmp(v, "TransmetaCPU"))
- c->x86_vendor = X86_VENDOR_TRANSMETA;
- else
- c->x86_vendor = X86_VENDOR_UNKNOWN;
-}
-
-static void __init init_intel(struct cpuinfo_x86 *c)
-{
- extern int opt_noht, opt_noacpi;
-
- /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */
- if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 )
- clear_bit(X86_FEATURE_SEP, &c->x86_capability);
-
- if ( opt_noht )
- {
- opt_noacpi = 1; /* Virtual CPUs only appear in ACPI tables. */
- clear_bit(X86_FEATURE_HT, &c->x86_capability[0]);
- }
-
-#ifdef CONFIG_SMP
- if ( test_bit(X86_FEATURE_HT, &c->x86_capability) )
- {
- u32 eax, ebx, ecx, edx;
- int initial_apic_id, siblings, cpu = smp_processor_id();
-
- cpuid(1, &eax, &ebx, &ecx, &edx);
- siblings = (ebx & 0xff0000) >> 16;
-
- if ( siblings <= 1 )
- {
- printk(KERN_INFO "CPU#%d: Hyper-Threading is disabled\n", cpu);
- }
- else if ( siblings > 2 )
- {
- panic("We don't support more than two logical CPUs per package!");
- }
- else
- {
- initial_apic_id = ebx >> 24 & 0xff;
- phys_proc_id[cpu] = initial_apic_id >> 1;
- logical_proc_id[cpu] = initial_apic_id & 1;
- printk(KERN_INFO "CPU#%d: Physical ID: %d, Logical ID: %d\n",
- cpu, phys_proc_id[cpu], logical_proc_id[cpu]);
- }
- }
-#endif
-}
-
-static void __init init_amd(struct cpuinfo_x86 *c)
-{
- /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
- 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
- clear_bit(0*32+31, &c->x86_capability);
-
- switch(c->x86)
- {
- case 5:
- panic("AMD K6 is not supported.\n");
- case 6: /* An Athlon/Duron. We can trust the BIOS probably */
- break;
- }
-}
-
-/*
- * This does the hard work of actually picking apart the CPU stuff...
- */
-void __init identify_cpu(struct cpuinfo_x86 *c)
-{
- int junk, i, cpu = smp_processor_id();
- u32 xlvl, tfms;
-
- phys_proc_id[cpu] = cpu;
- logical_proc_id[cpu] = 0;
-
- c->x86_vendor = X86_VENDOR_UNKNOWN;
- c->cpuid_level = -1; /* CPUID not detected */
- c->x86_model = c->x86_mask = 0; /* So far unknown... */
- c->x86_vendor_id[0] = '\0'; /* Unset */
- memset(&c->x86_capability, 0, sizeof c->x86_capability);
-
- if ( !have_cpuid_p() )
- panic("Ancient processors not supported\n");
-
- /* Get vendor name */
- cpuid(0x00000000, &c->cpuid_level,
- (int *)&c->x86_vendor_id[0],
- (int *)&c->x86_vendor_id[8],
- (int *)&c->x86_vendor_id[4]);
-
- get_cpu_vendor(c);
-
- if ( c->cpuid_level == 0 )
- panic("Decrepit CPUID not supported\n");
-
- cpuid(0x00000001, &tfms, &junk, &junk,
- &c->x86_capability[0]);
- c->x86 = (tfms >> 8) & 15;
- c->x86_model = (tfms >> 4) & 15;
- c->x86_mask = tfms & 15;
-
- /* AMD-defined flags: level 0x80000001 */
- xlvl = cpuid_eax(0x80000000);
- if ( (xlvl & 0xffff0000) == 0x80000000 ) {
- if ( xlvl >= 0x80000001 )
- c->x86_capability[1] = cpuid_edx(0x80000001);
- }
-
- /* Transmeta-defined flags: level 0x80860001 */
- xlvl = cpuid_eax(0x80860000);
- if ( (xlvl & 0xffff0000) == 0x80860000 ) {
- if ( xlvl >= 0x80860001 )
- c->x86_capability[2] = cpuid_edx(0x80860001);
- }
-
- printk("CPU%d: Before vendor init, caps: %08x %08x %08x, vendor = %d\n",
- smp_processor_id(),
- c->x86_capability[0],
- c->x86_capability[1],
- c->x86_capability[2],
- c->x86_vendor);
-
- switch ( c->x86_vendor ) {
- case X86_VENDOR_INTEL:
- init_intel(c);
- break;
- case X86_VENDOR_AMD:
- init_amd(c);
- break;
- case X86_VENDOR_UNKNOWN: /* Connectix Virtual PC reports this */
- break;
- case X86_VENDOR_CENTAUR:
- break;
- default:
- printk("Unknown CPU identifier (%d): continuing anyway, "
- "but might fail.\n", c->x86_vendor);
- }
-
- printk("CPU caps: %08x %08x %08x %08x\n",
- c->x86_capability[0],
- c->x86_capability[1],
- c->x86_capability[2],
- c->x86_capability[3]);
-
- /*
- * On SMP, boot_cpu_data holds the common feature set between
- * all CPUs; so make sure that we indicate which features are
- * common between the CPUs. The first time this routine gets
- * executed, c == &boot_cpu_data.
- */
- if ( c != &boot_cpu_data ) {
- /* AND the already accumulated flags with these */
- for ( i = 0 ; i < NCAPINTS ; i++ )
- boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
- }
-}
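/*
 * Sketch of the family/model/stepping decode above: CPUID leaf 1 returns
 * them packed in EAX ("tfms") as bits 8-11, 4-7 and 0-3. The sample value
 * 0x673 is illustrative (it would decode as a family-6, model-7 part):
 */
#include <stdio.h>

int main(void)
{
    unsigned int tfms = 0x00000673;

    printf("family=%u model=%u stepping=%u\n",
           (tfms >> 8) & 15, (tfms >> 4) & 15, tfms & 15);
    return 0;
}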
-
-
-unsigned long cpu_initialized;
-void __init cpu_init(void)
-{
- int nr = smp_processor_id();
- struct tss_struct * t = &init_tss[nr];
-
- if ( test_and_set_bit(nr, &cpu_initialized) )
- panic("CPU#%d already initialized!!!\n", nr);
- printk("Initializing CPU#%d\n", nr);
-
- /* Set up GDT and IDT. */
- SET_GDT_ENTRIES(current, DEFAULT_GDT_ENTRIES);
- SET_GDT_ADDRESS(current, DEFAULT_GDT_ADDRESS);
- __asm__ __volatile__("lgdt %0": "=m" (*current->mm.gdt));
- __asm__ __volatile__("lidt %0": "=m" (idt_descr));
-
- /* No nested task. */
- __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
-
- /* Ensure FPU gets initialised for each domain. */
- stts();
-
- /* Set up and load the per-CPU TSS and LDT. */
- t->ss0 = __HYPERVISOR_DS;
- t->esp0 = get_stack_top();
- set_tss_desc(nr,t);
- load_TR(nr);
- __asm__ __volatile__("lldt %%ax"::"a" (0));
-
- /* Clear all 6 debug registers. */
-#define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) );
- CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
-#undef CD
-
- /* Install correct page table. */
- write_ptbase(&current->mm);
-
- init_idle_task();
-}
-
-static void __init do_initcalls(void)
-{
- initcall_t *call;
- for ( call = &__initcall_start; call < &__initcall_end; call++ )
- (*call)();
-}
-
-/*
- * IBM-compatible BIOSes place drive info tables at initial interrupt
- * vectors 0x41 and 0x46. These are in the form of 16-bit-mode far ptrs.
- */
-struct drive_info_struct { unsigned char dummy[32]; } drive_info;
-void get_bios_driveinfo(void)
-{
- unsigned long seg, off, tab1, tab2;
-
- off = (unsigned long)*(unsigned short *)(4*0x41+0);
- seg = (unsigned long)*(unsigned short *)(4*0x41+2);
- tab1 = (seg<<4) + off;
-
- off = (unsigned long)*(unsigned short *)(4*0x46+0);
- seg = (unsigned long)*(unsigned short *)(4*0x46+2);
- tab2 = (seg<<4) + off;
-
- printk("Reading BIOS drive-info tables at 0x%05lx and 0x%05lx\n",
- tab1, tab2);
-
- memcpy(drive_info.dummy+ 0, (char *)tab1, 16);
- memcpy(drive_info.dummy+16, (char *)tab2, 16);
-}
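/*
 * Illustrative arithmetic for the lookup above: real-mode interrupt vector
 * n lives at linear address 4*n as a 16-bit offset followed by a 16-bit
 * segment, and seg:off maps to the linear address (seg << 4) + off. The
 * vector contents here are made up:
 */
#include <stdio.h>

int main(void)
{
    unsigned long seg = 0xf000, off = 0x1234;   /* hypothetical vector 0x41 */

    printf("vector 0x41 read from 0x%03x -> table at 0x%05lx\n",
           4 * 0x41, (seg << 4) + off);
    return 0;
}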
-
-
-unsigned long pci_mem_start = 0x10000000;
-
-void __init start_of_day(void)
-{
- extern void trap_init(void);
- extern void init_IRQ(void);
- extern void time_init(void);
- extern void timer_bh(void);
- extern void init_timervecs(void);
- extern void ac_timer_init(void);
- extern void initialize_keytable();
- extern void initialize_keyboard(void);
- extern int opt_nosmp, opt_watchdog, opt_noacpi, opt_ignorebiostables;
- extern int do_timer_lists_from_pit;
- unsigned long low_mem_size;
-
-#ifdef MEMORY_GUARD
- /* Unmap the first page of CPU0's stack. */
- extern unsigned long cpu0_stack[];
- memguard_guard_range(cpu0_stack, PAGE_SIZE);
-#endif
-
- open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ,
- (void *)new_tlbflush_clock_period,
- NULL);
-
- if ( opt_watchdog )
- nmi_watchdog = NMI_LOCAL_APIC;
-
- /*
- * We do this early, but tables are in the lowest 1MB (usually
- * 0xfe000-0xfffff). Therefore they're unlikely to ever get clobbered.
- */
- get_bios_driveinfo();
-
- /* Tell the PCI layer not to allocate too close to the RAM area.. */
- low_mem_size = ((max_page << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
- if ( low_mem_size > pci_mem_start ) pci_mem_start = low_mem_size;
-
- identify_cpu(&boot_cpu_data); /* get CPU type info */
- if ( cpu_has_fxsr ) set_in_cr4(X86_CR4_OSFXSR);
- if ( cpu_has_xmm ) set_in_cr4(X86_CR4_OSXMMEXCPT);
-#ifdef CONFIG_SMP
- if ( opt_ignorebiostables )
- {
- opt_nosmp = 1; /* No SMP without configuration */
- opt_noacpi = 1; /* ACPI will just confuse matters also */
- }
- else
- {
- find_smp_config();
- smp_alloc_memory(); /* trampoline which other CPUs jump at */
- }
-#endif
- paging_init(); /* not much here now, but sets up fixmap */
- if ( !opt_noacpi )
- acpi_boot_init();
-#ifdef CONFIG_SMP
- if ( smp_found_config )
- get_smp_config();
-#endif
- domain_init();
- scheduler_init();
- trap_init();
- init_IRQ(); /* installs simple interrupt wrappers. Starts HZ clock. */
- time_init(); /* installs software handler for HZ clock. */
- softirq_init();
- init_timervecs();
- init_bh(TIMER_BH, timer_bh);
- init_apic_mappings(); /* make APICs addressable in our pagetables. */
-
-#ifndef CONFIG_SMP
- APIC_init_uniprocessor();
-#else
- if ( opt_nosmp )
- APIC_init_uniprocessor();
- else
- smp_boot_cpus();
- /*
- * Does loads of stuff, including kicking the local
- * APIC, and the IO APIC after other CPUs are booted.
- * Each IRQ is preferably handled by IO-APIC, but
- * fall thru to 8259A if we have to (but slower).
- */
-#endif
-
- __sti();
-
- initialize_keytable(); /* call back handling for key codes */
-
- serial_init_stage2();
- initialize_keyboard(); /* setup keyboard (also for debugging) */
-
-#ifdef XEN_DEBUGGER
- initialize_pdb(); /* pervasive debugger */
-#endif
-
- if ( !cpu_has_apic )
- {
- do_timer_lists_from_pit = 1;
- if ( smp_num_cpus != 1 )
- panic("We need local APICs on SMP machines!");
- }
-
- ac_timer_init(); /* init accurate timers */
- init_xen_time(); /* initialise the time */
- schedulers_start(); /* start scheduler for each CPU */
-
- check_nmi_watchdog();
-
-#ifdef CONFIG_PCI
- pci_init();
-#endif
- do_initcalls();
-
-#ifdef CONFIG_SMP
- wait_init_idle = cpu_online_map;
- clear_bit(smp_processor_id(), &wait_init_idle);
- smp_threads_ready = 1;
- smp_commence(); /* Tell other CPUs that state of the world is stable. */
- while (wait_init_idle)
- {
- cpu_relax();
- barrier();
- }
-#endif
-
- watchdog_on = 1;
-}
+++ /dev/null
-/*
- * Intel SMP support routines.
- *
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
- *
- * This code is released under the GNU General Public License version 2 or
- * later.
- */
-
-#include <xen/irq.h>
-#include <xen/sched.h>
-#include <xen/delay.h>
-#include <xen/spinlock.h>
-#include <asm/smp.h>
-#include <asm/mc146818rtc.h>
-#include <asm/pgalloc.h>
-#include <asm/smpboot.h>
-#include <asm/hardirq.h>
-
-#ifdef CONFIG_SMP
-
-/*
- * Some notes on x86 processor bugs affecting SMP operation:
- *
- * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
- * The Linux implications for SMP are handled as follows:
- *
- * Pentium III / [Xeon]
- * None of the E1AP-E3AP errata are visible to the user.
- *
- * E1AP. see PII A1AP
- * E2AP. see PII A2AP
- * E3AP. see PII A3AP
- *
- * Pentium II / [Xeon]
- * None of the A1AP-A3AP errata are visible to the user.
- *
- * A1AP. see PPro 1AP
- * A2AP. see PPro 2AP
- * A3AP. see PPro 7AP
- *
- * Pentium Pro
- * None of 1AP-9AP errata are visible to the normal user,
- * except occasional delivery of 'spurious interrupt' as trap #15.
- * This is very rare and a non-problem.
- *
- * 1AP. Linux maps APIC as non-cacheable
- * 2AP. worked around in hardware
- * 3AP. fixed in C0 and above steppings microcode update.
- * Linux does not use excessive STARTUP_IPIs.
- * 4AP. worked around in hardware
- * 5AP. symmetric IO mode (normal Linux operation) not affected.
- * 'noapic' mode has vector 0xf filled out properly.
- * 6AP. 'noapic' mode might be affected - fixed in later steppings
- * 7AP. We do not assume writes to the LVT deasserting IRQs
- * 8AP. We do not enable low power mode (deep sleep) during MP bootup
- * 9AP. We do not use mixed mode
- */
-
-/*
- * the following functions deal with sending IPIs between CPUs.
- *
- * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
- */
-
-static inline int __prepare_ICR (unsigned int shortcut, int vector)
-{
- return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
-}
-
-static inline int __prepare_ICR2 (unsigned int mask)
-{
- return SET_APIC_DEST_FIELD(mask);
-}
-
-static inline void __send_IPI_shortcut(unsigned int shortcut, int vector)
-{
- /*
- * Subtle. In the case of the 'never do double writes' workaround
- * we have to lock out interrupts to be safe. As we don't care
- * of the value read we use an atomic rmw access to avoid costly
- * cli/sti. Otherwise we use an even cheaper single atomic write
- * to the APIC.
- */
- unsigned int cfg;
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- /*
- * No need to touch the target chip field
- */
- cfg = __prepare_ICR(shortcut, vector);
-
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- apic_write_around(APIC_ICR, cfg);
-}
-
-void send_IPI_self(int vector)
-{
- __send_IPI_shortcut(APIC_DEST_SELF, vector);
-}
-
-static inline void send_IPI_mask(int mask, int vector)
-{
- unsigned long cfg;
- unsigned long flags;
-
- __save_flags(flags);
- __cli();
-
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- /*
- * prepare target chip field
- */
- cfg = __prepare_ICR2(mask);
- apic_write_around(APIC_ICR2, cfg);
-
- /*
- * program the ICR
- */
- cfg = __prepare_ICR(0, vector);
-
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- apic_write_around(APIC_ICR, cfg);
-
- __restore_flags(flags);
-}
-
-static inline void send_IPI_allbutself(int vector)
-{
- /*
- * if there are no other CPUs in the system then
- * we get an APIC send error if we try to broadcast.
- * thus we have to avoid sending IPIs in this case.
- */
- if (!(smp_num_cpus > 1))
- return;
-
- __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
-}
-
-/*
- * ********* XEN NOTICE **********
- * I've left the following comments lying around as they look like they might
- * be useful to get multiprocessor guest OSes going. However, I suspect the
- * issues we face will be quite different so I've ripped out all the
- * TLBSTATE logic (I didn't understand it anyway :-). These comments do
- * not apply to Xen, therefore! -- Keir (8th Oct 2003).
- */
-/*
- * Smarter SMP flushing macros.
- * c/o Linus Torvalds.
- *
- * These mean you can really definitely utterly forget about
- * writing to user space from interrupts. (It's not allowed anyway.)
- *
- * Optimizations Manfred Spraul <manfred@colorfullife.com>
- *
- * The flush IPI assumes that a thread switch happens in this order:
- * [cpu0: the cpu that switches]
- * 1) switch_mm() either 1a) or 1b)
- * 1a) thread switch to a different mm
- * 1a1) clear_bit(cpu, &old_mm.cpu_vm_mask);
- * Stop ipi delivery for the old mm. This is not synchronized with
- * the other cpus, but smp_invalidate_interrupt ignores flush ipis
- * for the wrong mm, and in the worst case we perform a superfluous
- * tlb flush.
- * 1a2) set cpu_tlbstate to TLBSTATE_OK
- * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
- * was in lazy tlb mode.
- * 1a3) update cpu_tlbstate[].active_mm
- * Now cpu0 accepts tlb flushes for the new mm.
- * 1a4) set_bit(cpu, &new_mm.cpu_vm_mask);
- * Now the other cpus will send tlb flush ipis.
- * 1a5) change cr3.
- * 1b) thread switch without mm change
- * cpu_tlbstate[].active_mm is correct, cpu0 already handles
- * flush ipis.
- * 1b1) set cpu_tlbstate to TLBSTATE_OK
- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
- * Atomically set the bit [other cpus will start sending flush ipis],
- * and test the bit.
- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
- * 2) switch %%esp, ie current
- *
- * The interrupt must handle 2 special cases:
- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
- * runs in kernel space, the cpu could load tlb entries for user space
- * pages.
- *
- * The good news is that cpu_tlbstate is local to each cpu, no
- * write/read ordering problems.
- *
- * TLB flush IPI:
- *
- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
- * 2) Leave the mm if we are in the lazy tlb mode.
- */
-
-static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED;
-volatile unsigned long flush_cpumask;
-
-asmlinkage void smp_invalidate_interrupt(void)
-{
- ack_APIC_irq();
- perfc_incrc(ipis);
- if ( likely(test_and_clear_bit(smp_processor_id(), &flush_cpumask)) )
- local_flush_tlb();
-}
-
-void flush_tlb_mask(unsigned long mask)
-{
- ASSERT(!in_irq());
-
- if ( mask & (1 << smp_processor_id()) )
- {
- local_flush_tlb();
- mask &= ~(1 << smp_processor_id());
- }
-
- if ( mask != 0 )
- {
- /*
- * We are certainly not reentering a flush_lock region on this CPU
- * because we are not in an IRQ context. We can therefore wait for the
- * other guy to release the lock. This is harder than it sounds because
- * local interrupts might be disabled, and he may be waiting for us to
- * execute smp_invalidate_interrupt(). We deal with this possibility by
- * inlining the meat of that function here.
- */
- while ( unlikely(!spin_trylock(&flush_lock)) )
- {
- if ( test_and_clear_bit(smp_processor_id(), &flush_cpumask) )
- local_flush_tlb();
- rep_nop();
- }
-
- flush_cpumask = mask;
- send_IPI_mask(mask, INVALIDATE_TLB_VECTOR);
- while ( flush_cpumask != 0 )
- {
- rep_nop();
- barrier();
- }
-
- spin_unlock(&flush_lock);
- }
-}
-
-/*
- * NB. Must be called with no locks held and interrupts enabled.
- * (e.g., softirq context).
- */
-void new_tlbflush_clock_period(void)
-{
- spin_lock(&flush_lock);
-
- /* Someone may acquire the lock and execute the flush before us. */
- if ( ((tlbflush_clock+1) & TLBCLOCK_EPOCH_MASK) != 0 )
- goto out;
-
- if ( smp_num_cpus > 1 )
- {
- /* Flush everyone else. We definitely flushed just before entry. */
- flush_cpumask = ((1 << smp_num_cpus) - 1) & ~(1 << smp_processor_id());
- send_IPI_allbutself(INVALIDATE_TLB_VECTOR);
- while ( flush_cpumask != 0 )
- {
- rep_nop();
- barrier();
- }
- }
-
- /* No need for atomicity: we are the only possible updater. */
- tlbflush_clock++;
-
- out:
- spin_unlock(&flush_lock);
-}
-
-static void flush_tlb_all_pge_ipi(void* info)
-{
- __flush_tlb_pge();
-}
-
-void flush_tlb_all_pge(void)
-{
- smp_call_function (flush_tlb_all_pge_ipi,0,1,1);
- __flush_tlb_pge();
-}
-
-void smp_send_event_check_mask(unsigned long cpu_mask)
-{
- send_IPI_mask(cpu_mask, EVENT_CHECK_VECTOR);
-}
-
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
-
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- atomic_t started;
- atomic_t finished;
- int wait;
-};
-
-static struct call_data_struct * call_data;
-
-/*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- */
-
-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
- int wait)
-/*
- * [SUMMARY] Run a function on all other CPUs.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <nonatomic> currently unused.
- * <wait> If true, wait (atomically) until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code. Does not return until
- * remote CPUs are nearly ready to execute <<func>>, or have already executed it.
- *
- * You must not call this function with interrupts disabled, or from a
- * hardware interrupt handler or bottom half.
- */
-{
- struct call_data_struct data;
- int cpus = smp_num_cpus-1;
-
- if (!cpus)
- return 0;
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- ASSERT(local_irq_is_enabled());
-
- spin_lock(&call_lock);
-
- call_data = &data;
- wmb();
- /* Send a message to all other CPUs and wait for them to respond */
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- barrier();
-
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- barrier();
-
- spin_unlock(&call_lock);
-
- return 0;
-}
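/*
 * A user-space model (illustrative only; the real code uses IPIs and Xen's
 * atomics) of the started/finished handshake above: the initiator publishes
 * the call data, kicks the other CPU, then spins until the callee has
 * copied the data out and, when wait is set, until it has run the function.
 * Build with: cc -pthread sketch.c
 */
#include <stdio.h>
#include <pthread.h>
#include <stdatomic.h>

static atomic_int go, started, finished;
static void (*func)(void *);    /* models call_data->func */
static void *info;              /* models call_data->info */

static void greet(void *arg)
{
    printf("hello from %s\n", (char *)arg);
}

static void *other_cpu(void *unused)
{
    while (!atomic_load(&go))               /* wait for the "IPI"         */
        ;
    void (*f)(void *) = func;               /* grab the call data ...     */
    void *i = info;
    atomic_fetch_add(&started, 1);          /* ... and release the caller */
    f(i);
    atomic_fetch_add(&finished, 1);         /* the wait==1 half           */
    return NULL;
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, other_cpu, NULL);
    func = greet;                           /* publish the call data      */
    info = "the other cpu";
    atomic_store(&go, 1);                   /* "send_IPI_allbutself"      */
    while (atomic_load(&started) != 1)      /* wait for response          */
        ;
    while (atomic_load(&finished) != 1)     /* wait for completion        */
        ;
    pthread_join(t, NULL);
    return 0;
}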
-
-static void stop_this_cpu (void * dummy)
-{
- /*
- * Remove this CPU:
- */
- clear_bit(smp_processor_id(), &cpu_online_map);
- __cli();
- disable_local_APIC();
- for(;;) __asm__("hlt");
-}
-
-/*
- * this function calls the 'stop' function on all other CPUs in the system.
- */
-
-void smp_send_stop(void)
-{
- smp_call_function(stop_this_cpu, NULL, 1, 0);
- smp_num_cpus = 1;
-
- __cli();
- disable_local_APIC();
- __sti();
-}
-
-/*
- * Nothing to do, as all the work is done automatically when
- * we return from the interrupt.
- */
-asmlinkage void smp_event_check_interrupt(void)
-{
- ack_APIC_irq();
- perfc_incrc(ipis);
-}
-
-asmlinkage void smp_call_function_interrupt(void)
-{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- int wait = call_data->wait;
-
- ack_APIC_irq();
- perfc_incrc(ipis);
-
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
- (*func)(info);
- if (wait) {
- mb();
- atomic_inc(&call_data->finished);
- }
-}
-
-#endif /* CONFIG_SMP */
+++ /dev/null
-/*
- * x86 SMP booting functions
- *
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- * Much of the core SMP work is based on previous work by Thomas Radke, to
- * whom a great many thanks are extended.
- *
- * Thanks to Intel for making available several different Pentium,
- * Pentium Pro and Pentium-II/Xeon MP machines.
- * Original development of Linux SMP code supported by Caldera.
- *
- * This code is released under the GNU General Public License version 2 or
- * later.
- *
- * Fixes
- * Felix Koop : NR_CPUS used properly
- * Jose Renau : Handle single CPU case.
- * Alan Cox : By repeated request 8) - Total BogoMIP report.
- * Greg Wright : Fix for kernel stacks panic.
- * Erich Boleyn : MP v1.4 and additional changes.
- * Matthias Sattler : Changes for 2.1 kernel map.
- * Michel Lespinasse : Changes for 2.1 kernel map.
- * Michael Chastain : Change trampoline.S to gnu as.
- * Alan Cox : Dumb bug: 'B' step PPro's are fine
- * Ingo Molnar : Added APIC timers, based on code
- * from Jose Renau
- * Ingo Molnar : various cleanups and rewrites
- * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
- * Maciej W. Rozycki : Bits for genuine 82489DX APICs
- * Martin J. Bligh : Added support for multi-quad systems
- */
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/interrupt.h>
-#include <xen/irq.h>
-#include <xen/mm.h>
-#include <xen/slab.h>
-#include <asm/pgalloc.h>
-#include <asm/mc146818rtc.h>
-#include <asm/smpboot.h>
-#include <xen/smp.h>
-#include <asm/msr.h>
-#include <asm/system.h>
-#include <asm/mpspec.h>
-#include <asm/io_apic.h>
-#include <xen/sched.h>
-#include <xen/delay.h>
-#include <xen/lib.h>
-
-#ifdef CONFIG_SMP
-
-/* Set if we find a B stepping CPU */
-static int smp_b_stepping;
-
-/* Setup configured maximum number of CPUs to activate */
-static int max_cpus = -1;
-
-/* Total count of live CPUs */
-int smp_num_cpus = 1;
-
-/* Bitmask of currently online CPUs */
-unsigned long cpu_online_map;
-
-static volatile unsigned long cpu_callin_map;
-static volatile unsigned long cpu_callout_map;
-
-/* Per CPU bogomips and other parameters */
-struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
-
-/* Set when the idlers are all forked */
-int smp_threads_ready;
-
-/*
- * Trampoline 80x86 program as an array.
- */
-
-extern unsigned char trampoline_data [];
-extern unsigned char trampoline_end [];
-static unsigned char *trampoline_base;
-
-/*
- * Currently trivial. Write the real->protected mode
- * bootstrap into the page concerned. The caller
- * has made sure it's suitably aligned.
- */
-
-static unsigned long __init setup_trampoline(void)
-{
- memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
- return virt_to_phys(trampoline_base);
-}
-
-/*
- * We are called very early to get the low memory for the
- * SMP bootup trampoline page.
- */
-void __init smp_alloc_memory(void)
-{
- /*
- * Has to be in very low memory so we can execute
- * real-mode AP code.
- */
- trampoline_base = __va(0x90000);
-}
-
-/*
- * The bootstrap kernel entry code has set these up. Save them for
- * a given CPU
- */
-
-void __init smp_store_cpu_info(int id)
-{
- struct cpuinfo_x86 *c = cpu_data + id;
-
- *c = boot_cpu_data;
- c->pte_quick = 0;
- c->pmd_quick = 0;
- c->pgd_quick = 0;
- c->pgtable_cache_sz = 0;
- identify_cpu(c);
- /*
- * Mask B, Pentium, but not Pentium MMX
- */
- if (c->x86_vendor == X86_VENDOR_INTEL &&
- c->x86 == 5 &&
- c->x86_mask >= 1 && c->x86_mask <= 4 &&
- c->x86_model <= 3)
- /*
- * Remember we have B step Pentia with bugs
- */
- smp_b_stepping = 1;
-}
-
-/*
- * Architecture specific routine called by the kernel just before init is
- * fired off. This allows the BP to have everything in order [we hope].
- * At the end of this all the APs will hit the system scheduling and off
- * we go. Each AP will load the system gdt's and jump through the kernel
- * init into idle(). At this point the scheduler will one day take over
- * and give them jobs to do. smp_callin is a standard routine
- * we use to track CPUs as they power up.
- */
-
-static atomic_t smp_commenced = ATOMIC_INIT(0);
-
-void __init smp_commence(void)
-{
- /*
- * Lets the callins below out of their loop.
- */
- Dprintk("Setting commenced=1, go go go\n");
-
- wmb();
- atomic_set(&smp_commenced,1);
-}
-
-/*
- * TSC synchronization.
- *
- * We first check whether all CPUs have their TSCs synchronized,
- * then we print a warning if not, and always resync.
- */
-
-static atomic_t tsc_start_flag = ATOMIC_INIT(0);
-static atomic_t tsc_count_start = ATOMIC_INIT(0);
-static atomic_t tsc_count_stop = ATOMIC_INIT(0);
-static unsigned long long tsc_values[NR_CPUS];
-
-#define NR_LOOPS 5
-
-/*
- * accurate 64-bit/32-bit division, expanded to 32-bit divisions and 64-bit
- * multiplication. Not terribly optimized but we need it at boot time only
- * anyway.
- *
- * result == a / b
- * == (a1 + a2*(2^32)) / b
- * == a1/b + a2*(2^32/b)
- * == a1/b + a2*((2^32-1)/b) + a2/b + (a2*((2^32-1) % b))/b
- * ^---- (this multiplication can overflow)
- */
-
-static unsigned long long div64 (unsigned long long a, unsigned long b0)
-{
- unsigned int a1, a2;
- unsigned long long res;
-
- a1 = ((unsigned int*)&a)[0];
- a2 = ((unsigned int*)&a)[1];
-
- res = a1/b0 +
- (unsigned long long)a2 * (unsigned long long)(0xffffffff/b0) +
- a2 / b0 +
- (a2 * (0xffffffff % b0)) / b0;
-
- return res;
-}
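/*
 * A user-space check of the decomposition documented above (adapted: the
 * intermediate products are widened to 64 bits so the sketch is safe on
 * LP64 hosts). Because the four quotients are floored separately, the
 * result can undershoot true division by up to two units, which is
 * harmless for the boot-time skew report it feeds.
 */
#include <stdio.h>

static unsigned long long div64(unsigned long long a, unsigned long b0)
{
    unsigned int a1 = (unsigned int)a;         /* low 32 bits  */
    unsigned int a2 = (unsigned int)(a >> 32); /* high 32 bits */

    return a1 / b0 +
        (unsigned long long)a2 * (unsigned long long)(0xffffffffu / b0) +
        a2 / b0 +
        ((unsigned long long)a2 * (0xffffffffu % b0)) / b0;
}

int main(void)
{
    unsigned long long a = 1234567890123456789ULL;
    unsigned long b = 1000000;

    printf("div64 = %llu, native = %llu\n", div64(a, b), a / b);
    return 0;
}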
-
-static void __init synchronize_tsc_bp (void)
-{
- int i;
- unsigned long long t0;
- unsigned long long sum, avg;
- long long delta;
- int buggy = 0;
-
- printk("checking TSC synchronization across CPUs: ");
-
- atomic_set(&tsc_start_flag, 1);
- wmb();
-
- /*
- * We loop a few times to get a primed instruction cache,
- * then the last pass is more or less synchronized and
- * the BP and APs set their cycle counters to zero all at
- * once. This reduces the chance of having random offsets
- * between the processors, and guarantees that the maximum
- * delay between the cycle counters is never bigger than
- * the latency of information-passing (cachelines) between
- * two CPUs.
- */
- for (i = 0; i < NR_LOOPS; i++) {
- /*
- * all APs synchronize but they loop on '== num_cpus'
- */
- while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb();
- atomic_set(&tsc_count_stop, 0);
- wmb();
- /*
- * this lets the APs save their current TSC:
- */
- atomic_inc(&tsc_count_start);
-
- rdtscll(tsc_values[smp_processor_id()]);
- /*
- * We clear the TSC in the last loop:
- */
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
-
- /*
- * Wait for all APs to leave the synchronization point:
- */
- while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb();
- atomic_set(&tsc_count_start, 0);
- wmb();
- atomic_inc(&tsc_count_stop);
- }
-
- sum = 0;
- for (i = 0; i < smp_num_cpus; i++) {
- t0 = tsc_values[i];
- sum += t0;
- }
- avg = div64(sum, smp_num_cpus);
-
- sum = 0;
- for (i = 0; i < smp_num_cpus; i++) {
- delta = tsc_values[i] - avg;
- if (delta < 0)
- delta = -delta;
- /*
- * We report clock differences bigger than 2 microseconds.
- */
- if (delta > 2*ticks_per_usec) {
- long realdelta;
- if (!buggy) {
- buggy = 1;
- printk("\n");
- }
- realdelta = div64(delta, ticks_per_usec);
- if (tsc_values[i] < avg)
- realdelta = -realdelta;
-
- printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
- i, realdelta);
- }
-
- sum += delta;
- }
- if (!buggy)
- printk("passed.\n");
-}
-
-static void __init synchronize_tsc_ap (void)
-{
- int i;
-
- /*
- * smp_num_cpus is not necessarily known at the time
- * this gets called, so we first wait for the BP to
- * finish SMP initialization:
- */
- while (!atomic_read(&tsc_start_flag)) mb();
-
- for (i = 0; i < NR_LOOPS; i++) {
- atomic_inc(&tsc_count_start);
- while (atomic_read(&tsc_count_start) != smp_num_cpus) mb();
-
- rdtscll(tsc_values[smp_processor_id()]);
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
-
- atomic_inc(&tsc_count_stop);
- while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb();
- }
-}
-#undef NR_LOOPS
-
-static atomic_t init_deasserted;
-
-void __init smp_callin(void)
-{
- int cpuid, phys_id, i;
-
- /*
- * If woken up by an INIT in an 82489DX configuration
- * we may get here before an INIT-deassert IPI reaches
- * our local APIC. We have to wait for the IPI or we'll
- * lock up on an APIC access.
- */
- while (!atomic_read(&init_deasserted));
-
- /*
- * (This works even if the APIC is not enabled.)
- */
- phys_id = GET_APIC_ID(apic_read(APIC_ID));
- cpuid = smp_processor_id();
- if (test_and_set_bit(cpuid, &cpu_online_map)) {
- printk("huh, phys CPU#%d, CPU#%d already present??\n",
- phys_id, cpuid);
- BUG();
- }
- Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
-
- /*
- * STARTUP IPIs are fragile beasts as they might sometimes
- * trigger some glue motherboard logic. Complete APIC bus
- * silence for 1 second; this overestimates the time the
- * boot CPU spends sending the (up to 2) STARTUP IPIs
- * by a factor of two. This should be enough.
- */
-
- for ( i = 0; i < 200; i++ )
- {
- if ( test_bit(cpuid, &cpu_callout_map) ) break;
- mdelay(10);
- }
-
- if (!test_bit(cpuid, &cpu_callout_map)) {
- printk("BUG: CPU%d started up but did not get a callout!\n",
- cpuid);
- BUG();
- }
-
- /*
- * the boot CPU has finished the init stage and is spinning
- * on callin_map until we finish. We are free to set up this
- * CPU, first the APIC. (this is probably redundant on most
- * boards)
- */
-
- Dprintk("CALLIN, before setup_local_APIC().\n");
-
- setup_local_APIC();
-
- __sti();
-
-#ifdef CONFIG_MTRR
- /*
- * Must be done before calibration delay is computed
- */
- mtrr_init_secondary_cpu ();
-#endif
-
- Dprintk("Stack at about %p\n",&cpuid);
-
- /*
- * Save our processor parameters
- */
- smp_store_cpu_info(cpuid);
-
- if (nmi_watchdog == NMI_LOCAL_APIC)
- setup_apic_nmi_watchdog();
-
- /*
- * Allow the master to continue.
- */
- set_bit(cpuid, &cpu_callin_map);
-
- /*
- * Synchronize the TSC with the BP
- */
- synchronize_tsc_ap();
-}
-
-static int cpucount;
-
-/*
- * Activate a secondary processor.
- */
-void __init start_secondary(void)
-{
- unsigned int cpu = cpucount;
- /* 6 bytes suitable for passing to LIDT instruction. */
- unsigned char idt_load[6];
-
- extern void cpu_init(void);
-
- set_current(idle_task[cpu]);
-
- /*
- * Don't put anything before smp_callin(); SMP
- * booting is so fragile that we want to limit the
- * things done here to the bare minimum.
- */
- cpu_init();
- smp_callin();
-
- while (!atomic_read(&smp_commenced))
- rep_nop();
-
- /*
- * At this point, boot CPU has fully initialised the IDT. It is
- * now safe to make ourselves a private copy.
- */
- idt_tables[cpu] = kmalloc(IDT_ENTRIES*8, GFP_KERNEL);
- memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*8);
- *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*8)-1;
- *(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
- __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) );
-
- /*
- * low-memory mappings have been cleared, flush them from the local TLBs
- * too.
- */
- local_flush_tlb();
-
- startup_cpu_idle_loop();
-
- BUG();
-}
-
-extern struct {
- unsigned long esp, ss;
-} stack_start;
-
-/* which physical APIC ID maps to which logical CPU number */
-volatile int physical_apicid_2_cpu[MAX_APICID];
-/* which logical CPU number maps to which physical APIC ID */
-volatile int cpu_2_physical_apicid[NR_CPUS];
-
-/* which logical APIC ID maps to which logical CPU number */
-volatile int logical_apicid_2_cpu[MAX_APICID];
-/* which logical CPU number maps to which logical APIC ID */
-volatile int cpu_2_logical_apicid[NR_CPUS];
-
-static inline void init_cpu_to_apicid(void)
-/* Initialize all maps between cpu number and apicids */
-{
- int apicid, cpu;
-
- for (apicid = 0; apicid < MAX_APICID; apicid++) {
- physical_apicid_2_cpu[apicid] = -1;
- logical_apicid_2_cpu[apicid] = -1;
- }
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
- cpu_2_physical_apicid[cpu] = -1;
- cpu_2_logical_apicid[cpu] = -1;
- }
-}
-
-static inline void map_cpu_to_boot_apicid(int cpu, int apicid)
-/*
- * set up a mapping between cpu and apicid. Uses logical apicids for multiquad,
- * else physical apic ids
- */
-{
- physical_apicid_2_cpu[apicid] = cpu;
- cpu_2_physical_apicid[cpu] = apicid;
-}
-
-static inline void unmap_cpu_to_boot_apicid(int cpu, int apicid)
-/*
- * undo a mapping between cpu and apicid. Uses logical apicids for multiquad,
- * else physical apic ids
- */
-{
- physical_apicid_2_cpu[apicid] = -1;
- cpu_2_physical_apicid[cpu] = -1;
-}
-
-#if APIC_DEBUG
-static inline void inquire_remote_apic(int apicid)
-{
- int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
- char *names[] = { "ID", "VERSION", "SPIV" };
- int timeout, status;
-
- printk("Inquiring remote APIC #%d...\n", apicid);
-
- for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
- printk("... APIC #%d %s: ", apicid, names[i]);
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
- apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
-
- timeout = 0;
- do {
- udelay(100);
- status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
- } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
-
- switch (status) {
- case APIC_ICR_RR_VALID:
- status = apic_read(APIC_RRR);
- printk("%08x\n", status);
- break;
- default:
- printk("failed\n");
- }
- }
-}
-#endif
-
-
-static int wakeup_secondary_via_INIT(int phys_apicid, unsigned long start_eip)
-{
- unsigned long send_status = 0, accept_status = 0;
- int maxlvt, timeout, num_starts, j;
-
- Dprintk("Asserting INIT.\n");
-
- /*
- * Turn INIT on target chip
- */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /*
- * Send IPI
- */
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
- | APIC_DM_INIT);
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- mdelay(10);
-
- Dprintk("Deasserting INIT.\n");
-
- /* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /* Send IPI */
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- atomic_set(&init_deasserted, 1);
-
- /*
- * Should we send STARTUP IPIs ?
- *
- * Determine this based on the APIC version.
- * If we don't have an integrated APIC, don't send the STARTUP IPIs.
- */
- if (APIC_INTEGRATED(apic_version[phys_apicid]))
- num_starts = 2;
- else
- num_starts = 0;
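-
- /*
- * (An external 82489DX APIC has no STARTUP IPI; after INIT it
- * resumes through the BIOS warm-reset vector instead, which the
- * caller has already pointed at the trampoline.)
- */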
-
- /*
- * Run STARTUP IPI loop.
- */
- Dprintk("#startup loops: %d.\n", num_starts);
-
- maxlvt = get_maxlvt();
-
- for (j = 1; j <= num_starts; j++) {
- Dprintk("Sending STARTUP #%d.\n",j);
-
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- Dprintk("After apic_write.\n");
-
- /*
- * STARTUP IPI
- */
-
- /* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /* Boot on the stack */
- /* Kick the second */
- apic_write_around(APIC_ICR, APIC_DM_STARTUP
- | (start_eip >> 12));
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(300);
-
- Dprintk("Startup point 1.\n");
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(200);
- /*
- * Due to the Pentium erratum 3AP.
- */
- if (maxlvt > 3) {
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- }
- accept_status = (apic_read(APIC_ESR) & 0xEF);
- if (send_status || accept_status)
- break;
- }
- Dprintk("After Startup.\n");
-
- if (send_status)
- printk("APIC never delivered???\n");
- if (accept_status)
- printk("APIC delivery error (%lx).\n", accept_status);
-
- return (send_status | accept_status);
-}
-
-extern unsigned long cpu_initialized;
-
-static void __init do_boot_cpu (int apicid)
-/*
- * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
- * (i.e. clustered apic addressing mode), this is a LOGICAL apic ID.
- */
-{
- struct task_struct *idle;
- unsigned long boot_error = 0;
- int timeout, cpu;
- unsigned long start_eip, stack;
-
- cpu = ++cpucount;
-
- if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
- panic("failed 'createdomain' for CPU %d", cpu);
-
- set_bit(PF_IDLETASK, &idle->flags);
-
- idle->mm.pagetable = mk_pagetable(__pa(idle_pg_table));
-
- map_cpu_to_boot_apicid(cpu, apicid);
-
- SET_DEFAULT_FAST_TRAP(&idle->thread);
-
- idle_task[cpu] = idle;
-
- /* start_eip had better be page-aligned! */
- start_eip = setup_trampoline();
-
- /* So we see what's up. */
- printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
-
- stack = __pa(__get_free_pages(GFP_KERNEL, 1));
- stack_start.esp = stack + STACK_SIZE - STACK_RESERVED;
-
- /* Debug build: detect stack overflow by setting up a guard page. */
- memguard_guard_range(__va(stack), PAGE_SIZE);
-
- /*
- * This grunge runs the startup process for
- * the targeted processor.
- */
-
- atomic_set(&init_deasserted, 0);
-
- Dprintk("Setting warm reset code and vector.\n");
-
- CMOS_WRITE(0xa, 0xf);
- local_flush_tlb();
- Dprintk("1.\n");
- *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
- Dprintk("2.\n");
- *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
- Dprintk("3.\n");
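-
- /*
- * The two writes above set the real-mode warm-reset vector at
- * 40:67 (segment word at 0x469, offset word at 0x467) to point at
- * the trampoline; with CMOS shutdown code 0xa written above, a CPU
- * coming out of INIT resumes via that vector.
- */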
-
- /*
- * Be paranoid about clearing APIC errors.
- */
- if ( APIC_INTEGRATED(apic_version[apicid]) )
- {
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- }
-
- /*
- * Status is now clean
- */
- boot_error = 0;
-
- /*
- * Starting actual IPI sequence...
- */
-
- boot_error = wakeup_secondary_via_INIT(apicid, start_eip);
-
- if (!boot_error) {
- /*
- * allow APs to start initializing.
- */
- Dprintk("Before Callout %d.\n", cpu);
- set_bit(cpu, &cpu_callout_map);
- Dprintk("After Callout %d.\n", cpu);
-
- /*
- * Wait 5s total for a response
- */
- for (timeout = 0; timeout < 50000; timeout++) {
- if (test_bit(cpu, &cpu_callin_map))
- break; /* It has booted */
- udelay(100);
- }
-
- if (test_bit(cpu, &cpu_callin_map)) {
- /* number CPUs logically, starting from 1 (BSP is 0) */
- printk("CPU%d has booted.\n", cpu);
- } else {
- boot_error = 1;
- if (*((volatile unsigned long *)phys_to_virt(start_eip))
- == 0xA5A5A5A5)
- /* trampoline started but...? */
- printk("Stuck ??\n");
- else
- /* trampoline code not run */
- printk("Not responding.\n");
-#if APIC_DEBUG
- inquire_remote_apic(apicid);
-#endif
- }
- }
- if (boot_error) {
- /* Try to put things back the way they were before ... */
- unmap_cpu_to_boot_apicid(cpu, apicid);
- clear_bit(cpu, &cpu_callout_map); /* was set here (do_boot_cpu()) */
- clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
- clear_bit(cpu, &cpu_online_map); /* was set in smp_callin() */
- cpucount--;
- }
-}
-
-
-/*
- * Cycle through the processors sending APIC IPIs to boot each.
- */
-
-static int boot_cpu_logical_apicid;
-/* Where the IO area was mapped on multiquad, always 0 otherwise */
-void *xquad_portio = NULL;
-
-void __init smp_boot_cpus(void)
-{
- int apicid, bit;
-
-#ifdef CONFIG_MTRR
- /* Must be done before other processors booted */
- mtrr_init_boot_cpu ();
-#endif
- /* Initialize the logical to physical CPU number mapping */
- init_cpu_to_apicid();
-
- /*
- * Setup boot CPU information
- */
- smp_store_cpu_info(0); /* Final full version of the data */
- printk("CPU%d booted\n", 0);
-
- /*
- * We have the boot CPU online for sure.
- */
- set_bit(0, &cpu_online_map);
- boot_cpu_logical_apicid = logical_smp_processor_id();
- map_cpu_to_boot_apicid(0, boot_cpu_apicid);
-
- /*
- * If we couldn't find an SMP configuration at boot time,
- * get out of here now!
- */
- if (!smp_found_config) {
- printk("SMP motherboard not detected.\n");
- io_apic_irqs = 0;
- cpu_online_map = phys_cpu_present_map = 1;
- smp_num_cpus = 1;
- if (APIC_init_uniprocessor())
- printk("Local APIC not detected."
- " Using dummy APIC emulation.\n");
- goto smp_done;
- }
-
- /*
- * Should not be necessary because the MP table should list the boot
- * CPU too, but we do it for the sake of robustness anyway.
- */
- if (!test_bit(boot_cpu_physical_apicid, &phys_cpu_present_map)) {
- printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
- boot_cpu_physical_apicid);
- phys_cpu_present_map |= (1 << hard_smp_processor_id());
- }
-
- /*
- * If we couldn't find a local APIC, then get out of here now!
- */
- if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
- !test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) {
- printk("BIOS bug, local APIC #%d not detected!...\n",
- boot_cpu_physical_apicid);
- printk("... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
- io_apic_irqs = 0;
- cpu_online_map = phys_cpu_present_map = 1;
- smp_num_cpus = 1;
- goto smp_done;
- }
-
- verify_local_APIC();
-
- /*
- * If SMP should be disabled, then really disable it!
- */
- if (!max_cpus) {
- smp_found_config = 0;
- printk("SMP mode deactivated, forcing use of dummy APIC emulation.\n");
- io_apic_irqs = 0;
- cpu_online_map = phys_cpu_present_map = 1;
- smp_num_cpus = 1;
- goto smp_done;
- }
-
- connect_bsp_APIC();
- setup_local_APIC();
-
- if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid)
- BUG();
-
- /*
- * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
- *
- * In clustered apic mode, phys_cpu_present_map is constructed thus:
- * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
- * clustered apic ID.
- */
- Dprintk("CPU present map: %lx\n", phys_cpu_present_map);
-
- for (bit = 0; bit < NR_CPUS; bit++) {
- apicid = cpu_present_to_apicid(bit);
- /*
- * Don't even attempt to start the boot CPU!
- */
- if (apicid == boot_cpu_apicid)
- continue;
-
- if (!(phys_cpu_present_map & (1 << bit)))
- continue;
- if ((max_cpus >= 0) && (max_cpus <= cpucount+1))
- continue;
-
- do_boot_cpu(apicid);
-
- /*
- * Make sure we unmap all failed CPUs
- */
- if ((boot_apicid_to_cpu(apicid) == -1) &&
- (phys_cpu_present_map & (1 << bit)))
- printk("CPU #%d not responding - cannot use it.\n",
- apicid);
- }
-
- /*
- * Cleanup possible dangling ends...
- */
- /*
- * Install writable page 0 entry to set BIOS data area.
- */
- local_flush_tlb();
-
- /*
- * Paranoid: Set warm reset code and vector here back
- * to default values.
- */
- CMOS_WRITE(0, 0xf);
-
- *((volatile long *) phys_to_virt(0x467)) = 0;
-
- if (!cpucount) {
- printk("Error: only one processor found.\n");
- } else {
- printk("Total of %d processors activated.\n", cpucount+1);
- }
- smp_num_cpus = cpucount + 1;
-
- if (smp_b_stepping)
- printk("WARNING: SMP operation may"
- " be unreliable with B stepping processors.\n");
- Dprintk("Boot done.\n");
-
- /*
- * Here we can be sure that there is an IO-APIC in the system. Let's
- * go and set it up:
- */
- if ( nr_ioapics ) setup_IO_APIC();
-
- /* Set up all local APIC timers in the system. */
- setup_APIC_clocks();
-
- /* Synchronize the TSC with the AP(s). */
- if ( cpucount ) synchronize_tsc_bp();
-
- smp_done:
- ;
-}
-
-#endif /* CONFIG_SMP */
+++ /dev/null
-/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
- ****************************************************************************
- * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
- * (C) 2002-2003 University of Cambridge
- ****************************************************************************
- *
- * File: i386/time.c
- * Author: Rolf Neugebauer & Keir Fraser
- *
- * Environment: Xen Hypervisor
- * Description: modified version of Linux' time.c
- * implements system and wall clock time.
- * based on freebsd's implementation.
- */
-
-/*
- * linux/arch/i386/kernel/time.c
- *
- * Copyright (C) 1991, 1992, 1995 Linus Torvalds
- */
-
-#include <xen/errno.h>
-#include <xen/sched.h>
-#include <xen/lib.h>
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/interrupt.h>
-#include <xen/time.h>
-#include <xen/ac_timer.h>
-
-#include <asm/io.h>
-#include <xen/smp.h>
-#include <xen/irq.h>
-#include <asm/msr.h>
-#include <asm/mpspec.h>
-#include <asm/processor.h>
-#include <asm/fixmap.h>
-#include <asm/mc146818rtc.h>
-
-extern rwlock_t xtime_lock;
-extern unsigned long wall_jiffies;
-
-/* GLOBAL */
-unsigned long cpu_khz; /* Detected as we calibrate the TSC */
-unsigned long ticks_per_usec; /* TSC ticks per microsecond. */
-spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
-int timer_ack = 0;
-int do_timer_lists_from_pit = 0;
-
-/* PRIVATE */
-static unsigned int rdtsc_bitshift; /* Which 32 bits of TSC do we use? */
-static u64 cpu_freq; /* CPU frequency (Hz) */
-static u32 st_scale_f; /* Cycles -> ns, fractional part */
-static u32 st_scale_i; /* Cycles -> ns, integer part */
-static u32 tsc_irq; /* CPU0's TSC at last 'time update' */
-static s_time_t stime_irq; /* System time at last 'time update' */
-
-static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
- u64 full_tsc;
-
- write_lock(&xtime_lock);
-
-#ifdef CONFIG_X86_IO_APIC
- if ( timer_ack )
- {
- extern spinlock_t i8259A_lock;
- spin_lock(&i8259A_lock);
- outb(0x0c, 0x20);
- /* Ack the IRQ; AEOI will end it automatically. */
- inb(0x20);
- spin_unlock(&i8259A_lock);
- }
-#endif
-
- /*
- * Updates TSC timestamp (used to interpolate passage of time between
- * interrupts).
- */
- rdtscll(full_tsc);
- tsc_irq = (u32)(full_tsc >> rdtsc_bitshift);
-
- /* Updates xtime (wallclock time). */
- do_timer(regs);
-
- /* Updates system time (nanoseconds since boot). */
- stime_irq += MILLISECS(1000/HZ);
-
- write_unlock(&xtime_lock);
-
- /* Rough hack to allow accurate timers to sort-of-work with no APIC. */
- if ( do_timer_lists_from_pit )
- __cpu_raise_softirq(smp_processor_id(), AC_TIMER_SOFTIRQ);
-}
-
-static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, 0,
- "timer", NULL, NULL};
-
-/* ------ Calibrate the TSC -------
- * Return processor ticks per second / CALIBRATE_FRAC.
- */
-
-#define CLOCK_TICK_RATE 1193180 /* system crystal frequency (Hz) */
-#define CALIBRATE_FRAC 20 /* calibrate over 50ms */
-#define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC)
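-/* i.e. LATCH = 1193180/20 = 59659 PIT ticks, which takes channel 2 50ms
- * to count down, so calibrate_tsc() returns TSC ticks per 50ms. */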
-
-static unsigned long __init calibrate_tsc(void)
-{
- unsigned long startlow, starthigh, endlow, endhigh, count;
-
- /* Set the Gate high, disable speaker */
- outb((inb(0x61) & ~0x02) | 0x01, 0x61);
-
- /*
- * Now let's take care of CTC channel 2
- *
- * Set the Gate high, program CTC channel 2 for mode 0 (interrupt on
- * terminal count), binary count, and load the CALIBRATE_LATCH count
- * (LSB then MSB) to begin the countdown.
- */
- outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */
- outb(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */
- outb(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */
-
- rdtsc(startlow, starthigh);
- for ( count = 0; (inb(0x61) & 0x20) == 0; count++ )
- continue;
- rdtsc(endlow, endhigh);
-
- /* Error if the CTC doesn't behave itself. */
- if ( count == 0 )
- return 0;
-
- /* [endhigh:endlow] = [endhigh:endlow] - [starthigh:startlow] */
- __asm__( "subl %2,%0 ; sbbl %3,%1"
- : "=a" (endlow), "=d" (endhigh)
- : "g" (startlow), "g" (starthigh), "0" (endlow), "1" (endhigh) );
-
- /* If quotient doesn't fit in 32 bits then we return error (zero). */
- return endhigh ? 0 : endlow;
-}
-
-
-/***************************************************************************
- * CMOS Timer functions
- ***************************************************************************/
-
-/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
- * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
- * => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
- *
- * [For the Julian calendar (which was used in Russia before 1917,
- * Britain & colonies before 1752, anywhere else before 1582,
- * and is still in use by some communities) leave out the
- * -year/100+year/400 terms, and add 10.]
- *
- * This algorithm was first published by Gauss (I think).
- *
- * WARNING: this function will overflow on 2106-02-07 06:28:16 on
- * machines where long is 32-bit! (However, as time_t is signed, we
- * will already get problems at other places on 2038-01-19 03:14:08)
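- *
- * Sanity check of the arithmetic below: for 1970-01-01 00:00:00 the
- * Feb-last shuffle gives mon=11, year=1969, so the day count is
- * 1969/4 - 1969/100 + 1969/400 + 367*11/12 + 1 + 1969*365 = 719499,
- * and the -719499 term cancels it, leaving exactly 0 seconds.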
- */
-static inline unsigned long
-mktime (unsigned int year, unsigned int mon,
- unsigned int day, unsigned int hour,
- unsigned int min, unsigned int sec)
-{
- /* 1..12 -> 11,12,1..10: put Feb last since it has a leap day. */
- if ( 0 >= (int) (mon -= 2) )
- {
- mon += 12;
- year -= 1;
- }
-
- return ((((unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day)+
- year*365 - 719499
- )*24 + hour /* now have hours */
- )*60 + min /* now have minutes */
- )*60 + sec; /* finally seconds */
-}
-
-static unsigned long __get_cmos_time(void)
-{
- unsigned int year, mon, day, hour, min, sec;
-
- sec = CMOS_READ(RTC_SECONDS);
- min = CMOS_READ(RTC_MINUTES);
- hour = CMOS_READ(RTC_HOURS);
- day = CMOS_READ(RTC_DAY_OF_MONTH);
- mon = CMOS_READ(RTC_MONTH);
- year = CMOS_READ(RTC_YEAR);
-
- if ( !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD )
- {
- BCD_TO_BIN(sec);
- BCD_TO_BIN(min);
- BCD_TO_BIN(hour);
- BCD_TO_BIN(day);
- BCD_TO_BIN(mon);
- BCD_TO_BIN(year);
- }
-
- if ( (year += 1900) < 1970 )
- year += 100;
-
- return mktime(year, mon, day, hour, min, sec);
-}
-
-static unsigned long get_cmos_time(void)
-{
- unsigned long res, flags;
- int i;
-
- spin_lock_irqsave(&rtc_lock, flags);
-
- /* read RTC exactly on falling edge of update flag */
- for ( i = 0 ; i < 1000000 ; i++ ) /* may take up to 1 second... */
- if ( (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) )
- break;
- for ( i = 0 ; i < 1000000 ; i++ ) /* must try at least 2.228 ms */
- if ( !(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) )
- break;
-
- res = __get_cmos_time();
-
- spin_unlock_irqrestore(&rtc_lock, flags);
- return res;
-}
-
-/***************************************************************************
- * System Time
- ***************************************************************************/
-
-static inline u64 get_time_delta(void)
-{
- s32 delta_tsc;
- u32 low;
- u64 delta, tsc;
-
- rdtscll(tsc);
- low = (u32)(tsc >> rdtsc_bitshift);
- delta_tsc = (s32)(low - tsc_irq);
- if ( unlikely(delta_tsc < 0) ) delta_tsc = 0;
- delta = ((u64)delta_tsc * st_scale_f);
- delta >>= 32;
- delta += ((u64)delta_tsc * st_scale_i);
-
- return delta;
-}
-
-s_time_t get_s_time(void)
-{
- s_time_t now;
- unsigned long flags;
-
- read_lock_irqsave(&xtime_lock, flags);
-
- now = stime_irq + get_time_delta();
-
- /* Ensure that the returned system time is monotonically increasing. */
- {
- static s_time_t prev_now = 0;
- if ( unlikely(now < prev_now) )
- now = prev_now;
- prev_now = now;
- }
-
- read_unlock_irqrestore(&xtime_lock, flags);
-
- return now;
-}
-
-
-void update_dom_time(shared_info_t *si)
-{
- unsigned long flags;
-
- read_lock_irqsave(&xtime_lock, flags);
-
- si->time_version1++;
- wmb();
-
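- /*
- * A guest checks that time_version1 == time_version2 before and
- * after reading the fields below; bracketing the update with the
- * two increments and write barriers gives it a consistent,
- * seqlock-style snapshot.
- */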
- si->cpu_freq = cpu_freq;
- si->tsc_timestamp.tsc_bitshift = rdtsc_bitshift;
- si->tsc_timestamp.tsc_bits = tsc_irq;
- si->system_time = stime_irq;
- si->wc_sec = xtime.tv_sec;
- si->wc_usec = xtime.tv_usec;
- si->wc_usec += (jiffies - wall_jiffies) * (1000000 / HZ);
- while ( si->wc_usec >= 1000000 )
- {
- si->wc_usec -= 1000000;
- si->wc_sec++;
- }
-
- wmb();
- si->time_version2++;
-
- read_unlock_irqrestore(&xtime_lock, flags);
-}
-
-
-/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
-void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base)
-{
- s64 delta;
- long _usecs = (long)usecs;
-
- write_lock_irq(&xtime_lock);
-
- delta = (s64)(stime_irq - system_time_base);
-
- _usecs += (long)(delta/1000);
- _usecs -= (jiffies - wall_jiffies) * (1000000 / HZ);
-
- while ( _usecs < 0 )
- {
- _usecs += 1000000;
- secs--;
- }
-
- xtime.tv_sec = secs;
- xtime.tv_usec = _usecs;
-
- write_unlock_irq(&xtime_lock);
-
- update_dom_time(current->shared_info);
-}
-
-
-/* Late init function (after all CPUs are booted). */
-int __init init_xen_time()
-{
- u64 scale;
- u64 full_tsc;
- unsigned int cpu_ghz;
-
- cpu_ghz = (unsigned int)(cpu_freq / 1000000000ULL);
- for ( rdtsc_bitshift = 0; cpu_ghz != 0; rdtsc_bitshift++, cpu_ghz >>= 1 )
- continue;
-
- scale = 1000000000LL << (32 + rdtsc_bitshift);
- scale /= cpu_freq;
- st_scale_f = scale & 0xffffffff;
- st_scale_i = scale >> 32;
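-
- /*
- * Illustrative example: a 2GHz CPU gives rdtsc_bitshift = 2, so one
- * tracked TSC unit is 4 cycles = 2ns. Then scale = (10^9 << 34) /
- * 2e9 = 2^33, i.e. st_scale_i = 2 and st_scale_f = 0, and
- * get_time_delta() returns delta_tsc * 2 ns, as expected.
- */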
-
- /* System time ticks from zero. */
- rdtscll(full_tsc);
- stime_irq = (s_time_t)0;
- tsc_irq = (u32)(full_tsc >> rdtsc_bitshift);
-
- /* Wallclock time starts as the initial RTC time. */
- xtime.tv_sec = get_cmos_time();
-
- printk("Time init:\n");
- printk(".... System Time: %lldns\n",
- NOW());
- printk(".... cpu_freq: %08X:%08X\n",
- (u32)(cpu_freq>>32), (u32)cpu_freq);
- printk(".... scale: %08X:%08X\n",
- (u32)(scale>>32), (u32)scale);
- printk(".... Wall Clock: %lds %ldus\n",
- xtime.tv_sec, xtime.tv_usec);
-
- return 0;
-}
-
-
-/* Early init function. */
-void __init time_init(void)
-{
- unsigned long ticks_per_frac = calibrate_tsc();
-
- if ( !ticks_per_frac )
- panic("Error calibrating TSC\n");
-
- ticks_per_usec = ticks_per_frac / (1000000/CALIBRATE_FRAC);
- cpu_khz = ticks_per_frac / (1000/CALIBRATE_FRAC);
-
- cpu_freq = (u64)ticks_per_frac * (u64)CALIBRATE_FRAC;
-
- printk("Detected %lu.%03lu MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
-
- setup_irq(0, &irq0);
-}
+++ /dev/null
-/*
- *
- * Trampoline.S Derived from Setup.S by Linus Torvalds
- *
- * 4 Jan 1997 Michael Chastain: changed to gnu as.
- *
- * Entry: CS:IP point to the start of our code; we are
- * in real mode with no stack. The rest of the
- * trampoline page is free to serve as our stack, and
- * everything else is a mystery.
- *
- * On entry to trampoline_data, the processor is in real mode
- * with 16-bit addressing and 16-bit data. CS has some value
- * and IP is zero. Thus, data addresses need to be absolute
- * (no relocation) and are taken with regard to r_base.
- */
-
-#include <xen/config.h>
-#include <hypervisor-ifs/hypervisor-if.h>
-#include <asm/page.h>
-
-#ifdef CONFIG_SMP
-
-.data
-
-.code16
-
-ENTRY(trampoline_data)
-r_base = .
- mov %cs, %ax # Code and data in the same place
- mov %ax, %ds
-
- movl $0xA5A5A5A5, %ebx # Flag an SMP trampoline
- cli # We should be safe anyway
-
- movl $0xA5A5A5A5, trampoline_data - r_base
-
- lidt idt_48 - r_base # load idt with 0, 0
- lgdt gdt_48 - r_base # load gdt with whatever is appropriate
-
- xor %ax, %ax
- inc %ax # protected mode (PE) bit
- lmsw %ax # into protected mode
- jmp flush_instr
-flush_instr:
- ljmpl $__HYPERVISOR_CS, $(MONITOR_BASE)-__PAGE_OFFSET
-
-idt_48:
- .word 0 # idt limit = 0
- .word 0, 0 # idt base = 0L
-
-gdt_48:
- .word (LAST_RESERVED_GDT_ENTRY*8)+7
- .long gdt_table-__PAGE_OFFSET
-
-.globl SYMBOL_NAME(trampoline_end)
-SYMBOL_NAME_LABEL(trampoline_end)
-
-#endif /* CONFIG_SMP */
+++ /dev/null
-/******************************************************************************
- * arch/i386/traps.c
- *
- * Modifications to Linux original are copyright (c) 2002-2003, K A Fraser
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-/*
- * xen/arch/i386/traps.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * Gareth Hughes <gareth@valinux.com>, May 2000
- */
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/interrupt.h>
-#include <xen/sched.h>
-#include <xen/lib.h>
-#include <xen/errno.h>
-#include <xen/mm.h>
-#include <asm/ptrace.h>
-#include <xen/delay.h>
-#include <xen/spinlock.h>
-#include <xen/irq.h>
-#include <xen/perfc.h>
-#include <xen/shadow.h>
-#include <asm/domain_page.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/atomic.h>
-#include <asm/desc.h>
-#include <asm/debugreg.h>
-#include <asm/smp.h>
-#include <asm/pgalloc.h>
-#include <asm/uaccess.h>
-#include <asm/i387.h>
-#include <asm/pdb.h>
-
-#define GTBF_TRAP 1
-#define GTBF_TRAP_NOCODE 2
-#define GTBF_TRAP_CR2 4
-struct guest_trap_bounce {
- unsigned long error_code; /* 0 */
- unsigned long cr2; /* 4 */
- unsigned short flags; /* 8 */
- unsigned short cs; /* 10 */
- unsigned long eip; /* 12 */
-} guest_trap_bounce[NR_CPUS] = { { 0 } };
-
-#define DOUBLEFAULT_STACK_SIZE 1024
-static struct tss_struct doublefault_tss;
-static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];
-
-asmlinkage int hypervisor_call(void);
-asmlinkage void lcall7(void);
-asmlinkage void lcall27(void);
-
-/* Master table, and the one used by CPU0. */
-struct desc_struct idt_table[256] = { {0, 0}, };
-/* All other CPUs have their own copy. */
-struct desc_struct *idt_tables[NR_CPUS] = { 0 };
-
-asmlinkage void divide_error(void);
-asmlinkage void debug(void);
-asmlinkage void nmi(void);
-asmlinkage void int3(void);
-asmlinkage void overflow(void);
-asmlinkage void bounds(void);
-asmlinkage void invalid_op(void);
-asmlinkage void device_not_available(void);
-asmlinkage void coprocessor_segment_overrun(void);
-asmlinkage void invalid_TSS(void);
-asmlinkage void segment_not_present(void);
-asmlinkage void stack_segment(void);
-asmlinkage void general_protection(void);
-asmlinkage void page_fault(void);
-asmlinkage void coprocessor_error(void);
-asmlinkage void simd_coprocessor_error(void);
-asmlinkage void alignment_check(void);
-asmlinkage void spurious_interrupt_bug(void);
-asmlinkage void machine_check(void);
-
-int kstack_depth_to_print = 8*20;
-
-static inline int kernel_text_address(unsigned long addr)
-{
- if (addr >= (unsigned long) &_stext &&
- addr <= (unsigned long) &_etext)
- return 1;
- return 0;
-}
-
-void show_stack(unsigned long *esp)
-{
- unsigned long *stack, addr;
- int i;
-
- printk("Stack trace from ESP=%p:\n", esp);
-
- stack = esp;
- for ( i = 0; i < kstack_depth_to_print; i++ )
- {
- if ( ((long)stack & (STACK_SIZE-1)) == 0 )
- break;
- if ( i && ((i % 8) == 0) )
- printk("\n ");
- if ( kernel_text_address(*stack) )
- printk("[%08lx] ", *stack++);
- else
- printk("%08lx ", *stack++);
- }
- printk("\n");
-
- printk("Call Trace from ESP=%p: ", esp);
- stack = esp;
- i = 0;
- while (((long) stack & (STACK_SIZE-1)) != 0) {
- addr = *stack++;
- if (kernel_text_address(addr)) {
- if (i && ((i % 6) == 0))
- printk("\n ");
- printk("[<%08lx>] ", addr);
- i++;
- }
- }
- printk("\n");
-}
-
-void show_registers(struct pt_regs *regs)
-{
- unsigned long esp;
- unsigned short ss;
-
- esp = (unsigned long) (&regs->esp);
- ss = __HYPERVISOR_DS;
- if ( regs->xcs & 3 )
- {
- esp = regs->esp;
- ss = regs->xss & 0xffff;
- }
-
- printk("CPU: %d\nEIP: %04x:[<%08lx>] \nEFLAGS: %08lx\n",
- smp_processor_id(), 0xffff & regs->xcs, regs->eip, regs->eflags);
- printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
- regs->eax, regs->ebx, regs->ecx, regs->edx);
- printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
- regs->esi, regs->edi, regs->ebp, esp);
- printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
- regs->xds & 0xffff, regs->xes & 0xffff,
- regs->xfs & 0xffff, regs->xgs & 0xffff, ss);
-
- show_stack(&regs->esp);
-}
-
-
-spinlock_t die_lock = SPIN_LOCK_UNLOCKED;
-
-void die(const char * str, struct pt_regs * regs, long err)
-{
- unsigned long flags;
- spin_lock_irqsave(&die_lock, flags);
- printk("%s: %04lx,%04lx\n", str, err >> 16, err & 0xffff);
- show_registers(regs);
- spin_unlock_irqrestore(&die_lock, flags);
- panic("HYPERVISOR DEATH!!\n");
-}
-
-
-static inline void do_trap(int trapnr, char *str,
- struct pt_regs *regs,
- long error_code, int use_error_code)
-{
- struct task_struct *p = current;
- struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
- trap_info_t *ti;
- unsigned long fixup;
-
- if (!(regs->xcs & 3))
- goto fault_in_hypervisor;
-
- ti = current->thread.traps + trapnr;
- gtb->flags = use_error_code ? GTBF_TRAP : GTBF_TRAP_NOCODE;
- gtb->error_code = error_code;
- gtb->cs = ti->cs;
- gtb->eip = ti->address;
- if ( TI_GET_IF(ti) )
- p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
- return;
-
- fault_in_hypervisor:
-
- if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
- {
- DPRINTK("Trap %d: %08lx -> %08lx\n", trapnr, regs->eip, fixup);
- regs->eip = fixup;
- regs->xds = regs->xes = regs->xfs = regs->xgs = __HYPERVISOR_DS;
- return;
- }
-
- show_registers(regs);
- panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
- "[error_code=%08x]\n",
- smp_processor_id(), trapnr, str, error_code);
-}
-
-#define DO_ERROR_NOCODE(trapnr, str, name) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
-{ \
-do_trap(trapnr, str, regs, error_code, 0); \
-}
-
-#define DO_ERROR(trapnr, str, name) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
-{ \
-do_trap(trapnr, str, regs, error_code, 1); \
-}
-
-DO_ERROR_NOCODE( 0, "divide error", divide_error)
-DO_ERROR_NOCODE( 4, "overflow", overflow)
-DO_ERROR_NOCODE( 5, "bounds", bounds)
-DO_ERROR_NOCODE( 6, "invalid operand", invalid_op)
-DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
-DO_ERROR(10, "invalid TSS", invalid_TSS)
-DO_ERROR(11, "segment not present", segment_not_present)
-DO_ERROR(12, "stack segment", stack_segment)
-/* Vector 15 reserved by Intel */
-DO_ERROR_NOCODE(16, "fpu error", coprocessor_error)
-DO_ERROR(17, "alignment check", alignment_check)
-DO_ERROR_NOCODE(18, "machine check", machine_check)
-DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error)
-
-asmlinkage void do_int3(struct pt_regs *regs, long error_code)
-{
- struct task_struct *p = current;
- struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
- trap_info_t *ti;
-
-#ifdef XEN_DEBUGGER
- if ( pdb_initialized && pdb_handle_exception(3, regs) == 0 )
- return;
-#endif
-
- if ( (regs->xcs & 3) != 3 )
- {
- if ( unlikely((regs->xcs & 3) == 0) )
- {
- show_registers(regs);
- panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n"
- "[error_code=%08x]\n",
- smp_processor_id(), error_code);
- }
- }
-
- ti = current->thread.traps + 3;
- gtb->flags = GTBF_TRAP_NOCODE;
- gtb->error_code = error_code;
- gtb->cs = ti->cs;
- gtb->eip = ti->address;
- if ( TI_GET_IF(ti) )
- p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
-}
-
-asmlinkage void do_double_fault(void)
-{
- extern spinlock_t console_lock;
- struct tss_struct *tss = &doublefault_tss;
- unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;
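-
- /*
- * back_link holds the selector of the TSS we double-faulted out
- * of; dropping the RPL/TI bits (>>3) and undoing the two-entries-
- * per-CPU GDT layout from __FIRST_TSS_ENTRY recovers the CPU.
- */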
-
- /* Disable the NMI watchdog. It's useless now. */
- watchdog_on = 0;
-
- /* Find information saved during fault and dump it to the console. */
- tss = &init_tss[cpu];
- printk("CPU: %d\nEIP: %04x:[<%08lx>] \nEFLAGS: %08lx\n",
- cpu, tss->cs, tss->eip, tss->eflags);
- printk("CR3: %08lx\n", tss->__cr3);
- printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
- tss->eax, tss->ebx, tss->ecx, tss->edx);
- printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
- tss->esi, tss->edi, tss->ebp, tss->esp);
- printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
- tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
- printk("************************************\n");
- printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu);
- printk("System needs manual reset.\n");
- printk("************************************\n");
-
- /* Lock up the console to prevent spurious output from other CPUs. */
- spin_lock(&console_lock);
-
- /* Wait for manual reset. */
- for ( ; ; ) ;
-}
-
-asmlinkage void do_page_fault(struct pt_regs *regs, long error_code)
-{
- struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
- trap_info_t *ti;
- unsigned long off, addr, fixup;
- struct task_struct *p = current;
- extern int map_ldt_shadow_page(unsigned int);
-
- __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : );
-
- perfc_incrc(page_faults);
-
- if ( unlikely(addr >= LDT_VIRT_START) &&
- (addr < (LDT_VIRT_START + (p->mm.ldt_ents*LDT_ENTRY_SIZE))) )
- {
- /*
- * Copy a mapping from the guest's LDT, if it is valid. Otherwise we
- * send the fault up to the guest OS to be handled.
- */
- off = addr - LDT_VIRT_START;
- addr = p->mm.ldt_base + off;
- if ( likely(map_ldt_shadow_page(off >> PAGE_SHIFT)) )
- return; /* successfully copied the mapping */
- }
-
- if ( unlikely(p->mm.shadow_mode) &&
- (addr < PAGE_OFFSET) && shadow_fault(addr, error_code) )
- return; /* Returns TRUE if fault was handled. */
-
- if ( unlikely(!(regs->xcs & 3)) )
- goto fault_in_hypervisor;
-
- ti = p->thread.traps + 14;
- gtb->flags = GTBF_TRAP_CR2; /* page fault pushes %cr2 */
- gtb->cr2 = addr;
- gtb->error_code = error_code;
- gtb->cs = ti->cs;
- gtb->eip = ti->address;
- if ( TI_GET_IF(ti) )
- p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
- return;
-
- fault_in_hypervisor:
-
- if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
- {
- perfc_incrc(copy_user_faults);
- if ( !p->mm.shadow_mode )
- DPRINTK("Page fault: %08lx -> %08lx\n", regs->eip, fixup);
- regs->eip = fixup;
- regs->xds = regs->xes = regs->xfs = regs->xgs = __HYPERVISOR_DS;
- return;
- }
-
- if ( addr >= PAGE_OFFSET )
- {
- unsigned long page;
- page = l2_pgentry_val(idle_pg_table[addr >> L2_PAGETABLE_SHIFT]);
- printk("*pde = %08lx\n", page);
- if ( page & _PAGE_PRESENT )
- {
- page &= PAGE_MASK;
- page = ((unsigned long *) __va(page))[(addr&0x3ff000)>>PAGE_SHIFT];
- printk(" *pte = %08lx\n", page);
- }
-#ifdef MEMORY_GUARD
- if ( !(error_code & 1) )
- printk(" -- POSSIBLY AN ACCESS TO FREED MEMORY? --\n");
-#endif
- }
-
-#ifdef XEN_DEBUGGER
- if ( pdb_page_fault_possible )
- {
- pdb_page_fault = 1;
- /* make eax & edx valid to complete the instruction */
- regs->eax = (long)&pdb_page_fault_scratch;
- regs->edx = (long)&pdb_page_fault_scratch;
- return;
- }
-#endif
-
- show_registers(regs);
- panic("CPU%d FATAL PAGE FAULT\n"
- "[error_code=%08x]\n"
- "Faulting linear address might be %08lx\n",
- smp_processor_id(), error_code, addr);
-}
-
-asmlinkage void do_general_protection(struct pt_regs *regs, long error_code)
-{
- struct task_struct *p = current;
- struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
- trap_info_t *ti;
- unsigned long fixup;
-
- /* Badness if error in ring 0, or result of an interrupt. */
- if ( !(regs->xcs & 3) || (error_code & 1) )
- goto gp_in_kernel;
-
- /*
- * Cunning trick to allow arbitrary "INT n" handling.
- *
- * We set DPL == 0 on all vectors in the IDT. This prevents any INT <n>
- * instruction from trapping to the appropriate vector, when that might not
- * be expected by Xen or the guest OS. For example, that entry might be for
- * a fault handler (unlike traps, faults don't increment EIP), or might
- * expect an error code on the stack (which a software trap never
- * provides), or might be a hardware interrupt handler that doesn't like
- * being called spuriously.
- *
- * Instead, a GPF occurs with the faulting IDT vector in the error code.
- * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is
- * clear to indicate that it's a software fault, not hardware.
- *
- * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is
- * okay because they can only be triggered by an explicit DPL-checked
- * instruction. The DPL specified by the guest OS for these vectors is NOT
- * CHECKED!!
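- *
- * (GPF error-code format, used below: bit 0 = external event, bit 1 =
- * vector references an IDT descriptor, bits 3-15 = the vector number.
- * Hence the '(error_code & 3) == 2' test and the 'error_code >> 3'.)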
- */
- if ( (error_code & 3) == 2 )
- {
- /* This fault must be due to <INT n> instruction. */
- ti = current->thread.traps + (error_code>>3);
- if ( TI_GET_DPL(ti) >= (regs->xcs & 3) )
- {
-#ifdef XEN_DEBUGGER
- if ( pdb_initialized && (pdb_ctx.system_call != 0) )
- {
- unsigned long cr3;
- __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : );
- if ( cr3 == pdb_ctx.ptbr )
- pdb_linux_syscall_enter_bkpt(regs, error_code, ti);
- }
-#endif
-
- gtb->flags = GTBF_TRAP_NOCODE;
- regs->eip += 2;
- goto finish_propagation;
- }
- }
-
- /* Pass on GPF as is. */
- ti = current->thread.traps + 13;
- gtb->flags = GTBF_TRAP;
- gtb->error_code = error_code;
- finish_propagation:
- gtb->cs = ti->cs;
- gtb->eip = ti->address;
- if ( TI_GET_IF(ti) )
- p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
- return;
-
- gp_in_kernel:
-
- if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
- {
- DPRINTK("GPF (%04lx): %08lx -> %08lx\n", error_code, regs->eip, fixup);
- regs->eip = fixup;
- regs->xds = regs->xes = regs->xfs = regs->xgs = __HYPERVISOR_DS;
- return;
- }
-
- die("general protection fault", regs, error_code);
-}
-
-asmlinkage void mem_parity_error(unsigned char reason, struct pt_regs * regs)
-{
- printk("NMI received. Dazed and confused, but trying to continue\n");
- printk("You probably have a hardware problem with your RAM chips\n");
-
- /* Clear and disable the memory parity error line. */
- reason = (reason & 0xf) | 4;
- outb(reason, 0x61);
-
- show_registers(regs);
- panic("PARITY ERROR");
-}
-
-asmlinkage void io_check_error(unsigned char reason, struct pt_regs * regs)
-{
- printk("NMI: IOCK error (debug interrupt?)\n");
-
- reason = (reason & 0xf) | 8;
- outb(reason, 0x61);
-
- show_registers(regs);
- panic("IOCK ERROR");
-}
-
-static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
-{
- printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
- printk("Dazed and confused, but trying to continue\n");
- printk("Do you have a strange power saving mode enabled?\n");
-}
-
-asmlinkage void do_nmi(struct pt_regs * regs, unsigned long reason)
-{
- ++nmi_count(smp_processor_id());
-
-#if CONFIG_X86_LOCAL_APIC
- if ( nmi_watchdog )
- nmi_watchdog_tick(regs);
- else
-#endif
- unknown_nmi_error((unsigned char)(reason&0xff), regs);
-}
-
-asmlinkage void math_state_restore(struct pt_regs *regs, long error_code)
-{
- /* Prevent recursion. */
- clts();
-
- if ( !test_bit(PF_USEDFPU, &current->flags) )
- {
- if ( test_bit(PF_DONEFPUINIT, &current->flags) )
- restore_fpu(current);
- else
- init_fpu();
- set_bit(PF_USEDFPU, &current->flags); /* so we fnsave on switch_to() */
- }
-
- if ( test_and_clear_bit(PF_GUEST_STTS, &current->flags) )
- {
- struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
- gtb->flags = GTBF_TRAP_NOCODE;
- gtb->cs = current->thread.traps[7].cs;
- gtb->eip = current->thread.traps[7].address;
- }
-}
-
-#ifdef XEN_DEBUGGER
-asmlinkage void do_pdb_debug(struct pt_regs *regs, long error_code)
-{
- unsigned int condition;
- struct task_struct *tsk = current;
- struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
-
- __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));
- if ( (condition & (1 << 14)) != (1 << 14) )
- printk("\nwarning: debug trap w/o BS bit [0x%x]\n\n", condition);
- __asm__("movl %0,%%db6" : : "r" (0));
-
- if ( pdb_handle_exception(1, regs) != 0 )
- {
- tsk->thread.debugreg[6] = condition;
-
- gtb->flags = GTBF_TRAP_NOCODE;
- gtb->cs = tsk->thread.traps[1].cs;
- gtb->eip = tsk->thread.traps[1].address;
- }
-}
-#endif
-
-asmlinkage void do_debug(struct pt_regs *regs, long error_code)
-{
- unsigned int condition;
- struct task_struct *tsk = current;
- struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
-
-#ifdef XEN_DEBUGGER
- if ( pdb_initialized )
- return do_pdb_debug(regs, error_code);
-#endif
-
- __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));
-
- /* Mask out spurious debug traps due to lazy DR7 setting */
- if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
- (tsk->thread.debugreg[7] == 0) )
- {
- __asm__("movl %0,%%db7" : : "r" (0));
- return;
- }
-
- if ( (regs->xcs & 3) == 0 )
- {
- /* Clear TF just for absolute sanity. */
- regs->eflags &= ~EF_TF;
- /*
- * Basically, we ignore watchpoints when they trigger in
- * the hypervisor. This may happen when a buffer is passed
- * to us which previously had a watchpoint set on it.
- * No need to bump EIP; the only faulting trap is an
- * instruction breakpoint, which can't happen to us.
- */
- return;
- }
-
- /* Save debug status register where guest OS can peek at it */
- tsk->thread.debugreg[6] = condition;
-
- gtb->flags = GTBF_TRAP_NOCODE;
- gtb->cs = tsk->thread.traps[1].cs;
- gtb->eip = tsk->thread.traps[1].address;
-}
-
-
-asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs,
- long error_code)
-{ /* nothing */ }
-
-
-#define _set_gate(gate_addr,type,dpl,addr) \
-do { \
- int __d0, __d1; \
- __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
- "movw %4,%%dx\n\t" \
- "movl %%eax,%0\n\t" \
- "movl %%edx,%1" \
- :"=m" (*((long *) (gate_addr))), \
- "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
- :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
- "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \
-} while (0)
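-
-/*
- * The two longs stored above form an IDT gate descriptor:
- * low long = selector(31:16) | handler offset(15:0),
- * high long = handler offset(31:16) | P(0x8000) | DPL<<13 | type<<8.
- * Type 14 is a 32-bit interrupt gate, so the helpers below install
- * interrupt gates at DPL 0 (intr) or DPL 3 (system).
- */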
-
-void set_intr_gate(unsigned int n, void *addr)
-{
- _set_gate(idt_table+n,14,0,addr);
-}
-
-static void __init set_system_gate(unsigned int n, void *addr)
-{
- _set_gate(idt_table+n,14,3,addr);
-}
-
-static void set_task_gate(unsigned int n, unsigned int sel)
-{
- idt_table[n].a = sel << 16;
- idt_table[n].b = 0x8500;
-}
-
-#define _set_seg_desc(gate_addr,type,dpl,base,limit) {\
- *((gate_addr)+1) = ((base) & 0xff000000) | \
- (((base) & 0x00ff0000)>>16) | \
- ((limit) & 0xf0000) | \
- ((dpl)<<13) | \
- (0x00408000) | \
- ((type)<<8); \
- *(gate_addr) = (((base) & 0x0000ffff)<<16) | \
- ((limit) & 0x0ffff); }
-
-#define _set_tssldt_desc(n,addr,limit,type) \
-__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
- "movw %%ax,2(%2)\n\t" \
- "rorl $16,%%eax\n\t" \
- "movb %%al,4(%2)\n\t" \
- "movb %4,5(%2)\n\t" \
- "movb $0,6(%2)\n\t" \
- "movb %%ah,7(%2)\n\t" \
- "rorl $16,%%eax" \
- : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type))
-
-void set_tss_desc(unsigned int n, void *addr)
-{
- _set_tssldt_desc(gdt_table+__TSS(n), (int)addr, 8299, 0x89);
-}
-
-void __init trap_init(void)
-{
- /*
- * Make a separate task for double faults. This will get us debug output if
- * we blow the kernel stack.
- */
- struct tss_struct *tss = &doublefault_tss;
- memset(tss, 0, sizeof(*tss));
- tss->ds = __HYPERVISOR_DS;
- tss->es = __HYPERVISOR_DS;
- tss->ss = __HYPERVISOR_DS;
- tss->esp = (unsigned long)
- &doublefault_stack[DOUBLEFAULT_STACK_SIZE];
- tss->__cr3 = __pa(idle_pg_table);
- tss->cs = __HYPERVISOR_CS;
- tss->eip = (unsigned long)do_double_fault;
- tss->eflags = 2;
- tss->bitmap = INVALID_IO_BITMAP_OFFSET;
- _set_tssldt_desc(gdt_table+__DOUBLEFAULT_TSS_ENTRY,
- (int)tss, 235, 0x89);
-
- /*
- * Note that interrupt gates are always used, rather than trap gates. We
- * must have interrupts disabled until DS/ES/FS/GS are saved because the
- * first activation must have the "bad" value(s) for these registers and
- * we may lose them if another activation is installed before they are
- * saved. The page-fault handler also needs interrupts disabled until %cr2
- * has been read and saved on the stack.
- */
- set_intr_gate(0,&divide_error);
- set_intr_gate(1,&debug);
- set_intr_gate(2,&nmi);
- set_system_gate(3,&int3); /* usable from all privilege levels */
- set_system_gate(4,&overflow); /* usable from all privilege levels */
- set_intr_gate(5,&bounds);
- set_intr_gate(6,&invalid_op);
- set_intr_gate(7,&device_not_available);
- set_task_gate(8,__DOUBLEFAULT_TSS_ENTRY<<3);
- set_intr_gate(9,&coprocessor_segment_overrun);
- set_intr_gate(10,&invalid_TSS);
- set_intr_gate(11,&segment_not_present);
- set_intr_gate(12,&stack_segment);
- set_intr_gate(13,&general_protection);
- set_intr_gate(14,&page_fault);
- set_intr_gate(15,&spurious_interrupt_bug);
- set_intr_gate(16,&coprocessor_error);
- set_intr_gate(17,&alignment_check);
- set_intr_gate(18,&machine_check);
- set_intr_gate(19,&simd_coprocessor_error);
-
- /* Only ring 1 can access monitor services. */
- _set_gate(idt_table+HYPERVISOR_CALL_VECTOR,14,1,&hypervisor_call);
-
- /* CPU0 uses the master IDT. */
- idt_tables[0] = idt_table;
-
- /*
- * Should be a barrier for any external CPU state.
- */
- {
- extern void cpu_init(void);
- cpu_init();
- }
-}
-
-
-long do_set_trap_table(trap_info_t *traps)
-{
- trap_info_t cur;
- trap_info_t *dst = current->thread.traps;
-
- for ( ; ; )
- {
- if ( copy_from_user(&cur, traps, sizeof(cur)) ) return -EFAULT;
-
- if ( cur.address == 0 ) break;
-
- if ( !VALID_CODESEL(cur.cs) ) return -EPERM;
-
- memcpy(dst+cur.vector, &cur, sizeof(cur));
- traps++;
- }
-
- return 0;
-}
-
-
-long do_set_callbacks(unsigned long event_selector,
- unsigned long event_address,
- unsigned long failsafe_selector,
- unsigned long failsafe_address)
-{
- struct task_struct *p = current;
-
- if ( !VALID_CODESEL(event_selector) || !VALID_CODESEL(failsafe_selector) )
- return -EPERM;
-
- p->event_selector = event_selector;
- p->event_address = event_address;
- p->failsafe_selector = failsafe_selector;
- p->failsafe_address = failsafe_address;
-
- return 0;
-}
-
-
-long set_fast_trap(struct task_struct *p, int idx)
-{
- trap_info_t *ti;
-
- /* Index 0 is special: it disables fast traps. */
- if ( idx == 0 )
- {
- if ( p == current )
- CLEAR_FAST_TRAP(&p->thread);
- SET_DEFAULT_FAST_TRAP(&p->thread);
- return 0;
- }
-
- /*
- * We only fast-trap vectors 0x20-0x2f, and vector 0x80.
- * The former range is used by Windows and MS-DOS.
- * Vector 0x80 is used by Linux and the BSD variants.
- */
- if ( (idx != 0x80) && ((idx < 0x20) || (idx > 0x2f)) )
- return -1;
-
- ti = p->thread.traps + idx;
-
- /*
- * We can't virtualise interrupt gates, as there's no way to get
- * the CPU to automatically clear the events_mask variable.
- */
- if ( TI_GET_IF(ti) )
- return -1;
-
- if ( p == current )
- CLEAR_FAST_TRAP(&p->thread);
-
- p->thread.fast_trap_idx = idx;
- p->thread.fast_trap_desc.a = (ti->cs << 16) | (ti->address & 0xffff);
- p->thread.fast_trap_desc.b =
- (ti->address & 0xffff0000) | 0x8f00 | (TI_GET_DPL(ti)&3)<<13;
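-
- /*
- * That is, a present 32-bit trap gate (type 0xf, hence 0x8f00) at
- * the trap's own DPL, pointing at the guest's handler.
- */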
-
- if ( p == current )
- SET_FAST_TRAP(&p->thread);
-
- return 0;
-}
-
-
-long do_set_fast_trap(int idx)
-{
- return set_fast_trap(current, idx);
-}
-
-
-long do_fpu_taskswitch(void)
-{
- set_bit(PF_GUEST_STTS, &current->flags);
- stts();
- return 0;
-}
-
-
-long set_debugreg(struct task_struct *p, int reg, unsigned long value)
-{
- int i;
-
- switch ( reg )
- {
- case 0:
- if ( value > (PAGE_OFFSET-4) ) return -EPERM;
- if ( p == current )
- __asm__ ( "movl %0, %%db0" : : "r" (value) );
- break;
- case 1:
- if ( value > (PAGE_OFFSET-4) ) return -EPERM;
- if ( p == current )
- __asm__ ( "movl %0, %%db1" : : "r" (value) );
- break;
- case 2:
- if ( value > (PAGE_OFFSET-4) ) return -EPERM;
- if ( p == current )
- __asm__ ( "movl %0, %%db2" : : "r" (value) );
- break;
- case 3:
- if ( value > (PAGE_OFFSET-4) ) return -EPERM;
- if ( p == current )
- __asm__ ( "movl %0, %%db3" : : "r" (value) );
- break;
- case 6:
- /*
- * DR6: Bits 4-11,16-31 reserved (set to 1).
- * Bit 12 reserved (set to 0).
- */
- value &= 0xffffefff; /* reserved bits => 0 */
- value |= 0xffff0ff0; /* reserved bits => 1 */
- if ( p == current )
- __asm__ ( "movl %0, %%db6" : : "r" (value) );
- break;
- case 7:
- /*
- * DR7: Bit 10 reserved (set to 1).
- * Bits 11-12,14-15 reserved (set to 0).
- * Privileged bits:
- * GD (bit 13): must be 0.
- * R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
- * LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
- */
- /* DR7 == 0 => debugging disabled for this domain. */
- if ( value != 0 )
- {
- value &= 0xffff27ff; /* reserved bits => 0 */
- value |= 0x00000400; /* reserved bits => 1 */
- if ( (value & (1<<13)) != 0 ) return -EPERM;
- for ( i = 0; i < 16; i += 2 )
- if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
- }
- if ( p == current )
- __asm__ ( "movl %0, %%db7" : : "r" (value) );
- break;
- default:
- return -EINVAL;
- }
-
- p->thread.debugreg[reg] = value;
- return 0;
-}
-
-long do_set_debugreg(int reg, unsigned long value)
-{
- return set_debugreg(current, reg, value);
-}
-
-unsigned long do_get_debugreg(int reg)
-{
- if ( (reg < 0) || (reg > 7) ) return -EINVAL;
- return current->thread.debugreg[reg];
-}
+++ /dev/null
-/*
- * User address space access functions.
- * The non-inlined parts of asm-i386/uaccess.h are here.
- *
- * Copyright 1997 Andi Kleen <ak@muc.de>
- * Copyright 1997 Linus Torvalds
- */
-#include <xen/config.h>
-#include <asm/uaccess.h>
-//#include <asm/mmx.h>
-
-#ifdef CONFIG_X86_USE_3DNOW_AND_WORKS
-
-unsigned long
-__generic_copy_to_user(void *to, const void *from, unsigned long n)
-{
- if (access_ok(VERIFY_WRITE, to, n))
- {
- if(n<512)
- __copy_user(to,from,n);
- else
- mmx_copy_user(to,from,n);
- }
- return n;
-}
-
-unsigned long
-__generic_copy_from_user(void *to, const void *from, unsigned long n)
-{
- if (access_ok(VERIFY_READ, from, n))
- {
- if(n<512)
- __copy_user_zeroing(to,from,n);
- else
- mmx_copy_user_zeroing(to, from, n);
- }
- else
- memset(to, 0, n);
- return n;
-}
-
-#else
-
-unsigned long
-__generic_copy_to_user(void *to, const void *from, unsigned long n)
-{
- prefetch(from);
- if (access_ok(VERIFY_WRITE, to, n))
- __copy_user(to,from,n);
- return n;
-}
-
-unsigned long
-__generic_copy_from_user(void *to, const void *from, unsigned long n)
-{
- prefetchw(to);
- if (access_ok(VERIFY_READ, from, n))
- __copy_user_zeroing(to,from,n);
- else
- memset(to, 0, n);
- return n;
-}
-
-#endif
-
-/*
- * Copy a null terminated string from userspace.
- */
-
-#define __do_strncpy_from_user(dst,src,count,res) \
-do { \
- int __d0, __d1, __d2; \
- __asm__ __volatile__( \
- " testl %1,%1\n" \
- " jz 2f\n" \
- "0: lodsb\n" \
- " stosb\n" \
- " testb %%al,%%al\n" \
- " jz 1f\n" \
- " decl %1\n" \
- " jnz 0b\n" \
- "1: subl %1,%0\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl %5,%0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 0b,3b\n" \
- ".previous" \
- : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \
- "=&D" (__d2) \
- : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
- : "memory"); \
-} while (0)
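-
-/*
- * The __ex_table entry above pairs the faulting instruction (0b) with
- * its fixup (3b): if lodsb faults on a bad user pointer, the page-fault
- * handler's search_exception_table() redirects EIP to 3b, which puts
- * -EFAULT in res.
- */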
-
-long
-__strncpy_from_user(char *dst, const char *src, long count)
-{
- long res;
- __do_strncpy_from_user(dst, src, count, res);
- return res;
-}
-
-long
-strncpy_from_user(char *dst, const char *src, long count)
-{
- long res = -EFAULT;
- if (access_ok(VERIFY_READ, src, 1))
- __do_strncpy_from_user(dst, src, count, res);
- return res;
-}
-
-
-/*
- * Zero Userspace
- */
-
-#define __do_clear_user(addr,size) \
-do { \
- int __d0; \
- __asm__ __volatile__( \
- "0: rep; stosl\n" \
- " movl %2,%0\n" \
- "1: rep; stosb\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: lea 0(%2,%0,4),%0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 0b,3b\n" \
- " .long 1b,2b\n" \
- ".previous" \
- : "=&c"(size), "=&D" (__d0) \
- : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \
-} while (0)
-
-unsigned long
-clear_user(void *to, unsigned long n)
-{
- if (access_ok(VERIFY_WRITE, to, n))
- __do_clear_user(to, n);
- return n;
-}
-
-unsigned long
-__clear_user(void *to, unsigned long n)
-{
- __do_clear_user(to, n);
- return n;
-}
-
-/*
- * Return the size of a string (including the ending 0)
- *
- * Return 0 on exception, a value greater than N if too long
- */
-
-long strnlen_user(const char *s, long n)
-{
- unsigned long mask = -__addr_ok(s);
- unsigned long res, tmp;
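-
- /*
- * mask is all-ones when the address is OK and zero otherwise: it
- * clamps the scan length ('andl' below) and the final 'res & mask'
- * forces a zero return for a bad address.
- */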
-
- __asm__ __volatile__(
- " testl %0, %0\n"
- " jz 3f\n"
- " andl %0,%%ecx\n"
- "0: repne; scasb\n"
- " setne %%al\n"
- " subl %%ecx,%0\n"
- " addl %0,%%eax\n"
- "1:\n"
- ".section .fixup,\"ax\"\n"
- "2: xorl %%eax,%%eax\n"
- " jmp 1b\n"
- "3: movb $1,%%al\n"
- " jmp 1b\n"
- ".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 0b,2b\n"
- ".previous"
- :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp)
- :"0" (n), "1" (s), "2" (0), "3" (mask)
- :"cc");
- return res & mask;
-}
+++ /dev/null
-/* ld script to make i386 Linux kernel
- * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
- */
-OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
-OUTPUT_ARCH(i386)
-ENTRY(start)
-SECTIONS
-{
- . = 0xFC400000 + 0x100000;
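- /* i.e. 0xFC500000 -- this must match the MONITOR_BASE that the
- * arch Rules.mk feeds to elf-reloc. */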
- _text = .; /* Text and read-only data */
- .text : {
- *(.text)
- *(.fixup)
- *(.gnu.warning)
- } = 0x9090
- .text.lock : { *(.text.lock) } /* out-of-line lock text */
-
- _etext = .; /* End of text section */
-
- .rodata : { *(.rodata) *(.rodata.*) }
- .kstrtab : { *(.kstrtab) }
-
- . = ALIGN(16); /* Exception table */
- __start___ex_table = .;
- __ex_table : { *(__ex_table) }
- __stop___ex_table = .;
-
- __start___ksymtab = .; /* Kernel symbol table */
- __ksymtab : { *(__ksymtab) }
- __stop___ksymtab = .;
-
- __start___kallsyms = .; /* All kernel symbols */
- __kallsyms : { *(__kallsyms) }
- __stop___kallsyms = .;
-
- .data : { /* Data */
- *(.data)
- CONSTRUCTORS
- }
-
- _edata = .; /* End of data section */
-
- . = ALIGN(8192); /* init_task */
- .data.init_task : { *(.data.init_task) }
-
- . = ALIGN(4096); /* Init code and data */
- __init_begin = .;
- .text.init : { *(.text.init) }
- .data.init : { *(.data.init) }
- . = ALIGN(16);
- __setup_start = .;
- .setup.init : { *(.setup.init) }
- __setup_end = .;
- __initcall_start = .;
- .initcall.init : { *(.initcall.init) }
- __initcall_end = .;
- . = ALIGN(4096);
- __init_end = .;
-
- . = ALIGN(4096);
- .data.page_aligned : { *(.data.idt) }
-
- . = ALIGN(32);
- .data.cacheline_aligned : { *(.data.cacheline_aligned) }
-
- __bss_start = .; /* BSS */
- .bss : {
- *(.bss)
- }
- _end = . ;
-
- /* Sections to be discarded */
- /DISCARD/ : {
- *(.text.exit)
- *(.data.exit)
- *(.exitcall.exit)
- }
-
- /* Stabs debugging sections. */
- .stab 0 : { *(.stab) }
- .stabstr 0 : { *(.stabstr) }
- .stab.excl 0 : { *(.stab.excl) }
- .stab.exclstr 0 : { *(.stab.exclstr) }
- .stab.index 0 : { *(.stab.index) }
- .stab.indexstr 0 : { *(.stab.indexstr) }
- .comment 0 : { *(.comment) }
-}
--- /dev/null
+
+include $(BASEDIR)/Rules.mk
+
+ifneq ($(debugger),y)
+OBJS := $(subst pdb-linux.o,,$(OBJS))
+OBJS := $(subst pdb-stub.o,,$(OBJS))
+endif
+
+# What happens here? We link monitor object files together, starting
+# at MONITOR_BASE (a very high address). But the bootloader cannot put
+# things there, so we initially load at LOAD_BASE. A hacky little
+# tool called `elf-reloc' is used to modify segment offsets from
+# MONITOR_BASE-relative to LOAD_BASE-relative.
+# (NB. Linux gets round this by turning its image into raw binary, then
+# wrapping that with a low-memory bootstrapper.)
+default: boot/boot.o $(OBJS)
+ $(LD) -r -o arch.o $(OBJS)
+ $(LD) $(LDFLAGS) boot/boot.o $(ALL_OBJS) -o $(TARGET).dbg
+ objcopy -R .note -R .comment -S $(TARGET).dbg $(TARGET)
+ $(BASEDIR)/tools/elf-reloc $(MONITOR_BASE) $(LOAD_BASE) $(TARGET)
+
+clean:
+ rm -f *.o *~ core boot/*.o boot/*~ boot/core
--- /dev/null
+########################################
+# x86-specific definitions
+
+CC := gcc
+LD := ld
+
+# Linker should relocate monitor to this address
+MONITOR_BASE := 0xFC500000
+
+# Bootloader should load monitor to this real address
+LOAD_BASE := 0x00100000
+
+CFLAGS := -nostdinc -fno-builtin -fno-common -fno-strict-aliasing -O3
+CFLAGS += -iwithprefix include -Wall -Werror -DMONITOR_BASE=$(MONITOR_BASE)
+CFLAGS += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__
+CFLAGS += -Wno-pointer-arith -Wredundant-decls -D$(TARGET_SUBARCH)
+
+LDFLAGS := -T xen.lds -N
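+# (-N gives writable, unaligned text/data -- fine here, since the image
+# is loaded as one flat blob rather than demand-paged.)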
+
+ifeq ($(TARGET_SUBARCH),x86_32)
+CFLAGS += -m32 -march=i686
+LDARCHFLAGS := --oformat elf32-i386
+endif
+
+ifeq ($(TARGET_SUBARCH),x86_64)
+CFLAGS += -m64
+LDARCHFLAGS :=
+endif
--- /dev/null
+/*
+ * acpi.c - Architecture-Specific Low-Level ACPI Support
+ *
+ * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ * Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
+ * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <xen/config.h>
+#include <xen/kernel.h>
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/slab.h>
+#include <xen/pci.h>
+#include <xen/irq.h>
+#include <xen/acpi.h>
+#include <asm/mpspec.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/apicdef.h>
+#include <asm/page.h>
+#include <asm/flushtlb.h>
+#include <asm/io_apic.h>
+#include <asm/acpi.h>
+#include <asm/smpboot.h>
+
+
+#define PREFIX "ACPI: "
+
+int acpi_lapic = 0;
+int acpi_ioapic = 0;
+
+/* --------------------------------------------------------------------------
+ Boot-time Configuration
+ -------------------------------------------------------------------------- */
+
+#ifdef CONFIG_ACPI_BOOT
+int acpi_noirq __initdata = 0; /* skip ACPI IRQ initialization */
+int acpi_ht __initdata = 1; /* enable HT */
+
+enum acpi_irq_model_id acpi_irq_model;
+
+
+/*
+ * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
+ * to map the target physical address. The problem is that set_fixmap()
+ * provides a single page, and it is possible that the page is not
+ * sufficient.
+ * By using this area, we can map up to MAX_IO_APICS pages temporarily,
+ * i.e. until the next __acpi_map_table() call.
+ *
+ * Important Safety Note: The fixed I/O APIC page numbers are *subtracted*
+ * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and
+ * count idx down while incrementing the phys address.
+ */
+char *__acpi_map_table(unsigned long phys, unsigned long size)
+{
+ unsigned long base, offset, mapped_size;
+ int idx;
+
+ if (phys + size < 8*1024*1024)
+ return __va(phys);
+
+ offset = phys & (PAGE_SIZE - 1);
+ mapped_size = PAGE_SIZE - offset;
+ set_fixmap(FIX_ACPI_END, phys);
+ base = fix_to_virt(FIX_ACPI_END);
+
+ /*
+ * Most cases can be covered by the below.
+ */
+ idx = FIX_ACPI_END;
+ while (mapped_size < size) {
+ if (--idx < FIX_ACPI_BEGIN)
+ return 0; /* cannot handle this */
+ phys += PAGE_SIZE;
+ set_fixmap(idx, phys);
+ mapped_size += PAGE_SIZE;
+ }
+
+ return ((char *) base + offset);
+}
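+
+/*
+ * Worked example (illustrative): for phys = 0x1ffff800 and size = 0x2000
+ * (above the 8MB direct map), offset = 0x800, so the first fixmap page
+ * covers 0x800 bytes; two further pages are then mapped at descending
+ * indices (FIX_ACPI_END-1, FIX_ACPI_END-2) before base+offset is
+ * returned.
+ */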
+
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
+
+
+static int __init
+acpi_parse_madt (
+ unsigned long phys_addr,
+ unsigned long size)
+{
+ struct acpi_table_madt *madt = NULL;
+
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size);
+ if (!madt) {
+ printk(KERN_WARNING PREFIX "Unable to map MADT\n");
+ return -ENODEV;
+ }
+
+ if (madt->lapic_address)
+ acpi_lapic_addr = (u64) madt->lapic_address;
+
+ printk(KERN_INFO PREFIX "Local APIC address 0x%08x\n",
+ madt->lapic_address);
+
+ detect_clustered_apic(madt->header.oem_id, madt->header.oem_table_id);
+
+ return 0;
+}
+
+
+static int __init
+acpi_parse_lapic (
+ acpi_table_entry_header *header)
+{
+ struct acpi_table_lapic *processor = NULL;
+
+ processor = (struct acpi_table_lapic*) header;
+ if (!processor)
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ mp_register_lapic (
+ processor->id, /* APIC ID */
+ processor->flags.enabled); /* Enabled? */
+
+ return 0;
+}
+
+
+static int __init
+acpi_parse_lapic_addr_ovr (
+ acpi_table_entry_header *header)
+{
+ struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL;
+
+ lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header;
+ if (!lapic_addr_ovr)
+ return -EINVAL;
+
+ acpi_lapic_addr = lapic_addr_ovr->address;
+
+ return 0;
+}
+
+static int __init
+acpi_parse_lapic_nmi (
+ acpi_table_entry_header *header)
+{
+ struct acpi_table_lapic_nmi *lapic_nmi = NULL;
+
+ lapic_nmi = (struct acpi_table_lapic_nmi*) header;
+ if (!lapic_nmi)
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ if (lapic_nmi->lint != 1)
+ printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
+
+ return 0;
+}
+
+#endif /*CONFIG_X86_LOCAL_APIC*/
+
+#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
+
+static int __init
+acpi_parse_ioapic (
+ acpi_table_entry_header *header)
+{
+ struct acpi_table_ioapic *ioapic = NULL;
+
+ ioapic = (struct acpi_table_ioapic*) header;
+ if (!ioapic)
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ mp_register_ioapic (
+ ioapic->id,
+ ioapic->address,
+ ioapic->global_irq_base);
+
+ return 0;
+}
+
+
+static int __init
+acpi_parse_int_src_ovr (
+ acpi_table_entry_header *header)
+{
+ struct acpi_table_int_src_ovr *intsrc = NULL;
+
+ intsrc = (struct acpi_table_int_src_ovr*) header;
+ if (!intsrc)
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ mp_override_legacy_irq (
+ intsrc->bus_irq,
+ intsrc->flags.polarity,
+ intsrc->flags.trigger,
+ intsrc->global_irq);
+
+ return 0;
+}
+
+
+static int __init
+acpi_parse_nmi_src (
+ acpi_table_entry_header *header)
+{
+ struct acpi_table_nmi_src *nmi_src = NULL;
+
+ nmi_src = (struct acpi_table_nmi_src*) header;
+ if (!nmi_src)
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ /* TBD: Support nmi_src entries? */
+
+ return 0;
+}
+
+#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/
+
+
+static unsigned long __init
+acpi_scan_rsdp (
+ unsigned long start,
+ unsigned long length)
+{
+ unsigned long offset = 0;
+ unsigned long sig_len = sizeof("RSD PTR ") - 1;
+
+ /*
+ * Scan all 16-byte boundaries of the physical memory region for the
+ * RSDP signature.
+ */
+ for (offset = 0; offset < length; offset += 16) {
+ if (strncmp((char *) (start + offset), "RSD PTR ", sig_len))
+ continue;
+ return (start + offset);
+ }
+
+ return 0;
+}
+
+
+unsigned long __init
+acpi_find_rsdp (void)
+{
+ unsigned long rsdp_phys = 0;
+
+ /*
+ * Scan memory looking for the RSDP signature. First search EBDA (low
+ * memory) paragraphs and then search upper memory (E0000-FFFFF).
+ */
+ rsdp_phys = acpi_scan_rsdp (0, 0x400);
+ if (!rsdp_phys)
+ rsdp_phys = acpi_scan_rsdp (0xE0000, 0x20000);
+
+ return rsdp_phys;
+}
+
+
+/*
+ * acpi_boot_init()
+ * called from setup_arch(), always.
+ * 1. maps ACPI tables for later use
+ * 2. enumerates lapics
+ * 3. enumerates io-apics
+ *
+ * side effects:
+ * acpi_lapic = 1 if LAPIC found
+ * acpi_ioapic = 1 if IOAPIC found
+ * if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
+ * if acpi_blacklisted() acpi_disabled = 1;
+ * acpi_irq_model=...
+ * ...
+ *
+ * return value: (currently ignored)
+ * 0: success
+ * !0: failure
+ */
+int __init
+acpi_boot_init (void)
+{
+ int result = 0;
+
+ if (acpi_disabled && !acpi_ht)
+ return(1);
+
+ /*
+ * The default interrupt routing model is PIC (8259). This gets
+ * overridden if IOAPICs are enumerated (below).
+ */
+ acpi_irq_model = ACPI_IRQ_MODEL_PIC;
+
+ /*
+ * Initialize the ACPI boot-time table parser.
+ */
+ result = acpi_table_init();
+ if (result) {
+ acpi_disabled = 1;
+ return result;
+ }
+
+ result = acpi_blacklisted();
+ if (result) {
+ printk(KERN_NOTICE PREFIX "BIOS listed in blacklist, disabling ACPI support\n");
+ acpi_disabled = 1;
+ return result;
+ }
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+ /*
+ * MADT
+ * ----
+ * Parse the Multiple APIC Description Table (MADT), if exists.
+ * Note that this table provides platform SMP configuration
+ * information -- the successor to MPS tables.
+ */
+
+ result = acpi_table_parse(ACPI_APIC, acpi_parse_madt);
+ if (!result) {
+ return 0;
+ }
+ else if (result < 0) {
+ printk(KERN_ERR PREFIX "Error parsing MADT\n");
+ return result;
+ }
+ else if (result > 1)
+ printk(KERN_WARNING PREFIX "Multiple MADT tables exist\n");
+
+ /*
+ * Local APIC
+ * ----------
+ * Note that the LAPIC address is obtained from the MADT (32-bit value)
+ * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value).
+ */
+
+ result = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr);
+ if (result < 0) {
+ printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
+ return result;
+ }
+
+ mp_register_lapic_address(acpi_lapic_addr);
+
+ result = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic);
+ if (!result) {
+ printk(KERN_ERR PREFIX "No LAPIC entries present\n");
+ /* TBD: Cleanup to allow fallback to MPS */
+ return -ENODEV;
+ }
+ else if (result < 0) {
+ printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
+ /* TBD: Cleanup to allow fallback to MPS */
+ return result;
+ }
+
+ result = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi);
+ if (result < 0) {
+ printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
+ /* TBD: Cleanup to allow fallback to MPS */
+ return result;
+ }
+
+ acpi_lapic = 1;
+
+#endif /*CONFIG_X86_LOCAL_APIC*/
+
+#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
+
+ /*
+ * I/O APIC
+ * --------
+ */
+
+ /*
+ * ACPI interpreter is required to complete interrupt setup,
+ * so if it is off, don't enumerate the io-apics with ACPI.
+ * If MPS is present, it will handle them,
+ * otherwise the system will stay in PIC mode
+ */
+ if (acpi_disabled || acpi_noirq) {
+ return 1;
+ }
+
+ /*
+ * if "noapic" boot option, don't look for IO-APICs
+ */
+ if (ioapic_setup_disabled()) {
+ printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
+ "due to 'noapic' option.\n");
+ return 1;
+ }
+
+
+ result = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic);
+ if (!result) {
+ printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
+ return -ENODEV;
+ }
+ else if (result < 0) {
+ printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
+ return result;
+ }
+
+ /* Build a default routing table for legacy (ISA) interrupts. */
+ mp_config_acpi_legacy_irqs();
+
+ result = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr);
+ if (result < 0) {
+ printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n");
+ /* TBD: Cleanup to allow fallback to MPS */
+ return result;
+ }
+
+ result = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src);
+ if (result < 0) {
+ printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
+ /* TBD: Cleanup to allow fallback to MPS */
+ return result;
+ }
+
+ acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
+
+ acpi_irq_balance_set(NULL);
+
+ acpi_ioapic = 1;
+
+ if (acpi_lapic && acpi_ioapic)
+ smp_found_config = 1;
+
+#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/
+
+ return 0;
+}
+
+#endif /*CONFIG_ACPI_BOOT*/
+
+#ifdef CONFIG_ACPI_BUS
+/*
+ * "acpi_pic_sci=level" (current default)
+ * programs the PIC-mode SCI to Level Trigger.
+ * (NO-OP if the BIOS set Level Trigger already)
+ *
+ * If a PIC-mode SCI is not recognized or gives spurious IRQ7s,
+ * it may require Edge Trigger -- use "acpi_pic_sci=edge"
+ * (NO-OP if the BIOS set Edge Trigger already)
+ *
+ * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers
+ * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge.
+ * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0)
+ * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0)
+ */
+
+static __initdata int acpi_pic_sci_trigger; /* 0: level, 1: edge */
+
+void __init
+acpi_pic_sci_set_trigger(unsigned int irq)
+{
+ unsigned char mask = 1 << (irq & 7);
+ unsigned int port = 0x4d0 + (irq >> 3);
+ unsigned char val = inb(port);
+
+
+ printk(PREFIX "IRQ%d SCI:", irq);
+ if (!(val & mask)) {
+ printk(" Edge");
+
+ if (!acpi_pic_sci_trigger) {
+ printk(" set to Level");
+ outb(val | mask, port);
+ }
+ } else {
+ printk(" Level");
+
+ if (acpi_pic_sci_trigger) {
+ printk(" set to Edge");
+ outb(val & ~mask, port);
+ }
+ }
+ printk(" Trigger.\n");
+}
+
+int __init
+acpi_pic_sci_setup(char *str)
+{
+ while (str && *str) {
+ if (strncmp(str, "level", 5) == 0)
+ acpi_pic_sci_trigger = 0; /* force level trigger */
+ if (strncmp(str, "edge", 4) == 0)
+ acpi_pic_sci_trigger = 1; /* force edge trigger */
+ str = strchr(str, ',');
+ if (str)
+ str += strspn(str, ", \t");
+ }
+ return 1;
+}
+
+__setup("acpi_pic_sci=", acpi_pic_sci_setup);
+
+#endif /* CONFIG_ACPI_BUS */
+
+
+
+/* --------------------------------------------------------------------------
+ Low-Level Sleep Support
+ -------------------------------------------------------------------------- */
+
+#ifdef CONFIG_ACPI_SLEEP
+
+#define DEBUG
+
+#ifdef DEBUG
+#include <xen/serial.h>
+#endif
+
+/* address in low memory of the wakeup routine. */
+unsigned long acpi_wakeup_address = 0;
+
+/* new page directory that we will be using */
+static pmd_t *pmd;
+
+/* saved page directory */
+static pmd_t saved_pmd;
+
+/* page which we'll use for the new page directory */
+static pte_t *ptep;
+
+extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
+
+/*
+ * acpi_create_identity_pmd
+ *
+ * Create a new, identity mapped pmd.
+ *
+ * Do this by creating new page directory, and marking all the pages as R/W
+ * Then set it as the new Page Middle Directory.
+ * And, of course, flush the TLB so it takes effect.
+ *
+ * We save the address of the old one, for later restoration.
+ */
+static void acpi_create_identity_pmd (void)
+{
+ pgd_t *pgd;
+ int i;
+
+ ptep = (pte_t*)__get_free_page(GFP_KERNEL);
+
+ /* fill page with low mapping */
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ set_pte(ptep + i, mk_pte_phys(i << PAGE_SHIFT, PAGE_SHARED));
+
+ pgd = pgd_offset(current->active_mm, 0);
+ pmd = pmd_alloc(current->mm,pgd, 0);
+
+ /* save the old pmd */
+ saved_pmd = *pmd;
+
+ /* set the new one */
+ set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(ptep)));
+
+ /* flush the TLB */
+ local_flush_tlb();
+}
+
+/*
+ * acpi_restore_pmd
+ *
+ * Restore the old pmd saved by acpi_create_identity_pmd and
+ * free the page that said function alloc'd
+ */
+static void acpi_restore_pmd (void)
+{
+ set_pmd(pmd, saved_pmd);
+ local_flush_tlb();
+ free_page((unsigned long)ptep);
+}
+
+/**
+ * acpi_save_state_mem - save kernel state
+ *
+ * Create an identity mapped page table and copy the wakeup routine to
+ * low memory.
+ */
+int acpi_save_state_mem (void)
+{
+ acpi_create_identity_pmd();
+ acpi_copy_wakeup_routine(acpi_wakeup_address);
+
+ return 0;
+}
+
+/**
+ * acpi_save_state_disk - save kernel state to disk
+ *
+ */
+int acpi_save_state_disk (void)
+{
+ return 1;
+}
+
+/*
+ * acpi_restore_state
+ */
+void acpi_restore_state_mem (void)
+{
+ acpi_restore_pmd();
+}
+
+/**
+ * acpi_reserve_bootmem - do _very_ early ACPI initialisation
+ *
+ * We allocate a page in low memory for the wakeup
+ * routine for when we come back from a sleep state. The
+ * runtime allocator allows specification of <16M pages, but not
+ * <1M pages.
+ */
+void __init acpi_reserve_bootmem(void)
+{
+ acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE);
+ printk(KERN_DEBUG "ACPI: have wakeup address 0x%8.8lx\n", acpi_wakeup_address);
+}
+
+void do_suspend_lowlevel_s4bios(int resume)
+{
+ if (!resume) {
+ save_processor_context();
+ acpi_save_register_state((unsigned long)&&acpi_sleep_done);
+ acpi_enter_sleep_state_s4bios();
+ return;
+ }
+acpi_sleep_done:
+ restore_processor_context();
+}
+
+
+#endif /*CONFIG_ACPI_SLEEP*/
+
--- /dev/null
+/*
+ * Local APIC handling, local APIC timers
+ *
+ * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ * Fixes
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
+ * thanks to Eric Gilmore
+ * and Rolf G. Tews
+ * for testing these extensively.
+ * Maciej W. Rozycki : Various updates and fixes.
+ * Mikael Pettersson : Power Management for UP-APIC.
+ */
+
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/sched.h>
+#include <xen/irq.h>
+#include <xen/delay.h>
+#include <asm/mc146818rtc.h>
+#include <asm/msr.h>
+#include <xen/errno.h>
+#include <asm/atomic.h>
+#include <xen/smp.h>
+#include <xen/interrupt.h>
+#include <asm/mpspec.h>
+#include <asm/flushtlb.h>
+#include <asm/hardirq.h>
+#include <asm/apic.h>
+#include <xen/mm.h>
+#include <asm/io_apic.h>
+#include <asm/timex.h>
+#include <xen/ac_timer.h>
+#include <xen/perfc.h>
+
+
+/* Using APIC to generate smp_local_timer_interrupt? */
+int using_apic_timer = 0;
+
+static int enabled_via_apicbase;
+
+int get_maxlvt(void)
+{
+ unsigned int v, ver, maxlvt;
+
+ v = apic_read(APIC_LVR);
+ ver = GET_APIC_VERSION(v);
+ /* 82489DXs do not report # of LVT entries. */
+ maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
+ return maxlvt;
+}
+
+void clear_local_APIC(void)
+{
+ int maxlvt;
+ unsigned long v;
+
+ maxlvt = get_maxlvt();
+
+ /*
+ * Masking an LVT entry on a P6 can trigger a local APIC error
+ * if the vector is zero. Mask LVTERR first to prevent this.
+ */
+ if (maxlvt >= 3) {
+ v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
+ apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED);
+ }
+ /*
+ * Careful: we have to set masks only first to deassert
+ * any level-triggered sources.
+ */
+ v = apic_read(APIC_LVTT);
+ apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
+ v = apic_read(APIC_LVT0);
+ apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+ v = apic_read(APIC_LVT1);
+ apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED);
+ if (maxlvt >= 4) {
+ v = apic_read(APIC_LVTPC);
+ apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
+ }
+
+ /*
+ * Clean APIC state for other OSs:
+ */
+ apic_write_around(APIC_LVTT, APIC_LVT_MASKED);
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
+ apic_write_around(APIC_LVT1, APIC_LVT_MASKED);
+ if (maxlvt >= 3)
+ apic_write_around(APIC_LVTERR, APIC_LVT_MASKED);
+ if (maxlvt >= 4)
+ apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
+ v = GET_APIC_VERSION(apic_read(APIC_LVR));
+ if (APIC_INTEGRATED(v)) { /* !82489DX */
+ if (maxlvt > 3)
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ }
+}
+
+void __init connect_bsp_APIC(void)
+{
+ if (pic_mode) {
+ /*
+ * Do not trust the local APIC being empty at bootup.
+ */
+ clear_local_APIC();
+ /*
+ * PIC mode, enable APIC mode in the IMCR, i.e.
+ * connect BSP's local APIC to INT and NMI lines.
+ */
+ printk("leaving PIC mode, enabling APIC mode.\n");
+ outb(0x70, 0x22);
+ outb(0x01, 0x23);
+ }
+}
+
+void disconnect_bsp_APIC(void)
+{
+ if (pic_mode) {
+ /*
+ * Put the board back into PIC mode (has an effect
+ * only on certain older boards). Note that APIC
+ * interrupts, including IPIs, won't work beyond
+ * this point! The only exception are INIT IPIs.
+ */
+ printk("disabling APIC mode, entering PIC mode.\n");
+ outb(0x70, 0x22);
+ outb(0x00, 0x23);
+ }
+}
+
+void disable_local_APIC(void)
+{
+ unsigned long value;
+
+ clear_local_APIC();
+
+ /*
+ * Disable APIC (implies clearing of registers
+ * for 82489DX!).
+ */
+ value = apic_read(APIC_SPIV);
+ value &= ~APIC_SPIV_APIC_ENABLED;
+ apic_write_around(APIC_SPIV, value);
+
+ if (enabled_via_apicbase) {
+ unsigned int l, h;
+ rdmsr(MSR_IA32_APICBASE, l, h);
+ l &= ~MSR_IA32_APICBASE_ENABLE;
+ wrmsr(MSR_IA32_APICBASE, l, h);
+ }
+}
+
+/*
+ * This is to verify that we're looking at a real local APIC.
+ * Check these against your board if the CPUs fail to start
+ * for no apparent reason.
+ */
+int __init verify_local_APIC(void)
+{
+ unsigned int reg0, reg1;
+
+ /*
+ * The version register is read-only in a real APIC.
+ */
+ reg0 = apic_read(APIC_LVR);
+ Dprintk("Getting VERSION: %x\n", reg0);
+ apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
+ reg1 = apic_read(APIC_LVR);
+ Dprintk("Getting VERSION: %x\n", reg1);
+
+ /*
+ * The two version reads above should print the same
+ * numbers. If the second one is different, then we
+ * poke at a non-APIC.
+ */
+ if (reg1 != reg0)
+ return 0;
+
+ /*
+ * Check if the version looks reasonable.
+ */
+ reg1 = GET_APIC_VERSION(reg0);
+ if (reg1 == 0x00 || reg1 == 0xff)
+ return 0;
+ reg1 = get_maxlvt();
+ if (reg1 < 0x02 || reg1 == 0xff)
+ return 0;
+
+ /*
+ * The ID register is read/write in a real APIC.
+ */
+ reg0 = apic_read(APIC_ID);
+ Dprintk("Getting ID: %x\n", reg0);
+ apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
+ reg1 = apic_read(APIC_ID);
+ Dprintk("Getting ID: %x\n", reg1);
+ apic_write(APIC_ID, reg0);
+ if (reg1 != (reg0 ^ APIC_ID_MASK))
+ return 0;
+
+ /*
+ * The next two are just to see if we have sane values.
+ * They're only really relevant if we're in Virtual Wire
+ * compatibility mode, but most boxes no longer are.
+ */
+ reg0 = apic_read(APIC_LVT0);
+ Dprintk("Getting LVT0: %x\n", reg0);
+ reg1 = apic_read(APIC_LVT1);
+ Dprintk("Getting LVT1: %x\n", reg1);
+
+ return 1;
+}
+
+void __init sync_Arb_IDs(void)
+{
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+
+ Dprintk("Synchronizing Arb IDs.\n");
+ apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
+ | APIC_DM_INIT);
+}
+
+extern void __error_in_apic_c (void);
+
+/*
+ * WAS: An initial setup of the virtual wire mode.
+ * NOW: We don't bother doing anything. All we need at this point
+ * is to receive timer ticks, so that 'jiffies' is incremented.
+ * If we're SMP, then we can assume BIOS did setup for us.
+ * If we're UP, then the APIC should be disabled (it is at reset).
+ * If we're UP and APIC is enabled, then BIOS is clever and has
+ * probably done initial interrupt routing for us.
+ */
+void __init init_bsp_APIC(void)
+{
+}
+
+static unsigned long calculate_ldr(unsigned long old)
+{
+ unsigned long id = 1UL << smp_processor_id();
+ return (old & ~APIC_LDR_MASK)|SET_APIC_LOGICAL_ID(id);
+}
+
+void __init setup_local_APIC (void)
+{
+ unsigned long value, ver, maxlvt;
+
+ value = apic_read(APIC_LVR);
+ ver = GET_APIC_VERSION(value);
+
+ if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f)
+ __error_in_apic_c();
+
+ /* Double-check whether this APIC is really registered. */
+ if (!test_bit(GET_APIC_ID(apic_read(APIC_ID)), &phys_cpu_present_map))
+ BUG();
+
+ /*
+ * Intel recommends to set DFR, LDR and TPR before enabling
+ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
+ * document number 292116). So here it goes...
+ */
+
+ /*
+ * In clustered apic mode, the firmware does this for us
+ * Put the APIC into flat delivery mode.
+ * Must be "all ones" explicitly for 82489DX.
+ */
+ apic_write_around(APIC_DFR, APIC_DFR_FLAT);
+
+ /*
+ * Set up the logical destination ID.
+ */
+ value = apic_read(APIC_LDR);
+ apic_write_around(APIC_LDR, calculate_ldr(value));
+
+ /*
+ * Set Task Priority to 'accept all'. We never change this
+ * later on.
+ */
+ value = apic_read(APIC_TASKPRI);
+ value &= ~APIC_TPRI_MASK;
+ apic_write_around(APIC_TASKPRI, value);
+
+ /*
+ * Now that we are all set up, enable the APIC
+ */
+ value = apic_read(APIC_SPIV);
+ value &= ~APIC_VECTOR_MASK;
+ /*
+ * Enable APIC
+ */
+ value |= APIC_SPIV_APIC_ENABLED;
+
+ /* Enable focus processor (bit==0) */
+ value &= ~APIC_SPIV_FOCUS_DISABLED;
+
+ /* Set spurious IRQ vector */
+ value |= SPURIOUS_APIC_VECTOR;
+ apic_write_around(APIC_SPIV, value);
+
+ /*
+ * Set up LVT0, LVT1:
+ *
+ * set up through-local-APIC on the BP's LINT0. This is not
+ * strictly necessary in pure symmetric-IO mode, but sometimes
+ * we delegate interrupts to the 8259A.
+ */
+ /*
+ * TODO: set up through-local-APIC from through-I/O-APIC? --macro
+ */
+ value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
+ if (!smp_processor_id()) {
+ value = APIC_DM_EXTINT;
+ printk("enabled ExtINT on CPU#%d\n", smp_processor_id());
+ } else {
+ value = APIC_DM_EXTINT | APIC_LVT_MASKED;
+ printk("masked ExtINT on CPU#%d\n", smp_processor_id());
+ }
+ apic_write_around(APIC_LVT0, value);
+
+ /*
+ * only the BP should see the LINT1 NMI signal, obviously.
+ */
+ if (!smp_processor_id())
+ value = APIC_DM_NMI;
+ else
+ value = APIC_DM_NMI | APIC_LVT_MASKED;
+ if (!APIC_INTEGRATED(ver)) /* 82489DX */
+ value |= APIC_LVT_LEVEL_TRIGGER;
+ apic_write_around(APIC_LVT1, value);
+
+ if (APIC_INTEGRATED(ver)) { /* !82489DX */
+ maxlvt = get_maxlvt();
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
+ value = apic_read(APIC_ESR);
+ printk("ESR value before enabling vector: %08lx\n", value);
+
+ value = ERROR_APIC_VECTOR; /* enables sending errors */
+ apic_write_around(APIC_LVTERR, value);
+ /* spec says clear errors after enabling vector. */
+ if (maxlvt > 3)
+ apic_write(APIC_ESR, 0);
+ value = apic_read(APIC_ESR);
+ printk("ESR value after enabling vector: %08lx\n", value);
+ } else {
+ printk("No ESR for 82489DX.\n");
+ }
+
+ if ( (smp_processor_id() == 0) && (nmi_watchdog == NMI_LOCAL_APIC) )
+ setup_apic_nmi_watchdog();
+}
+
+
+static inline void apic_pm_init1(void) { }
+static inline void apic_pm_init2(void) { }
+
+
+/*
+ * Detect and enable local APICs on non-SMP boards.
+ * Original code written by Keir Fraser.
+ */
+
+static int __init detect_init_APIC (void)
+{
+ u32 h, l, features;
+ extern void get_cpu_vendor(struct cpuinfo_x86*);
+
+ /* Workaround for us being called before identify_cpu(). */
+ get_cpu_vendor(&boot_cpu_data);
+
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1)
+ break;
+ if (boot_cpu_data.x86 == 15 && cpu_has_apic)
+ break;
+ goto no_apic;
+ case X86_VENDOR_INTEL:
+ if (boot_cpu_data.x86 == 6 ||
+ (boot_cpu_data.x86 == 15 && cpu_has_apic) ||
+ (boot_cpu_data.x86 == 5 && cpu_has_apic))
+ break;
+ goto no_apic;
+ default:
+ goto no_apic;
+ }
+
+ if (!cpu_has_apic) {
+ /*
+ * Some BIOSes disable the local APIC in the
+ * APIC_BASE MSR. This can only be done in
+ * software for Intel P6 and AMD K7 (Model > 1).
+ */
+ rdmsr(MSR_IA32_APICBASE, l, h);
+ if (!(l & MSR_IA32_APICBASE_ENABLE)) {
+ printk("Local APIC disabled by BIOS -- reenabling.\n");
+ l &= ~MSR_IA32_APICBASE_BASE;
+ l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
+ wrmsr(MSR_IA32_APICBASE, l, h);
+ enabled_via_apicbase = 1;
+ }
+ }
+
+ /* The APIC feature bit should now be enabled in `cpuid' */
+ features = cpuid_edx(1);
+ if (!(features & (1 << X86_FEATURE_APIC))) {
+ printk("Could not enable APIC!\n");
+ return -1;
+ }
+
+ set_bit(X86_FEATURE_APIC, &boot_cpu_data.x86_capability);
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+ boot_cpu_physical_apicid = 0;
+
+ /* The BIOS may have set up the APIC at some other address */
+ rdmsr(MSR_IA32_APICBASE, l, h);
+ if (l & MSR_IA32_APICBASE_ENABLE)
+ mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
+
+ if (nmi_watchdog != NMI_NONE)
+ nmi_watchdog = NMI_LOCAL_APIC;
+
+ printk("Found and enabled local APIC!\n");
+ apic_pm_init1();
+ return 0;
+
+ no_apic:
+ printk("No local APIC present or hardware disabled\n");
+ return -1;
+}
+
+void __init init_apic_mappings(void)
+{
+ unsigned long apic_phys = 0;
+
+ /*
+ * If no local APIC can be found then set up a fake all zeroes page to
+ * simulate the local APIC and another one for the IO-APIC.
+ */
+ if (!smp_found_config && detect_init_APIC()) {
+ apic_phys = get_free_page(GFP_KERNEL);
+ apic_phys = __pa(apic_phys);
+ } else
+ apic_phys = mp_lapic_addr;
+
+ set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
+ Dprintk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys);
+
+ /*
+ * Fetch the APIC ID of the BSP in case we have a
+ * default configuration (or the MP table is broken).
+ */
+ if (boot_cpu_physical_apicid == -1U)
+ boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+
+#ifdef CONFIG_X86_IO_APIC
+ {
+ unsigned long ioapic_phys = 0, idx = FIX_IO_APIC_BASE_0;
+ int i;
+
+ for (i = 0; i < nr_ioapics; i++) {
+ if (smp_found_config)
+ ioapic_phys = mp_ioapics[i].mpc_apicaddr;
+ set_fixmap_nocache(idx, ioapic_phys);
+ Dprintk("mapped IOAPIC to %08lx (%08lx)\n",
+ __fix_to_virt(idx), ioapic_phys);
+ idx++;
+ }
+ }
+#endif
+}
+
+/*****************************************************************************
+ * APIC calibration
+ *
+ * The APIC is programmed in bus cycles.
+ * Timeout values should be specified in real time units.
+ * The "cheapest" time source is the cyclecounter.
+ *
+ * Thus, we need a mapping from: bus cycles <- cycle counter <- system time
+ *
+ * The calibration is currently a bit shoddy since it requires the external
+ * timer chip to generate periodic timer interrupts.
+ *****************************************************************************/
+
+/* used for system time scaling */
+static unsigned int bus_freq;
+static u32 bus_cycle; /* length of one bus cycle in pico-seconds */
+static u32 bus_scale; /* scaling factor convert ns to bus cycles */
+
+/*
+ * The timer chip is already set up at HZ interrupts per second here,
+ * but we do not accept timer interrupts yet. We only allow the BP
+ * to calibrate.
+ */
+static unsigned int __init get_8254_timer_count(void)
+{
+ /*extern spinlock_t i8253_lock;*/
+ /*unsigned long flags;*/
+ unsigned int count;
+ /*spin_lock_irqsave(&i8253_lock, flags);*/
+ outb_p(0x00, 0x43);
+ count = inb_p(0x40);
+ count |= inb_p(0x40) << 8;
+ /*spin_unlock_irqrestore(&i8253_lock, flags);*/
+ return count;
+}
+
+void __init wait_8254_wraparound(void)
+{
+ unsigned int curr_count, prev_count=~0;
+ int delta;
+ curr_count = get_8254_timer_count();
+ do {
+ prev_count = curr_count;
+ curr_count = get_8254_timer_count();
+ delta = curr_count-prev_count;
+ /*
+ * This limit for delta seems arbitrary, but it isn't, it's slightly
+ * above the level of error a buggy Mercury/Neptune chipset timer can
+ * cause.
+ */
+ } while (delta < 300);
+}
+
+/*
+ * This function sets up the local APIC timer, with a timeout of
+ * 'clocks' APIC bus clock. During calibration we actually call
+ * this function with a very large value and read the current time after
+ * a well defined period of time as expired.
+ *
+ * Calibration is only performed once, for CPU0!
+ *
+ * We do reads before writes even if unnecessary, to get around the
+ * P5 APIC double write bug.
+ */
+#define APIC_DIVISOR 1
+static void __setup_APIC_LVTT(unsigned int clocks)
+{
+ unsigned int lvtt1_value, tmp_value;
+ lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV)|LOCAL_TIMER_VECTOR;
+ apic_write_around(APIC_LVTT, lvtt1_value);
+ tmp_value = apic_read(APIC_TDCR);
+ apic_write_around(APIC_TDCR, (tmp_value | APIC_TDR_DIV_1));
+ apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
+}
+
+/*
+ * This is done for every CPU from setup_APIC_clocks() below.
+ * We set up each local APIC with a zero timeout value for now.
+ * Unlike Linux, we don't have to wait for slices etc.
+ */
+void setup_APIC_timer(void * data)
+{
+ unsigned long flags;
+ __save_flags(flags);
+ __sti();
+ __setup_APIC_LVTT(0);
+ __restore_flags(flags);
+}
+
+/*
+ * In this function we calibrate APIC bus clocks to the external timer.
+ *
+ * As a result we have the bus speed and CPU speed in Hz.
+ *
+ * We want to do the calibration only once (for CPU0). CPUs connected by the
+ * same APIC bus have the very same bus frequency.
+ *
+ * This is a bit shoddy, since we use the very same periodic timer
+ * interrupt we are trying to eliminate to calibrate the APIC.
+ */
+
+int __init calibrate_APIC_clock(void)
+{
+ unsigned long long t1 = 0, t2 = 0;
+ long tt1, tt2;
+ long result;
+ int i;
+ const int LOOPS = HZ/10;
+
+ printk("Calibrating APIC timer for CPU%d...\n", smp_processor_id());
+
+ /* Put whatever arbitrary (but long enough) timeout
+ * value into the APIC clock, we just want to get the
+ * counter running for calibration. */
+ __setup_APIC_LVTT(1000000000);
+
+ /* The timer chip counts down to zero. Let's wait
+ * for a wraparound to start exact measurement:
+ * (the current tick might have been already half done) */
+ wait_8254_wraparound();
+
+ /* We wrapped around just now. Let's start: */
+ rdtscll(t1);
+ tt1 = apic_read(APIC_TMCCT);
+
+ /* Let's wait LOOPS wraparounds: */
+ for (i = 0; i < LOOPS; i++)
+ wait_8254_wraparound();
+
+ tt2 = apic_read(APIC_TMCCT);
+ rdtscll(t2);
+
+ /* The APIC bus clock counter is only 32 bits, so it might have
+ * wrapped around -- underflowed, to be exact, as the timer counts
+ * down ;) -- but since we use signed longs no extra care is needed. */
+ result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
+
+ printk("..... CPU speed is %ld.%04ld MHz.\n",
+ ((long)(t2-t1)/LOOPS) / (1000000/HZ),
+ ((long)(t2-t1)/LOOPS) % (1000000/HZ));
+
+ printk("..... Bus speed is %ld.%04ld MHz.\n",
+ result / (1000000/HZ),
+ result % (1000000/HZ));
+
+ /*
+ * KAF: Moved this to time.c where it's calculated relative to the TSC.
+ * Therefore works on machines with no local APIC.
+ */
+ /*cpu_freq = (u64)(((t2-t1)/LOOPS)*HZ);*/
+
+ /* set up multipliers for accurate timer code */
+ bus_freq = result*HZ;
+ bus_cycle = (u32) (1000000000000LL/bus_freq); /* in pico seconds */
+ bus_scale = (1000*262144)/bus_cycle;
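+ /* bus_scale is a fixed-point multiplier: with bus_cycle in picoseconds,
+ * (ns * bus_scale) >> 18 == ns * 1000 / bus_cycle == bus cycles. E.g.
+ * on a 66MHz bus, bus_cycle ~= 15151ps and bus_scale ~= 17302, so a
+ * 1ms (1000000ns) timeout converts to ~66000 bus cycles. */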
+
+ printk("..... bus_scale = 0x%08X\n", bus_scale);
+ /* reset APIC to zero timeout value */
+ __setup_APIC_LVTT(0);
+ return result;
+}
+
+/*
+ * initialise the APIC timers for all CPUs
+ * we start with the first and find out processor frequency and bus speed
+ */
+void __init setup_APIC_clocks (void)
+{
+ printk("Using local APIC timer interrupts.\n");
+ using_apic_timer = 1;
+ __cli();
+ /* calibrate CPU0 for CPU speed and BUS speed */
+ bus_freq = calibrate_APIC_clock();
+ /* Now set up the timer for real. */
+ setup_APIC_timer((void *)bus_freq);
+ __sti();
+ /* and update all other cpus */
+ smp_call_function(setup_APIC_timer, (void *)bus_freq, 1, 1);
+}
+
+#undef APIC_DIVISOR
+
+/*
+ * reprogram the APIC timer. Timeout value is in ns from start of boot
+ * returns 1 on success
+ * returns 0 if the timeout value is too small or in the past.
+ */
+int reprogram_ac_timer(s_time_t timeout)
+{
+ s_time_t now;
+ s_time_t expire;
+ u64 apic_tmict;
+
+ /*
+ * We use this value because we don't trust zero (we think it may just
+ * cause an immediate interrupt). At least this is guaranteed to hold it
+ * off for ages (esp. since the clock ticks on bus clock, not cpu clock!).
+ */
+ if ( timeout == 0 )
+ {
+ apic_tmict = 0xffffffff;
+ goto reprogram;
+ }
+
+ now = NOW();
+ expire = timeout - now; /* value from now */
+
+ if ( expire <= 0 )
+ {
+ Dprintk("APICT[%02d] Timeout in the past 0x%08X%08X > 0x%08X%08X\n",
+ smp_processor_id(), (u32)(now>>32),
+ (u32)now, (u32)(timeout>>32),(u32)timeout);
+ return 0;
+ }
+
+ /*
+ * If we don't have local APIC then we just poll the timer list off the
+ * PIT interrupt. Cheesy but good enough to work on eg. VMware :-)
+ */
+ if ( !cpu_has_apic )
+ return 1;
+
+ /* conversion to bus units */
+ apic_tmict = (((u64)bus_scale) * expire)>>18;
+
+ if ( apic_tmict >= 0xffffffff )
+ {
+ Dprintk("APICT[%02d] Timeout value too large\n", smp_processor_id());
+ apic_tmict = 0xffffffff;
+ }
+
+ if ( apic_tmict == 0 )
+ {
+ Dprintk("APICT[%02d] timeout value too small\n", smp_processor_id());
+ return 0;
+ }
+
+ reprogram:
+ /* Program the timer. */
+ apic_write(APIC_TMICT, (unsigned long)apic_tmict);
+
+ return 1;
+}
+
+unsigned int apic_timer_irqs [NR_CPUS];
+
+void smp_apic_timer_interrupt(struct pt_regs * regs)
+{
+ int cpu = smp_processor_id();
+
+ ack_APIC_irq();
+
+ apic_timer_irqs[cpu]++;
+ perfc_incrc(apic_timer);
+
+ __cpu_raise_softirq(cpu, AC_TIMER_SOFTIRQ);
+}
+
+/*
+ * This interrupt should _never_ happen with our APIC/SMP architecture
+ */
+asmlinkage void smp_spurious_interrupt(void)
+{
+ unsigned long v;
+
+ /*
+ * Check if this really is a spurious interrupt and ACK it
+ * if it is a vectored one. Just in case...
+ * Spurious interrupts should not be ACKed.
+ */
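+ /* The 256-bit ISR is spread across eight 32-bit registers at 16-byte
+ * strides, so (vector & ~0x1f) >> 1 is the byte offset of the right
+ * register, and bit (vector & 0x1f) is the in-service flag itself. */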
+ v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
+ if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
+ ack_APIC_irq();
+
+ /* see sw-dev-man vol 3, chapter 7.4.13.5 */
+ printk("spurious APIC interrupt on CPU#%d, should never happen.\n",
+ smp_processor_id());
+}
+
+/*
+ * This interrupt should never happen with our APIC/SMP architecture
+ */
+
+asmlinkage void smp_error_interrupt(void)
+{
+ unsigned long v, v1;
+
+ /* First tickle the hardware, only then report what went on. -- REW */
+ v = apic_read(APIC_ESR);
+ apic_write(APIC_ESR, 0);
+ v1 = apic_read(APIC_ESR);
+ ack_APIC_irq();
+ atomic_inc(&irq_err_count);
+
+ /* Here is what the APIC error bits mean:
+ 0: Send CS error
+ 1: Receive CS error
+ 2: Send accept error
+ 3: Receive accept error
+ 4: Reserved
+ 5: Send illegal vector
+ 6: Received illegal vector
+ 7: Illegal register address
+ */
+ printk ("APIC error on CPU%d: %02lx(%02lx)\n",
+ smp_processor_id(), v , v1);
+}
+
+/*
+ * This initializes the IO-APIC and APIC hardware if this is
+ * a UP kernel.
+ */
+int __init APIC_init_uniprocessor (void)
+{
+ if (!smp_found_config && !cpu_has_apic)
+ return -1;
+
+ /*
+ * Complain if the BIOS pretends there is one.
+ */
+ if (!cpu_has_apic&&APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]))
+ {
+ printk("BIOS bug, local APIC #%d not detected!...\n",
+ boot_cpu_physical_apicid);
+ return -1;
+ }
+
+ verify_local_APIC();
+
+ connect_bsp_APIC();
+
+#ifdef CONFIG_SMP
+ cpu_online_map = 1;
+#endif
+ phys_cpu_present_map = 1;
+ apic_write_around(APIC_ID, boot_cpu_physical_apicid);
+
+ apic_pm_init2();
+
+ setup_local_APIC();
+
+#ifdef CONFIG_X86_IO_APIC
+ if (smp_found_config && nr_ioapics)
+ setup_IO_APIC();
+#endif
+ setup_APIC_clocks();
+
+ return 0;
+}
--- /dev/null
+#include <xen/config.h>
+#include <hypervisor-ifs/hypervisor-if.h>
+#include <asm/page.h>
+
+#define SECONDARY_CPU_FLAG 0xA5A5A5A5
+
+ .text
+
+ENTRY(start)
+ jmp hal_entry
+
+ .align 4
+
+/*** MULTIBOOT HEADER ****/
+ /* Magic number indicating a Multiboot header. */
+ .long 0x1BADB002
+ /* Flags to bootloader (see Multiboot spec). */
+ .long 0x00000002
+ /* Checksum: must be the negated sum of the first two fields. */
+ .long -0x1BADB004
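+ /* (The Multiboot spec requires magic + flags + checksum == 0, and
+ indeed -0x1BADB004 == -(0x1BADB002 + 0x00000002).) */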
+
+hal_entry:
+ /* Set up a few descriptors: on entry only CS is guaranteed good. */
+ lgdt %cs:nopaging_gdt_descr-__PAGE_OFFSET
+ mov $(__HYPERVISOR_DS),%ecx
+ mov %ecx,%ds
+ mov %ecx,%es
+ mov %ecx,%fs
+ mov %ecx,%gs
+ ljmp $(__HYPERVISOR_CS),$(1f)-__PAGE_OFFSET
+1: lss stack_start-__PAGE_OFFSET,%esp
+
+ /* Reset EFLAGS (subsumes CLI and CLD). */
+ pushl $0
+ popf
+
+ /* CPU type checks. We need P6+. */
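+ /* 0x200000 is EFLAGS.ID (bit 21), which can be toggled only on CPUs */
+ /* with CPUID support: the POPF above cleared it, so it must read */
+ /* back clear here, and must read back set once we set it below. */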
+ mov $0x200000,%edx
+ pushfl
+ pop %ecx
+ and %edx,%ecx
+ jne bad_cpu # ID bit should be clear
+ pushl %edx
+ popfl
+ pushfl
+ pop %ecx
+ and %edx,%ecx
+ je bad_cpu # ID bit should be set
+
+ /* Set up CR0. */
+ mov %cr0,%ecx
+ and $0x00000011,%ecx # save ET and PE
+ or $0x00050022,%ecx # set AM, WP, NE and MP
+ mov %ecx,%cr0
+
+ /* Set up FPU. */
+ fninit
+
+ /* Set up CR4, except global flag which Intel requires should be */
+ /* left until after paging is enabled (IA32 Manual Vol. 3, Sec. 2.5) */
+ mov %cr4,%ecx
+ or mmu_cr4_features-__PAGE_OFFSET,%ecx
+ mov %ecx,mmu_cr4_features-__PAGE_OFFSET
+ and $0x7f,%ecx /* disable GLOBAL bit */
+ mov %ecx,%cr4
+
+#ifdef CONFIG_SMP
+ /* Is this a non-boot processor? */
+ cmp $(SECONDARY_CPU_FLAG),%ebx
+ jne continue_boot_cpu
+
+ call start_paging
+ lidt idt_descr
+ jmp start_secondary
+#endif
+
+continue_boot_cpu:
+ add $__PAGE_OFFSET,%ebx
+ push %ebx /* Multiboot info struct */
+ push %eax /* Multiboot magic value */
+
+ /* Initialize BSS (no nasty surprises!) */
+ mov $__bss_start-__PAGE_OFFSET,%edi
+ mov $_end-__PAGE_OFFSET,%ecx
+ sub %edi,%ecx
+ xor %eax,%eax
+ rep stosb
+
+ /* Copy all modules (dom0 + initrd if present) out of the Xen heap */
+ mov (%esp),%eax
+ cmp $0x2BADB002,%eax
+ jne skip_dom0_copy
+ sub $__PAGE_OFFSET,%ebx /* turn back into a phys addr */
+ mov 0x14(%ebx),%edi /* mbi->mods_count */
+ dec %edi /* mbi->mods_count-- */
+ jb skip_dom0_copy /* skip if no modules */
+ mov 0x18(%ebx),%eax /* mbi->mods_addr */
+ mov (%eax),%ebx /* %ebx = mod[0]->mod_start */
+ shl $4,%edi
+ add %edi,%eax
+ mov 0x4(%eax),%eax /* %eax = mod[mod_count-1]->end */
+ mov %eax,%ecx
+ sub %ebx,%ecx /* %ecx = byte len of all mods */
+ mov $(MAX_DIRECTMAP_ADDRESS), %edi
+ add %ecx, %edi /* %edi = dst + length */
+ shr $2,%ecx /* %ecx = length/4 */
+1: sub $4,%eax /* %eax = src, %edi = dst */
+ sub $4,%edi
+ mov (%eax),%ebx
+ mov %ebx,(%edi)
+ loop 1b
+skip_dom0_copy:
+
+ /* Initialize low and high mappings of all memory with 4MB pages */
+ mov $idle_pg_table-__PAGE_OFFSET,%edi
+ mov $0x1e3,%eax /* PRESENT+RW+A+D+4MB+GLOBAL */
+1: mov %eax,__PAGE_OFFSET>>20(%edi) /* high mapping */
+ stosl /* low mapping */
+ add $(1<<L2_PAGETABLE_SHIFT),%eax
+ cmp $MAX_DIRECTMAP_ADDRESS+0x1e3,%eax
+ jne 1b
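+ /* (0x1e3 = GLOBAL(0x100)+4MB(0x80)+DIRTY(0x40)+ACCESSED(0x20) */
+ /* +RW(0x2)+PRESENT(0x1), as the comment above says.) */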
+
+ call start_paging
+ call setup_idt
+ lidt idt_descr
+
+ /* Call into main C routine. This should never return.*/
+ call cmain
+ ud2 /* Force a panic (invalid opcode). */
+
+start_paging:
+ mov $idle_pg_table-__PAGE_OFFSET,%eax
+ mov %eax,%cr3
+ mov %cr0,%eax
+ or $0x80010000,%eax /* set PG and WP bits */
+ mov %eax,%cr0
+ jmp 1f
+1: /* Install relocated selectors (FS/GS unused). */
+ lgdt gdt_descr
+ mov $(__HYPERVISOR_DS),%ecx
+ mov %ecx,%ds
+ mov %ecx,%es
+ mov %ecx,%ss
+ ljmp $(__HYPERVISOR_CS),$1f
+1: /* Paging enabled, so we can now enable GLOBAL mappings in CR4. */
+ movl mmu_cr4_features,%ecx
+ movl %ecx,%cr4
+ /* Relocate ESP */
+ add $__PAGE_OFFSET,%esp
+ /* Relocate EIP via return jump */
+ pop %ecx
+ add $__PAGE_OFFSET,%ecx
+ jmp *%ecx
+
+
+/*** INTERRUPT INITIALISATION ***/
+
+setup_idt:
+ lea ignore_int,%edx
+ mov $(__HYPERVISOR_CS << 16),%eax
+ mov %dx,%ax /* selector = 0x0010 = cs */
+ mov $0x8E00,%dx /* interrupt gate - dpl=0, present */
+
+ lea SYMBOL_NAME(idt_table),%edi
+ mov $256,%ecx
+1: mov %eax,(%edi)
+ mov %edx,4(%edi)
+ add $8,%edi
+ loop 1b
+ ret
+
+/* This is the default interrupt handler. */
+int_msg:
+ .asciz "Unknown interrupt\n"
+ ALIGN
+ignore_int:
+ cld
+ push %eax
+ push %ecx
+ push %edx
+ pushl %es
+ pushl %ds
+ mov $(__HYPERVISOR_DS),%eax
+ mov %eax,%ds
+ mov %eax,%es
+ pushl $int_msg
+ call SYMBOL_NAME(printf)
+1: jmp 1b
+
+bad_cpu_msg:
+ .asciz "Bad CPU type. Need P6+."
+ ALIGN
+bad_cpu:
+ pushl $bad_cpu_msg
+ call SYMBOL_NAME(printf)
+1: jmp 1b
+
+/*** STACK LOCATION ***/
+
+ENTRY(stack_start)
+ .long SYMBOL_NAME(cpu0_stack) + 8100 - __PAGE_OFFSET
+ .long __HYPERVISOR_DS
+
+/*** DESCRIPTOR TABLES ***/
+
+.globl SYMBOL_NAME(idt)
+.globl SYMBOL_NAME(gdt)
+
+ ALIGN
+
+ .word 0
+idt_descr:
+ .word 256*8-1
+SYMBOL_NAME(idt):
+ .long SYMBOL_NAME(idt_table)
+
+ .word 0
+gdt_descr:
+ .word (LAST_RESERVED_GDT_ENTRY*8)+7
+SYMBOL_NAME(gdt):
+ .long SYMBOL_NAME(gdt_table) /* gdt base */
+
+ .word 0
+nopaging_gdt_descr:
+ .word (LAST_RESERVED_GDT_ENTRY*8)+7
+ .long SYMBOL_NAME(gdt_table)-__PAGE_OFFSET
+
+ ALIGN
+/* NB. Rings != 0 get access up to 0xFC400000. This allows access to the */
+/* machine->physical mapping table. Ring 0 can access all memory. */
+ENTRY(gdt_table)
+ .fill FIRST_RESERVED_GDT_ENTRY,8,0
+ .quad 0x0000000000000000 /* unused */
+ .quad 0x00cf9a000000ffff /* 0x0808 ring 0 4.00GB code at 0x0 */
+ .quad 0x00cf92000000ffff /* 0x0810 ring 0 4.00GB data at 0x0 */
+ .quad 0x00cfba000000c3ff /* 0x0819 ring 1 3.95GB code at 0x0 */
+ .quad 0x00cfb2000000c3ff /* 0x0821 ring 1 3.95GB data at 0x0 */
+ .quad 0x00cffa000000c3ff /* 0x082b ring 3 3.95GB code at 0x0 */
+ .quad 0x00cff2000000c3ff /* 0x0833 ring 3 3.95GB data at 0x0 */
+ .quad 0x0000000000000000 /* unused */
+ .fill 2*NR_CPUS,8,0 /* space for TSS and LDT per CPU */
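+/* E.g. decoding the ring-1 code descriptor 0x00cfba000000c3ff: base 0x0, */
+/* limit 0xfc3ff 4kB pages (so accessible up to 0xFC400000), access byte */
+/* 0xba = present, DPL 1, readable code. Ring 0 uses limit 0xfffff (4GB). */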
+
+ .org 0x1000
+ENTRY(idle_pg_table) # Initial page directory is 4kB
+ .org 0x2000
+ENTRY(cpu0_stack) # Initial stack is 8kB
+ .org 0x4000
+ENTRY(stext)
+ENTRY(_stext)
--- /dev/null
+/*
+ * Precise Delay Loops for i386
+ *
+ * Copyright (C) 1993 Linus Torvalds
+ * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *
+ * The __delay function must _NOT_ be inlined as its execution time
+ * depends wildly on alignment on many x86 processors. The additional
+ * jump magic is needed to get the timing stable on all the CPUs
+ * we have to worry about.
+ */
+
+#include <xen/config.h>
+#include <xen/delay.h>
+#include <asm/msr.h>
+#include <asm/processor.h>
+
+void __udelay(unsigned long usecs)
+{
+ unsigned long ticks = usecs * ticks_per_usec;
+ unsigned long s, e;
+
+ rdtscl(s);
+ do
+ {
+ rep_nop();
+ rdtscl(e);
+ } while ((e-s) < ticks);
+}
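+
+/*
+ * NB. Only the low 32 bits of the TSC are read, but the unsigned
+ * subtraction (e-s) stays correct across wraparound as long as the
+ * requested delay is below 2^32 TSC ticks.
+ */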
--- /dev/null
+/******************************************************************************
+ * domain_page.h
+ *
+ * Allow temporary mapping of domain pages. Based on ideas from the
+ * Linux PKMAP code -- the copyrights and credits are retained below.
+ */
+
+/*
+ * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
+ * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de *
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <xen/mm.h>
+#include <xen/perfc.h>
+#include <asm/domain_page.h>
+#include <asm/flushtlb.h>
+
+unsigned long *mapcache;
+static unsigned int map_idx, shadow_map_idx[NR_CPUS];
+static spinlock_t map_lock = SPIN_LOCK_UNLOCKED;
+
+/* Use a spare PTE bit to mark entries ready for recycling. */
+#define READY_FOR_TLB_FLUSH (1<<10)
+
+static void flush_all_ready_maps(void)
+{
+ unsigned long *cache = mapcache;
+
+ /* A bit skanky -- depends on having an aligned PAGE_SIZE set of PTEs. */
+ do { if ( (*cache & READY_FOR_TLB_FLUSH) ) *cache = 0; }
+ while ( ((unsigned long)(++cache) & ~PAGE_MASK) != 0 );
+
+ perfc_incrc(domain_page_tlb_flush);
+ local_flush_tlb();
+}
+
+
+void *map_domain_mem(unsigned long pa)
+{
+ unsigned long va;
+ unsigned int idx, cpu = smp_processor_id();
+ unsigned long *cache = mapcache;
+ unsigned long flags;
+
+ perfc_incrc(map_domain_mem_count);
+
+ spin_lock_irqsave(&map_lock, flags);
+
+ /* Has some other CPU caused a wrap? We must flush if so. */
+ if ( map_idx < shadow_map_idx[cpu] )
+ {
+ perfc_incrc(domain_page_tlb_flush);
+ local_flush_tlb();
+ }
+
+ for ( ; ; )
+ {
+ idx = map_idx = (map_idx + 1) & (MAPCACHE_ENTRIES - 1);
+ if ( idx == 0 ) flush_all_ready_maps();
+ if ( cache[idx] == 0 ) break;
+ }
+
+ cache[idx] = (pa & PAGE_MASK) | __PAGE_HYPERVISOR;
+
+ spin_unlock_irqrestore(&map_lock, flags);
+
+ shadow_map_idx[cpu] = idx;
+
+ va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT) + (pa & ~PAGE_MASK);
+ return (void *)va;
+}
+
+void unmap_domain_mem(void *va)
+{
+ unsigned int idx;
+ idx = ((unsigned long)va - MAPCACHE_VIRT_START) >> PAGE_SHIFT;
+ mapcache[idx] |= READY_FOR_TLB_FLUSH;
+}
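+
+/*
+ * Typical usage (sketch; 'pa' is any machine-physical address):
+ *
+ * char *p = map_domain_mem(pa);
+ * memcpy(p, buf, len);
+ * unmap_domain_mem(p);
+ *
+ * The PTE is then recycled lazily at the next mapcache wrap.
+ */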
--- /dev/null
+/*
+ * linux/arch/i386/entry.S
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * entry.S contains the system-call and fault low-level handling routines.
+ * This also contains the timer-interrupt handler, as well as all interrupts
+ * and faults that can result in a task-switch.
+ *
+ * Stack layout in 'ret_from_system_call':
+ * 0(%esp) - %ebx
+ * 4(%esp) - %ecx
+ * 8(%esp) - %edx
+ * C(%esp) - %esi
+ * 10(%esp) - %edi
+ * 14(%esp) - %ebp
+ * 18(%esp) - %eax
+ * 1C(%esp) - %ds
+ * 20(%esp) - %es
+ * 24(%esp) - %fs
+ * 28(%esp) - %gs
+ * 2C(%esp) - orig_eax
+ * 30(%esp) - %eip
+ * 34(%esp) - %cs
+ * 38(%esp) - %eflags
+ * 3C(%esp) - %oldesp
+ * 40(%esp) - %oldss
+ *
+ * "current" is in register %ebx during any slow entries.
+ */
+/* The idea for callbacks from monitor -> guest OS.
+ *
+ * First, we require that all callbacks (either via a supplied
+ * interrupt-descriptor-table, or via the special event or failsafe callbacks
+ * in the shared-info-structure) are to ring 1. This just makes life easier,
+ * in that it means we don't have to do messy GDT/LDT lookups to find
+ * out the privilege level of the return code selector. That code
+ * would just be a hassle to write, and would need to account for running
+ * off the end of the GDT/LDT, for example. For all callbacks we check
+ * that the provided return CS is not == __HYPERVISOR_{CS,DS}. Apart from
+ * that we're safe, as we don't allow a guest OS to install ring-0
+ * privileges into the GDT/LDT.
+ * It's up to the guest OS to ensure all returns via the IDT are to ring 1.
+ * If not, we load incorrect SS/ESP values from the TSS (for ring 1 rather
+ * than the correct ring) and bad things are bound to ensue -- IRET is
+ * likely to fault, and we may end up killing the domain (no harm can
+ * come to the hypervisor itself, though).
+ *
+ * When doing a callback, we check if the return CS is in ring 0. If so,
+ * the callback is delayed until the next return to ring != 0.
+ * If return CS is in ring 1, then we create a callback frame
+ * starting at return SS/ESP. The base of the frame does an intra-privilege
+ * interrupt-return.
+ * If return CS is in ring > 1, we create a callback frame starting
+ * at SS/ESP taken from appropriate section of the current TSS. The base
+ * of the frame does an inter-privilege interrupt-return.
+ *
+ * Note that the "failsafe callback" uses a special stackframe:
+ * { return_DS, return_ES, return_FS, return_GS, return_EIP,
+ * return_CS, return_EFLAGS[, return_ESP, return_SS] }
+ * That is, original values for DS/ES/FS/GS are placed on stack rather than
+ * in DS/ES/FS/GS themselves. Why? It saves us loading them, only to have them
+ * saved/restored in guest OS. Furthermore, if we load them we may cause
+ * a fault if they are invalid, which is a hassle to deal with. We avoid
+ * that problem if we don't load them :-) This property allows us to use
+ * the failsafe callback as a fallback: if we ever fault on loading DS/ES/FS/GS
+ * on return to ring != 0, we can simply package it up as a return via
+ * the failsafe callback, and let the guest OS sort it out (perhaps by
+ * killing an application process). Note that we also do this for any
+ * faulting IRET -- just let the guest OS handle it via the event
+ * callback.
+ *
+ * We terminate a domain in the following cases:
+ * - creating a callback stack frame (due to bad ring-1 stack).
+ * - faulting IRET on entry to failsafe callback handler.
+ * So, each domain must keep its ring-1 %ss/%esp and failsafe callback
+ * handler in good order (absolutely no faults allowed!).
+ */
+
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <hypervisor-ifs/hypervisor-if.h>
+
+EBX = 0x00
+ECX = 0x04
+EDX = 0x08
+ESI = 0x0C
+EDI = 0x10
+EBP = 0x14
+EAX = 0x18
+DS = 0x1C
+ES = 0x20
+FS = 0x24
+GS = 0x28
+ORIG_EAX = 0x2C
+EIP = 0x30
+CS = 0x34
+EFLAGS = 0x38
+OLDESP = 0x3C
+OLDSS = 0x40
+
+/* Offsets in task_struct */
+PROCESSOR = 0
+HYP_EVENTS = 2
+SHARED_INFO = 4
+EVENT_SEL = 8
+EVENT_ADDR = 12
+FAILSAFE_BUFFER = 16
+FAILSAFE_SEL = 32
+FAILSAFE_ADDR = 36
+
+/* Offsets in shared_info_t */
+#define UPCALL_PENDING /* 0 */
+#define UPCALL_MASK 1
+
+/* Offsets in guest_trap_bounce */
+GTB_ERROR_CODE = 0
+GTB_CR2 = 4
+GTB_FLAGS = 8
+GTB_CS = 10
+GTB_EIP = 12
+GTBF_TRAP = 1
+GTBF_TRAP_NOCODE = 2
+GTBF_TRAP_CR2 = 4
+
+CF_MASK = 0x00000001
+IF_MASK = 0x00000200
+NT_MASK = 0x00004000
+
+
+
+#define SAVE_ALL_NOSEGREGS \
+ cld; \
+ pushl %gs; \
+ pushl %fs; \
+ pushl %es; \
+ pushl %ds; \
+ pushl %eax; \
+ pushl %ebp; \
+ pushl %edi; \
+ pushl %esi; \
+ pushl %edx; \
+ pushl %ecx; \
+ pushl %ebx; \
+
+#define SAVE_ALL \
+ SAVE_ALL_NOSEGREGS \
+ movl $(__HYPERVISOR_DS),%edx; \
+ movl %edx,%ds; \
+ movl %edx,%es; \
+ movl %edx,%fs; \
+ movl %edx,%gs; \
+ sti;
+
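+/*
+ * The 'current' task pointer is stashed in the last word of the 4kB,
+ * 4kB-aligned CPU stack page: OR-ing ESP with 4096-4 (and clearing the
+ * bottom two bits) yields that word's address, which is then
+ * dereferenced.
+ */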
+#define GET_CURRENT(reg) \
+ movl $4096-4, reg; \
+ orl %esp, reg; \
+ andl $~3,reg; \
+ movl (reg),reg;
+
+ENTRY(continue_nonidle_task)
+ GET_CURRENT(%ebx)
+ jmp test_all_events
+
+ ALIGN
+/*
+ * HYPERVISOR_multicall(call_list, nr_calls)
+ * Execute a list of 'nr_calls' system calls, pointed at by 'call_list'.
+ * This is fairly easy except that:
+ * 1. We may fault reading the call list, and must patch that up; and
+ * 2. We cannot recursively call HYPERVISOR_multicall, or a malicious
+ * caller could cause our stack to blow up.
+ */
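+/*
+ * Each call_list entry is laid out as { op, args[0..4], result }: the
+ * code below pushes args[4]..args[0] from offsets 20..4, dispatches on
+ * the op at offset 0, writes the return value back at offset 24, and
+ * advances by ARGS_PER_MULTICALL_ENTRY*4 bytes per iteration.
+ */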
+do_multicall:
+ popl %eax
+ cmpl $SYMBOL_NAME(multicall_return_from_call),%eax
+ je multicall_return_from_call
+ pushl %ebx
+ movl 4(%esp),%ebx /* EBX == call_list */
+ movl 8(%esp),%ecx /* ECX == nr_calls */
+multicall_loop:
+ pushl %ecx
+multicall_fault1:
+ pushl 20(%ebx) # args[4]
+multicall_fault2:
+ pushl 16(%ebx) # args[3]
+multicall_fault3:
+ pushl 12(%ebx) # args[2]
+multicall_fault4:
+ pushl 8(%ebx) # args[1]
+multicall_fault5:
+ pushl 4(%ebx) # args[0]
+multicall_fault6:
+ movl (%ebx),%eax # op
+ andl $255,%eax
+ call *SYMBOL_NAME(hypervisor_call_table)(,%eax,4)
+multicall_return_from_call:
+multicall_fault7:
+ movl %eax,24(%ebx) # args[5] == result
+ addl $20,%esp
+ popl %ecx
+ addl $(ARGS_PER_MULTICALL_ENTRY*4),%ebx
+ loop multicall_loop
+ popl %ebx
+ xorl %eax,%eax
+ jmp ret_from_hypervisor_call
+
+.section __ex_table,"a"
+ .align 4
+ .long multicall_fault1, multicall_fixup1
+ .long multicall_fault2, multicall_fixup2
+ .long multicall_fault3, multicall_fixup3
+ .long multicall_fault4, multicall_fixup4
+ .long multicall_fault5, multicall_fixup5
+ .long multicall_fault6, multicall_fixup6
+.previous
+
+.section .fixup,"ax"
+multicall_fixup6:
+ addl $4,%esp
+multicall_fixup5:
+ addl $4,%esp
+multicall_fixup4:
+ addl $4,%esp
+multicall_fixup3:
+ addl $4,%esp
+multicall_fixup2:
+ addl $4,%esp
+multicall_fixup1:
+ addl $4,%esp
+ popl %ebx
+ movl $-EFAULT,%eax
+ jmp ret_from_hypervisor_call
+.previous
+
+ ALIGN
+restore_all_guest:
+ # First, may need to restore %ds if clobbered by create_bounce_frame
+ pushl %ss
+ popl %ds
+ # Second, create a failsafe copy of DS,ES,FS,GS in case any are bad
+ leal DS(%esp),%esi
+ leal FAILSAFE_BUFFER(%ebx),%edi
+ movsl
+ movsl
+ movsl
+ movsl
+ # Finally, restore guest registers -- faults will cause failsafe
+ popl %ebx
+ popl %ecx
+ popl %edx
+ popl %esi
+ popl %edi
+ popl %ebp
+ popl %eax
+1: popl %ds
+2: popl %es
+3: popl %fs
+4: popl %gs
+ addl $4,%esp
+5: iret
+.section .fixup,"ax"
+10: subl $4,%esp
+ pushl %gs
+9: pushl %fs
+8: pushl %es
+7: pushl %ds
+6: pushl %eax
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %edx
+ pushl %ecx
+ pushl %ebx
+ pushl %ss
+ popl %ds
+ pushl %ss
+ popl %es
+ jmp failsafe_callback
+.previous
+.section __ex_table,"a"
+ .align 4
+ .long 1b,6b
+ .long 2b,7b
+ .long 3b,8b
+ .long 4b,9b
+ .long 5b,10b
+.previous
+
+/* No special register assumptions */
+failsafe_callback:
+ GET_CURRENT(%ebx)
+ movzwl PROCESSOR(%ebx),%eax
+ shl $4,%eax
+ lea guest_trap_bounce(%eax),%edx
+ movl FAILSAFE_ADDR(%ebx),%eax
+ movl %eax,GTB_EIP(%edx)
+ movl FAILSAFE_SEL(%ebx),%eax
+ movw %ax,GTB_CS(%edx)
+ call create_bounce_frame
+ subl $16,%esi # add DS/ES/FS/GS to failsafe stack frame
+ leal FAILSAFE_BUFFER(%ebx),%ebp
+ movl 0(%ebp),%eax # DS
+FAULT1: movl %eax,(%esi)
+ movl 4(%ebp),%eax # ES
+FAULT2: movl %eax,4(%esi)
+ movl 8(%ebp),%eax # FS
+FAULT3: movl %eax,8(%esi)
+ movl 12(%ebp),%eax # GS
+FAULT4: movl %eax,12(%esi)
+ movl %esi,OLDESP(%esp)
+ popl %ebx
+ popl %ecx
+ popl %edx
+ popl %esi
+ popl %edi
+ popl %ebp
+ popl %eax
+ addl $20,%esp # skip DS/ES/FS/GS/ORIG_EAX
+FAULT5: iret
+
+
+ ALIGN
+# Simple restore -- we should never fault as we will only interrupt ring 0
+# when sane values have been placed in all registers. The only exception is
+# NMI, which may interrupt before good values have been placed in DS-GS.
+# The NMI return code deals with this problem itself.
+restore_all_xen:
+ popl %ebx
+ popl %ecx
+ popl %edx
+ popl %esi
+ popl %edi
+ popl %ebp
+ popl %eax
+ popl %ds
+ popl %es
+ popl %fs
+ popl %gs
+ addl $4,%esp
+ iret
+
+ ALIGN
+ENTRY(hypervisor_call)
+ pushl %eax # save orig_eax
+ SAVE_ALL
+ GET_CURRENT(%ebx)
+ andl $255,%eax
+ call *SYMBOL_NAME(hypervisor_call_table)(,%eax,4)
+
+ret_from_hypervisor_call:
+ movl %eax,EAX(%esp) # save the return value
+
+test_all_events:
+ xorl %ecx,%ecx
+ notl %ecx
+ cli # tests must not race interrupts
+/*test_softirqs:*/
+ movzwl PROCESSOR(%ebx),%eax
+ shl $6,%eax # sizeof(irq_cpustat) == 64
+ test %ecx,SYMBOL_NAME(irq_stat)(%eax,1)
+ jnz process_softirqs
+/*test_hyp_events:*/
+ testw %cx, HYP_EVENTS(%ebx)
+ jnz process_hyp_events
+/*test_guest_events:*/
+ movl SHARED_INFO(%ebx),%eax
+ testb $0xFF,UPCALL_MASK(%eax)
+ jnz restore_all_guest
+ testb $0xFF,UPCALL_PENDING(%eax)
+ jz restore_all_guest
+ movb $1,UPCALL_MASK(%eax) # Upcalls are masked during delivery
+/*process_guest_events:*/
+ movzwl PROCESSOR(%ebx),%edx
+ shl $4,%edx # sizeof(guest_trap_bounce) == 16
+ lea guest_trap_bounce(%edx),%edx
+ movl EVENT_ADDR(%ebx),%eax
+ movl %eax,GTB_EIP(%edx)
+ movl EVENT_SEL(%ebx),%eax
+ movw %ax,GTB_CS(%edx)
+ call create_bounce_frame
+ jmp restore_all_guest
+
+ ALIGN
+process_softirqs:
+ sti
+ call SYMBOL_NAME(do_softirq)
+ jmp test_all_events
+
+ ALIGN
+process_hyp_events:
+ sti
+ call SYMBOL_NAME(do_hyp_events)
+ jmp test_all_events
+
+/* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK: */
+/* {EIP, CS, EFLAGS, [ESP, SS]} */
+/* %edx == guest_trap_bounce, %ebx == task_struct */
+/* %eax,%ecx are clobbered. %ds:%esi contain new OLDSS/OLDESP. */
+create_bounce_frame:
+ mov CS+4(%esp),%cl
+ test $2,%cl
+ jz 1f /* jump if returning to an existing ring-1 activation */
+ /* obtain ss/esp from TSS -- no current ring-1 activations */
+ movzwl PROCESSOR(%ebx),%eax
+ /* next 4 lines multiply %eax by 8320, which is sizeof(tss_struct) */
+ movl %eax, %ecx
+ shll $7, %ecx
+ shll $13, %eax
+ addl %ecx,%eax
+ addl $init_tss + 12,%eax
+ movl (%eax),%esi /* tss->esp1 */
+FAULT6: movl 4(%eax),%ds /* tss->ss1 */
+ /* base of stack frame must contain ss/esp (inter-priv iret) */
+ subl $8,%esi
+ movl OLDESP+4(%esp),%eax
+FAULT7: movl %eax,(%esi)
+ movl OLDSS+4(%esp),%eax
+FAULT8: movl %eax,4(%esi)
+ jmp 2f
+1: /* obtain ss/esp from oldss/oldesp -- a ring-1 activation exists */
+ movl OLDESP+4(%esp),%esi
+FAULT9: movl OLDSS+4(%esp),%ds
+2: /* Construct a stack frame: EFLAGS, CS/EIP */
+ subl $12,%esi
+ movl EIP+4(%esp),%eax
+FAULT10:movl %eax,(%esi)
+ movl CS+4(%esp),%eax
+FAULT11:movl %eax,4(%esi)
+ movl EFLAGS+4(%esp),%eax
+FAULT12:movl %eax,8(%esi)
+ /* Rewrite our stack frame and return to ring 1. */
+ /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
+ andl $0xfffcbeff,%eax
+ movl %eax,EFLAGS+4(%esp)
+ movl %ds,OLDSS+4(%esp)
+ movl %esi,OLDESP+4(%esp)
+ movzwl %es:GTB_CS(%edx),%eax
+ movl %eax,CS+4(%esp)
+ movl %es:GTB_EIP(%edx),%eax
+ movl %eax,EIP+4(%esp)
+ ret
+
+
+.section __ex_table,"a"
+ .align 4
+ .long FAULT1, crash_domain_fixup3 # Fault writing to ring-1 stack
+ .long FAULT2, crash_domain_fixup3 # Fault writing to ring-1 stack
+ .long FAULT3, crash_domain_fixup3 # Fault writing to ring-1 stack
+ .long FAULT4, crash_domain_fixup3 # Fault writing to ring-1 stack
+ .long FAULT5, crash_domain_fixup1 # Fault executing failsafe iret
+ .long FAULT6, crash_domain_fixup2 # Fault loading ring-1 stack selector
+ .long FAULT7, crash_domain_fixup2 # Fault writing to ring-1 stack
+ .long FAULT8, crash_domain_fixup2 # Fault writing to ring-1 stack
+ .long FAULT9, crash_domain_fixup2 # Fault loading ring-1 stack selector
+ .long FAULT10,crash_domain_fixup2 # Fault writing to ring-1 stack
+ .long FAULT11,crash_domain_fixup2 # Fault writing to ring-1 stack
+ .long FAULT12,crash_domain_fixup2 # Fault writing to ring-1 stack
+ .long FAULT13,crash_domain_fixup3 # Fault writing to ring-1 stack
+ .long FAULT14,crash_domain_fixup3 # Fault writing to ring-1 stack
+.previous
+
+# This handler kills domains which experience unrecoverable faults.
+.section .fixup,"ax"
+crash_domain_fixup1:
+ subl $4,%esp
+ SAVE_ALL
+ jmp crash_domain
+crash_domain_fixup2:
+ addl $4,%esp
+crash_domain_fixup3:
+ pushl %ss
+ popl %ds
+ jmp crash_domain
+.previous
+
+ ALIGN
+process_guest_exception_and_events:
+ movzwl PROCESSOR(%ebx),%eax
+ shl $4,%eax
+ lea guest_trap_bounce(%eax),%edx
+ testb $~0,GTB_FLAGS(%edx)
+ jz test_all_events
+ call create_bounce_frame # just the basic frame
+ mov %es:GTB_FLAGS(%edx),%cl
+ test $GTBF_TRAP_NOCODE,%cl
+ jnz 2f
+ subl $4,%esi # push error_code onto guest frame
+ movl %es:GTB_ERROR_CODE(%edx),%eax
+FAULT13:movl %eax,(%esi)
+ test $GTBF_TRAP_CR2,%cl
+ jz 1f
+ subl $4,%esi # push %cr2 onto guest frame
+ movl %es:GTB_CR2(%edx),%eax
+FAULT14:movl %eax,(%esi)
+1: movl %esi,OLDESP(%esp)
+2: push %es # unclobber %ds
+ pop %ds
+ movb $0,GTB_FLAGS(%edx)
+ jmp test_all_events
+
+ ALIGN
+ENTRY(ret_from_intr)
+ GET_CURRENT(%ebx)
+ movb CS(%esp),%al
+ testb $3,%al # return to non-supervisor?
+ jne test_all_events
+ jmp restore_all_xen
+
+ENTRY(divide_error)
+ pushl $0 # no error code
+ pushl $ SYMBOL_NAME(do_divide_error)
+ ALIGN
+error_code:
+ pushl %fs
+ pushl %es
+ pushl %ds
+ pushl %eax
+ xorl %eax,%eax
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %edx
+ decl %eax # eax = -1
+ pushl %ecx
+ pushl %ebx
+ cld
+ movl %gs,%ecx
+ movl ORIG_EAX(%esp), %esi # get the error code
+ movl GS(%esp), %edi # get the function address
+ movl %eax, ORIG_EAX(%esp)
+ movl %ecx, GS(%esp)
+ movl $(__HYPERVISOR_DS),%edx
+ movl %edx,%ds
+ movl %edx,%es
+ movl %edx,%fs
+ movl %edx,%gs
+ movl %esp,%edx
+ pushl %esi # push the error code
+ pushl %edx # push the pt_regs pointer
+ GET_CURRENT(%ebx)
+ call *%edi
+ addl $8,%esp
+ movb CS(%esp),%al
+ testb $3,%al
+ je restore_all_xen
+ jmp process_guest_exception_and_events
+
+ENTRY(coprocessor_error)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_coprocessor_error)
+ jmp error_code
+
+ENTRY(simd_coprocessor_error)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_simd_coprocessor_error)
+ jmp error_code
+
+ENTRY(device_not_available)
+ pushl $0
+ pushl $SYMBOL_NAME(math_state_restore)
+ jmp error_code
+
+ENTRY(debug)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_debug)
+ jmp error_code
+
+ENTRY(int3)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_int3)
+ jmp error_code
+
+ENTRY(overflow)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_overflow)
+ jmp error_code
+
+ENTRY(bounds)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_bounds)
+ jmp error_code
+
+ENTRY(invalid_op)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_invalid_op)
+ jmp error_code
+
+ENTRY(coprocessor_segment_overrun)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun)
+ jmp error_code
+
+ENTRY(invalid_TSS)
+ pushl $ SYMBOL_NAME(do_invalid_TSS)
+ jmp error_code
+
+ENTRY(segment_not_present)
+ pushl $ SYMBOL_NAME(do_segment_not_present)
+ jmp error_code
+
+ENTRY(stack_segment)
+ pushl $ SYMBOL_NAME(do_stack_segment)
+ jmp error_code
+
+ENTRY(general_protection)
+ pushl $ SYMBOL_NAME(do_general_protection)
+ jmp error_code
+
+ENTRY(alignment_check)
+ pushl $ SYMBOL_NAME(do_alignment_check)
+ jmp error_code
+
+ENTRY(page_fault)
+ pushl $ SYMBOL_NAME(do_page_fault)
+ jmp error_code
+
+ENTRY(machine_check)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_machine_check)
+ jmp error_code
+
+ENTRY(spurious_interrupt_bug)
+ pushl $0
+ pushl $ SYMBOL_NAME(do_spurious_interrupt_bug)
+ jmp error_code
+
+ENTRY(nmi)
+ # Save state but do not trash the segment registers!
+ # We may otherwise be unable to reload them or copy them to ring 1.
+ pushl %eax
+ SAVE_ALL_NOSEGREGS
+
+ # Check for hardware problems. These are always fatal so we can
+ # reload DS and ES when handling them.
+ inb $0x61,%al
+ testb $0x80,%al
+ jne nmi_parity_err
+ testb $0x40,%al
+ jne nmi_io_err
+ movl %eax,%ebx
+
+ # Okay, it's almost a normal NMI tick. We can only process it if:
+ # A. We are the outermost Xen activation (in which case we have
+ # the selectors safely saved on our stack)
+ # B. DS-GS all contain sane Xen values.
+ # In all other cases we bail without touching DS-GS, as we have
+ # interrupted an enclosing Xen activation in tricky prologue or
+ # epilogue code.
+ movb CS(%esp),%al
+ testb $3,%al
+ jne do_watchdog_tick
+ movl DS(%esp),%eax
+ cmpw $(__HYPERVISOR_DS),%ax
+ jne nmi_badseg
+ movl ES(%esp),%eax
+ cmpw $(__HYPERVISOR_DS),%ax
+ jne nmi_badseg
+ movl FS(%esp),%eax
+ cmpw $(__HYPERVISOR_DS),%ax
+ jne nmi_badseg
+ movl GS(%esp),%eax
+ cmpw $(__HYPERVISOR_DS),%ax
+ jne nmi_badseg
+
+do_watchdog_tick:
+ movl $(__HYPERVISOR_DS),%edx
+ movl %edx,%ds
+ movl %edx,%es
+ movl %esp,%edx
+ pushl %ebx # reason
+ pushl %edx # regs
+ call SYMBOL_NAME(do_nmi)
+ addl $8,%esp
+ movb CS(%esp),%al
+ testb $3,%al
+ je restore_all_xen
+ GET_CURRENT(%ebx)
+ jmp restore_all_guest
+
+nmi_badseg:
+ popl %ebx
+ popl %ecx
+ popl %edx
+ popl %esi
+ popl %edi
+ popl %ebp
+ popl %eax
+ addl $20,%esp
+ iret
+
+nmi_parity_err:
+ movl $(__HYPERVISOR_DS),%edx
+ movl %edx,%ds
+ movl %edx,%es
+ jmp SYMBOL_NAME(mem_parity_error)
+
+nmi_io_err:
+ movl $(__HYPERVISOR_DS),%edx
+ movl %edx,%ds
+ movl %edx,%es
+ jmp SYMBOL_NAME(io_check_error)
+
+.data
+ENTRY(hypervisor_call_table)
+ .long SYMBOL_NAME(do_set_trap_table) /* 0 */
+ .long SYMBOL_NAME(do_mmu_update)
+ .long SYMBOL_NAME(do_console_write)
+ .long SYMBOL_NAME(do_set_gdt)
+ .long SYMBOL_NAME(do_stack_switch)
+ .long SYMBOL_NAME(do_set_callbacks) /* 5 */
+ .long SYMBOL_NAME(do_ni_syscall) # do_net_io_op
+ .long SYMBOL_NAME(do_fpu_taskswitch)
+ .long SYMBOL_NAME(do_sched_op)
+ .long SYMBOL_NAME(do_dom0_op)
+ .long SYMBOL_NAME(do_ni_syscall) /* 10 */ # do_network_op
+ .long SYMBOL_NAME(do_ni_syscall) # do_block_io_op
+ .long SYMBOL_NAME(do_set_debugreg)
+ .long SYMBOL_NAME(do_get_debugreg)
+ .long SYMBOL_NAME(do_update_descriptor)
+ .long SYMBOL_NAME(do_set_fast_trap) /* 15 */
+ .long SYMBOL_NAME(do_dom_mem_op)
+ .long SYMBOL_NAME(do_multicall)
+ .long SYMBOL_NAME(do_kbd_op)
+ .long SYMBOL_NAME(do_update_va_mapping)
+ .long SYMBOL_NAME(do_set_timer_op) /* 20 */
+ .long SYMBOL_NAME(do_event_channel_op)
+ .long SYMBOL_NAME(do_xen_version)
+ .long SYMBOL_NAME(do_console_io)
+ .long SYMBOL_NAME(do_physdev_op)
+ .long SYMBOL_NAME(do_update_va_mapping_otherdomain) /* 25 */
+ .rept NR_syscalls-((.-hypervisor_call_table)/4)
+ .long SYMBOL_NAME(do_ni_syscall)
+ .endr
--- /dev/null
+/*
+ * linux/arch/i386/mm/extable.c
+ */
+
+#include <xen/config.h>
+#include <xen/module.h>
+#include <xen/spinlock.h>
+#include <asm/uaccess.h>
+
+extern const struct exception_table_entry __start___ex_table[];
+extern const struct exception_table_entry __stop___ex_table[];
+
+static inline unsigned long
+search_one_table(const struct exception_table_entry *first,
+ const struct exception_table_entry *last,
+ unsigned long value)
+{
+ while (first <= last) {
+ const struct exception_table_entry *mid;
+ long diff;
+
+ mid = (last - first) / 2 + first;
+ diff = mid->insn - value;
+ if (diff == 0)
+ return mid->fixup;
+ else if (diff < 0)
+ first = mid+1;
+ else
+ last = mid-1;
+ }
+ return 0;
+}
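+
+/*
+ * Illustrative note: the table is sorted by faulting-instruction
+ * address, so this is a plain binary search.  For example, with entries
+ * whose .insn fields are {0x100, 0x200, 0x300}, a lookup of 0x200
+ * probes the middle entry first and immediately returns its .fixup.
+ */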
+
+extern spinlock_t modlist_lock;
+
+unsigned long
+search_exception_table(unsigned long addr)
+{
+ unsigned long ret = 0;
+
+#ifndef CONFIG_MODULES
+ /* There is only the kernel to search. */
+ ret = search_one_table(__start___ex_table, __stop___ex_table-1, addr);
+ return ret;
+#else
+ unsigned long flags;
+ /* The kernel is the last "module" -- no need to treat it specially. */
+ struct module *mp;
+
+ spin_lock_irqsave(&modlist_lock, flags);
+ for (mp = module_list; mp != NULL; mp = mp->next) {
+ if (mp->ex_table_start == NULL || !(mp->flags&(MOD_RUNNING|MOD_INITIALIZING)))
+ continue;
+ ret = search_one_table(mp->ex_table_start,
+ mp->ex_table_end - 1, addr);
+ if (ret)
+ break;
+ }
+ spin_unlock_irqrestore(&modlist_lock, flags);
+ return ret;
+#endif
+}
--- /dev/null
+/******************************************************************************
+ * flushtlb.c
+ *
+ * TLB flushes are timestamped using a global virtual 'clock' which ticks
+ * on any TLB flush on any processor.
+ *
+ * Copyright (c) 2003, K A Fraser
+ */
+
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <xen/interrupt.h>
+#include <asm/flushtlb.h>
+
+u32 tlbflush_clock;
+u32 tlbflush_time[NR_CPUS];
+
+void tlb_clocktick(void)
+{
+ u32 y, ny;
+
+ /* Tick the clock. 'y' contains the current time after the tick. */
+ ny = tlbflush_clock;
+ do {
+#ifdef CONFIG_SMP
+ if ( unlikely(((y = ny+1) & TLBCLOCK_EPOCH_MASK) == 0) )
+ {
+ raise_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ);
+ y = tlbflush_clock;
+ break;
+ }
+#else
+ y = ny+1;
+#endif
+ }
+ while ( unlikely((ny = cmpxchg(&tlbflush_clock, y-1, y)) != y-1) );
+
+ /* Update this CPU's timestamp to new time. */
+ tlbflush_time[smp_processor_id()] = y;
+}
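+
+/*
+ * Illustrative (hypothetical) consumer of the clock: a mapping torn
+ * down when the clock read T can still be cached in CPU n's TLB only
+ * if that CPU has not flushed since, i.e. if tlbflush_time[n] <= T.
+ * A sketch, ignoring epoch wrap-around:
+ *
+ *     static inline int may_be_stale(unsigned int cpu, u32 t)
+ *     {
+ *         return tlbflush_time[cpu] <= t;
+ *     }
+ *
+ * A real check would also have to account for TLBCLOCK_EPOCH_MASK so
+ * that clock wrap-around cannot produce false negatives.
+ */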
--- /dev/null
+/*
+ * linux/arch/i386/kernel/i387.c
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+
+void init_fpu(void)
+{
+ __asm__("fninit");
+ if ( cpu_has_xmm ) load_mxcsr(0x1f80);
+ set_bit(PF_DONEFPUINIT, &current->flags);
+}
+
+static inline void __save_init_fpu( struct task_struct *tsk )
+{
+ if ( cpu_has_fxsr ) {
+ asm volatile( "fxsave %0 ; fnclex"
+ : "=m" (tsk->thread.i387.fxsave) );
+ } else {
+ asm volatile( "fnsave %0 ; fwait"
+ : "=m" (tsk->thread.i387.fsave) );
+ }
+ clear_bit(PF_USEDFPU, &tsk->flags);
+}
+
+void save_init_fpu( struct task_struct *tsk )
+{
+ /*
+ * The guest OS may have set the 'virtual STTS' flag.
+ * This causes us to set the real flag, so we'll need
+ * to temporarily clear it while saving f-p state.
+ */
+ if ( test_bit(PF_GUEST_STTS, &tsk->flags) ) clts();
+ __save_init_fpu(tsk);
+ stts();
+}
+
+void restore_fpu( struct task_struct *tsk )
+{
+ if ( cpu_has_fxsr ) {
+ asm volatile( "fxrstor %0"
+ : : "m" (tsk->thread.i387.fxsave) );
+ } else {
+ asm volatile( "frstor %0"
+ : : "m" (tsk->thread.i387.fsave) );
+ }
+}
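+
+/*
+ * Sketch (illustrative only) of how a #NM handler such as
+ * math_state_restore might use the helpers above for lazy FPU
+ * switching -- behaviour outside this file is an assumption:
+ *
+ *     clts();                                  // permit FPU use again
+ *     if ( test_bit(PF_DONEFPUINIT, &current->flags) )
+ *         restore_fpu(current);                // reload saved state
+ *     else
+ *         init_fpu();                          // first ever FPU use
+ *     set_bit(PF_USEDFPU, &current->flags);    // state now live in FPU
+ *
+ * so f-p state is saved/restored only for tasks that actually use it.
+ */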
--- /dev/null
+/******************************************************************************
+ * i8259.c
+ *
+ * Well, this is required for SMP systems as well, as it builds
+ * interrupt tables for IO-APICs as well as uniprocessor 8259A-alikes.
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <asm/ptrace.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/interrupt.h>
+#include <xen/irq.h>
+
+#include <asm/atomic.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/desc.h>
+#include <asm/bitops.h>
+#include <xen/delay.h>
+#include <asm/apic.h>
+
+
+/*
+ * Common place to define all x86 IRQ vectors
+ *
+ * This builds up the IRQ handler stubs using some ugly macros in irq.h
+ *
+ * These macros create the low-level assembly IRQ routines that save
+ * register context and call do_IRQ(). do_IRQ() then does all the
+ * operations that are needed to keep the AT (or SMP IOAPIC)
+ * interrupt-controller happy.
+ */
+
+BUILD_COMMON_IRQ()
+
+#define BI(x,y) \
+ BUILD_IRQ(x##y)
+
+#define BUILD_16_IRQS(x) \
+ BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
+ BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
+ BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
+ BI(x,c) BI(x,d) BI(x,e) BI(x,f)
+
+/*
+ * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
+ * (these are usually mapped to vectors 0x30-0x3f)
+ */
+ BUILD_16_IRQS(0x0)
+
+#ifdef CONFIG_X86_IO_APIC
+/*
+ * The IO-APIC gives us many more interrupt sources. Most of these
+ * are unused but an SMP system is supposed to have enough memory ...
+ * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
+ * across the spectrum, so we really want to be prepared to get all
+ * of these. Plus, more powerful systems might have more than 64
+ * IO-APIC registers.
+ *
+ * (these are usually mapped into the 0x30-0xff vector range)
+ */
+ BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
+ BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
+ BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
+ BUILD_16_IRQS(0xc)
+#endif
+
+#undef BUILD_16_IRQS
+#undef BI
+
+
+/*
+ * The following vectors are part of the Linux architecture, there
+ * is no hardware IRQ pin equivalent for them, they are triggered
+ * through the ICC by us (IPIs)
+ */
+#ifdef CONFIG_SMP
+ BUILD_SMP_INTERRUPT(event_check_interrupt,EVENT_CHECK_VECTOR)
+ BUILD_SMP_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
+ BUILD_SMP_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
+#endif
+
+/*
+ * every pentium local APIC has two 'local interrupts', with a
+ * soft-definable vector attached to both interrupts, one of
+ * which is a timer interrupt, the other one is error counter
+ * overflow. Linux uses the local APIC timer interrupt to get
+ * a much simpler SMP time architecture:
+ */
+#ifdef CONFIG_X86_LOCAL_APIC
+ BUILD_SMP_TIMER_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
+ BUILD_SMP_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
+ BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
+#endif
+
+#define IRQ(x,y) \
+ IRQ##x##y##_interrupt
+
+#define IRQLIST_16(x) \
+ IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
+ IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
+ IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
+ IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
+
+ void (*interrupt[NR_IRQS])(void) = {
+ IRQLIST_16(0x0),
+
+#ifdef CONFIG_X86_IO_APIC
+ IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3),
+ IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
+ IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
+ IRQLIST_16(0xc)
+#endif
+ };
+
+#undef IRQ
+#undef IRQLIST_16
+
+/*
+ * This is the 'legacy' 8259A Programmable Interrupt Controller,
+ * present in the majority of PC/AT boxes.
+ * Plus some generic x86-specific things, if generic specifics make
+ * any sense at all.
+ * This file should become arch/i386/kernel/irq.c when the old irq.c
+ * moves to arch-independent land.
+ */
+
+spinlock_t i8259A_lock = SPIN_LOCK_UNLOCKED;
+
+static void end_8259A_irq (unsigned int irq)
+{
+ if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+ enable_8259A_irq(irq);
+}
+
+#define shutdown_8259A_irq disable_8259A_irq
+
+void mask_and_ack_8259A(unsigned int);
+
+static unsigned int startup_8259A_irq(unsigned int irq)
+{
+ enable_8259A_irq(irq);
+ return 0; /* never anything pending */
+}
+
+static struct hw_interrupt_type i8259A_irq_type = {
+ "XT-PIC",
+ startup_8259A_irq,
+ shutdown_8259A_irq,
+ enable_8259A_irq,
+ disable_8259A_irq,
+ mask_and_ack_8259A,
+ end_8259A_irq,
+ NULL
+};
+
+/*
+ * 8259A PIC functions to handle ISA devices:
+ */
+
+/*
+ * This contains the irq mask for both 8259A irq controllers,
+ */
+static unsigned int cached_irq_mask = 0xffff;
+
+#define __byte(x,y) (((unsigned char *)&(y))[x])
+#define cached_21 (__byte(0,cached_irq_mask))
+#define cached_A1 (__byte(1,cached_irq_mask))
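+
+/*
+ * Worked example (illustrative): masking IRQ 12 sets bit 4 of
+ * cached_A1, since 12 & 8 selects the slave PIC and 12 % 8 == 4:
+ *
+ *     cached_irq_mask |= 1 << 12;   // cached_A1 now has bit 4 set
+ *     outb(cached_A1, 0xA1);        // program the slave's mask register
+ */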
+
+/*
+ * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
+ * boards the timer interrupt is not really connected to any IO-APIC pin,
+ * it's fed to the master 8259A's IR0 line only.
+ *
+ * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
+ * this 'mixed mode' IRQ handling costs nothing because it's only used
+ * at IRQ setup time.
+ */
+unsigned long io_apic_irqs;
+
+void disable_8259A_irq(unsigned int irq)
+{
+ unsigned int mask = 1 << irq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+ cached_irq_mask |= mask;
+ if (irq & 8)
+ outb(cached_A1,0xA1);
+ else
+ outb(cached_21,0x21);
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+void enable_8259A_irq(unsigned int irq)
+{
+ unsigned int mask = ~(1 << irq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+ cached_irq_mask &= mask;
+ if (irq & 8)
+ outb(cached_A1,0xA1);
+ else
+ outb(cached_21,0x21);
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+int i8259A_irq_pending(unsigned int irq)
+{
+ unsigned int mask = 1<<irq;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+ if (irq < 8)
+ ret = inb(0x20) & mask;
+ else
+ ret = inb(0xA0) & (mask >> 8);
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+
+ return ret;
+}
+
+void make_8259A_irq(unsigned int irq)
+{
+ disable_irq_nosync(irq);
+ io_apic_irqs &= ~(1<<irq);
+ irq_desc[irq].handler = &i8259A_irq_type;
+ enable_irq(irq);
+}
+
+/*
+ * This function is expected to be called rarely: switching between
+ * the 8259A registers is slow.
+ * The caller must hold the irq controller spinlock.
+ */
+static inline int i8259A_irq_real(unsigned int irq)
+{
+ int value;
+ int irqmask = 1<<irq;
+
+ if (irq < 8) {
+ outb(0x0B,0x20); /* ISR register */
+ value = inb(0x20) & irqmask;
+ outb(0x0A,0x20); /* back to the IRR register */
+ return value;
+ }
+ outb(0x0B,0xA0); /* ISR register */
+ value = inb(0xA0) & (irqmask >> 8);
+ outb(0x0A,0xA0); /* back to the IRR register */
+ return value;
+}
+
+/*
+ * Careful! The 8259A is a fragile beast: it pretty
+ * much _has_ to be done exactly like this (mask it
+ * first, _then_ send the EOI; and the order of EOI
+ * to the two 8259s is important!)
+ */
+void mask_and_ack_8259A(unsigned int irq)
+{
+ unsigned int irqmask = 1 << irq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+ /*
+ * Lightweight spurious IRQ detection. We do not want
+ * to overdo spurious IRQ handling - it's usually a sign
+ * of hardware problems, so we only do the checks we can
+ * do without slowing down good hardware unnecessarily.
+ *
+ * Note that IRQ7 and IRQ15 (the two spurious IRQs
+ * usually resulting from the 8259A-1|2 PICs) occur
+ * even if the IRQ is masked in the 8259A. Thus we
+ * can check spurious 8259A IRQs without doing the
+ * quite slow i8259A_irq_real() call for every IRQ.
+ * This does not cover 100% of spurious interrupts,
+ * but should be enough to warn the user that there
+ * is something bad going on ...
+ */
+ if (cached_irq_mask & irqmask)
+ goto spurious_8259A_irq;
+ cached_irq_mask |= irqmask;
+
+ handle_real_irq:
+ if (irq & 8) {
+ inb(0xA1); /* DUMMY - (do we need this?) */
+ outb(cached_A1,0xA1);
+ outb(0x60+(irq&7),0xA0);/* 'Specific EOI' to slave */
+ outb(0x62,0x20); /* 'Specific EOI' to master-IRQ2 */
+ } else {
+ inb(0x21); /* DUMMY - (do we need this?) */
+ outb(cached_21,0x21);
+ outb(0x60+irq,0x20); /* 'Specific EOI' to master */
+ }
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+ return;
+
+ spurious_8259A_irq:
+ /*
+ * this is the slow path - should happen rarely.
+ */
+ if (i8259A_irq_real(irq))
+ /*
+ * oops, the IRQ _is_ in service according to the
+ * 8259A - not spurious, go handle it.
+ */
+ goto handle_real_irq;
+
+ {
+ static int spurious_irq_mask;
+ /*
+ * At this point we can be sure the IRQ is spurious,
+ * lets ACK and report it. [once per IRQ]
+ */
+ if (!(spurious_irq_mask & irqmask)) {
+ printk("spurious 8259A interrupt: IRQ%d.\n", irq);
+ spurious_irq_mask |= irqmask;
+ }
+ atomic_inc(&irq_err_count);
+ /*
+ * Theoretically we do not have to handle this IRQ,
+ * but in Linux this does not cause problems and is
+ * simpler for us.
+ */
+ goto handle_real_irq;
+ }
+}
+
+void __init init_8259A(int auto_eoi)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+
+ outb(0xff, 0x21); /* mask all of 8259A-1 */
+ outb(0xff, 0xA1); /* mask all of 8259A-2 */
+
+ /*
+ * outb_p - this has to work on a wide range of PC hardware.
+ */
+ outb_p(0x11, 0x20); /* ICW1: select 8259A-1 init */
+ outb_p(0x30 + 0, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
+ outb_p(0x04, 0x21); /* 8259A-1 (the master) has a slave on IR2 */
+ if (auto_eoi)
+ outb_p(0x03, 0x21); /* master does Auto EOI */
+ else
+ outb_p(0x01, 0x21); /* master expects normal EOI */
+
+ outb_p(0x11, 0xA0); /* ICW1: select 8259A-2 init */
+ outb_p(0x30 + 8, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
+ outb_p(0x02, 0xA1); /* 8259A-2 is a slave on master's IR2 */
+ outb_p(0x01, 0xA1); /* (slave's support for AEOI in flat mode
+ is to be investigated) */
+
+ if (auto_eoi)
+ /*
+ * in AEOI mode we just have to mask the interrupt
+ * when acking.
+ */
+ i8259A_irq_type.ack = disable_8259A_irq;
+ else
+ i8259A_irq_type.ack = mask_and_ack_8259A;
+
+ udelay(100); /* wait for 8259A to initialize */
+
+ outb(cached_21, 0x21); /* restore master IRQ mask */
+ outb(cached_A1, 0xA1); /* restore slave IRQ mask */
+
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+
+/*
+ * IRQ2 is cascade interrupt to second interrupt controller
+ */
+
+static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL};
+
+void __init init_ISA_irqs (void)
+{
+ int i;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ init_bsp_APIC();
+#endif
+ init_8259A(0);
+
+ for (i = 0; i < NR_IRQS; i++) {
+ irq_desc[i].status = IRQ_DISABLED;
+ irq_desc[i].action = 0;
+ irq_desc[i].depth = 1;
+
+ if (i < 16) {
+ /*
+ * 16 old-style INTA-cycle interrupts:
+ */
+ irq_desc[i].handler = &i8259A_irq_type;
+ } else {
+ /*
+ * 'high' PCI IRQs filled in on demand
+ */
+ irq_desc[i].handler = &no_irq_type;
+ }
+ }
+}
+
+void __init init_IRQ(void)
+{
+ int i;
+
+ init_ISA_irqs();
+
+ /*
+ * Cover the whole vector space, no vector can escape
+ * us. (some of these will be overridden and become
+ * 'special' SMP interrupts)
+ */
+ for (i = 0; i < NR_IRQS; i++) {
+ int vector = FIRST_EXTERNAL_VECTOR + i;
+ if (vector != HYPERVISOR_CALL_VECTOR)
+ set_intr_gate(vector, interrupt[i]);
+ }
+
+#ifdef CONFIG_SMP
+ /*
+ * IRQ0 must be given a fixed assignment and initialized,
+ * because it's used before the IO-APIC is set up.
+ */
+ set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
+
+ /*
+ * The reschedule interrupt is a CPU-to-CPU reschedule-helper
+ * IPI, driven by wakeup.
+ */
+ set_intr_gate(EVENT_CHECK_VECTOR, event_check_interrupt);
+
+ /* IPI for invalidation */
+ set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+
+ /* IPI for generic function call */
+ set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ /* self generated IPI for local APIC timer */
+ set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
+
+ /* IPI vectors for APIC spurious and error interrupts */
+ set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
+ set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+#endif
+
+ /*
+ * Set the clock to HZ Hz, we already have a valid
+ * vector now:
+ */
+#define CLOCK_TICK_RATE 1193180 /* crystal freq (Hz) */
+#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
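+ /*
+  * Worked example (illustrative): with HZ == 100, LATCH is
+  * (1193180 + 50) / 100 == 11932, so the PIT ticks at
+  * 1193180 / 11932 =~ 99.998 Hz -- as close to HZ as the crystal allows.
+  */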
+ outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */
+ outb_p(LATCH & 0xff , 0x40); /* LSB */
+ outb(LATCH >> 8 , 0x40); /* MSB */
+
+ setup_irq(2, &irq2);
+}
+
--- /dev/null
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <asm/desc.h>
+
+struct task_struct idle0_task = IDLE0_TASK(idle0_task);
+
+/*
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+ * no more per-task TSS's. The TSS size is kept cacheline-aligned
+ * so they are allowed to end up in the .data.cacheline_aligned
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+ */
+struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS };
+
--- /dev/null
+/*
+ * Intel IO-APIC support for multi-Pentium hosts.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
+ *
+ * Many thanks to Stig Venaas for trying out countless experimental
+ * patches and reporting/debugging problems patiently!
+ *
+ * (c) 1999, Multiple IO-APIC support, developed by
+ * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
+ * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
+ * further tested and cleaned up by Zach Brown <zab@redhat.com>
+ * and Ingo Molnar <mingo@redhat.com>
+ *
+ * Fixes
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
+ * thanks to Eric Gilmore
+ * and Rolf G. Tews
+ * for testing these extensively
+ * Paul Diefenbaugh : Added full ACPI support
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/interrupt.h>
+#include <xen/irq.h>
+#include <xen/delay.h>
+#include <xen/sched.h>
+#include <xen/config.h>
+#include <asm/mc146818rtc.h>
+#include <asm/io.h>
+#include <asm/mpspec.h>
+#include <asm/io_apic.h>
+#include <asm/smp.h>
+#include <asm/desc.h>
+#include <asm/smpboot.h>
+
+#ifdef CONFIG_X86_IO_APIC
+
+#undef APIC_LOCKUP_DEBUG
+
+#define APIC_LOCKUP_DEBUG
+
+static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED;
+
+unsigned int int_dest_addr_mode = APIC_DEST_LOGICAL;
+unsigned char int_delivery_mode = dest_LowestPrio;
+
+
+/*
+ * # of IRQ routing registers
+ */
+int nr_ioapic_registers[MAX_IO_APICS];
+
+/*
+ * Rough estimation of how many shared IRQs there are, can
+ * be changed anytime.
+ */
+#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * the indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+static struct irq_pin_list {
+ int apic, pin, next;
+} irq_2_pin[PIN_MAP_SIZE];
+
+/*
+ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
+ * shared ISA-space IRQs, so we have to support them. We are super
+ * fast in the common case, and fast for shared ISA-space IRQs.
+ */
+static void __init add_pin_to_irq(unsigned int irq, int apic, int pin)
+{
+ static int first_free_entry = NR_IRQS;
+ struct irq_pin_list *entry = irq_2_pin + irq;
+
+ while (entry->next)
+ entry = irq_2_pin + entry->next;
+
+ if (entry->pin != -1) {
+ entry->next = first_free_entry;
+ entry = irq_2_pin + entry->next;
+ if (++first_free_entry >= PIN_MAP_SIZE)
+ panic("io_apic.c: whoops");
+ }
+ entry->apic = apic;
+ entry->pin = pin;
+}
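+
+/*
+ * Illustrative example (hypothetical wiring): if IRQ 9 arrives both on
+ * pin 11 of apic 0 and on pin 3 of apic 1, then
+ *
+ *     add_pin_to_irq(9, 0, 11);
+ *     add_pin_to_irq(9, 1, 3);
+ *
+ * leaves irq_2_pin[9] = {0, 11} chained to a spill entry {1, 3}, so the
+ * __DO_ACTION() walker below visits both pins when IRQ 9 is (un)masked.
+ */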
+
+/*
+ * Reroute an IRQ to a different pin.
+ */
+static void __init replace_pin_at_irq(unsigned int irq,
+ int oldapic, int oldpin,
+ int newapic, int newpin)
+{
+ struct irq_pin_list *entry = irq_2_pin + irq;
+
+ while (1) {
+ if (entry->apic == oldapic && entry->pin == oldpin) {
+ entry->apic = newapic;
+ entry->pin = newpin;
+ }
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+}
+
+#define __DO_ACTION(R, ACTION, FINAL) \
+ \
+{ \
+ int pin; \
+ struct irq_pin_list *entry = irq_2_pin + irq; \
+ \
+ for (;;) { \
+ unsigned int reg; \
+ pin = entry->pin; \
+ if (pin == -1) \
+ break; \
+ reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
+ reg ACTION; \
+ io_apic_write(entry->apic, 0x10 + R + pin*2, reg); \
+ if (!entry->next) \
+ break; \
+ entry = irq_2_pin + entry->next; \
+ } \
+ FINAL; \
+}
+
+#define DO_ACTION(name,R,ACTION, FINAL) \
+ \
+ static void name##_IO_APIC_irq (unsigned int irq) \
+ __DO_ACTION(R, ACTION, FINAL)
+
+DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) )
+DO_ACTION( __unmask, 0, &= 0xfffeffff, )
+DO_ACTION( __edge, 0, &= 0xffff7fff, )
+DO_ACTION( __level, 0, |= 0x00008000, )
+
+static void mask_IO_APIC_irq (unsigned int irq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __mask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void unmask_IO_APIC_irq (unsigned int irq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __unmask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+{
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ /* Check delivery_mode to be sure we're not clearing an SMI pin */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+ *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ if (entry.delivery_mode == dest_SMI)
+ return;
+
+ /*
+ * Disable it in the IO-APIC irq-routing table:
+ */
+ memset(&entry, 0, sizeof(entry));
+ entry.mask = 1;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
+ io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void clear_IO_APIC (void)
+{
+ int apic, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+ clear_IO_APIC_pin(apic, pin);
+}
+
+static void set_ioapic_affinity (unsigned int irq, unsigned long mask)
+{
+ unsigned long flags;
+
+ /*
+ * Only the low 8 bits of the mask are valid; shift them into the
+ * logical-destination field (bits 24-31) of the route entry.
+ */
+ mask = mask << 24;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __DO_ACTION(1, = mask, )
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+#define balance_irq(_irq) ((void)0)
+
+/*
+ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+#define MAX_PIRQS 8
+int pirq_entries [MAX_PIRQS];
+int pirqs_enabled;
+
+int skip_ioapic_setup;
+#if 0
+
+static int __init noioapic_setup(char *str)
+{
+ skip_ioapic_setup = 1;
+ return 1;
+}
+
+__setup("noapic", noioapic_setup);
+
+static int __init ioapic_setup(char *str)
+{
+ skip_ioapic_setup = 0;
+ return 1;
+}
+
+__setup("apic", ioapic_setup);
+
+
+
+static int __init ioapic_pirq_setup(char *str)
+{
+ int i, max;
+ int ints[MAX_PIRQS+1];
+
+ get_options(str, ARRAY_SIZE(ints), ints);
+
+ for (i = 0; i < MAX_PIRQS; i++)
+ pirq_entries[i] = -1;
+
+ pirqs_enabled = 1;
+ printk(KERN_INFO "PIRQ redirection, working around broken MP-BIOS.\n");
+ max = MAX_PIRQS;
+ if (ints[0] < MAX_PIRQS)
+ max = ints[0];
+
+ for (i = 0; i < max; i++) {
+ printk(KERN_DEBUG "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+ /*
+ * PIRQs are mapped upside down, usually.
+ */
+ pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+ }
+ return 1;
+}
+
+__setup("pirq=", ioapic_pirq_setup);
+
+#endif
+
+/*
+ * Find the IRQ entry number of a certain pin.
+ */
+static int __init find_irq_entry(int apic, int pin, int type)
+{
+ int i;
+
+ for (i = 0; i < mp_irq_entries; i++)
+ if (mp_irqs[i].mpc_irqtype == type &&
+ (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
+ mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
+ mp_irqs[i].mpc_dstirq == pin)
+ return i;
+
+ return -1;
+}
+
+/*
+ * Find the pin to which IRQ[irq] (ISA) is connected
+ */
+static int __init find_isa_irq_pin(int irq, int type)
+{
+ int i;
+
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].mpc_srcbus;
+
+ if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+ mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+ mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
+ (mp_irqs[i].mpc_irqtype == type) &&
+ (mp_irqs[i].mpc_srcbusirq == irq))
+
+ return mp_irqs[i].mpc_dstirq;
+ }
+ return -1;
+}
+
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+static int pin_2_irq(int idx, int apic, int pin);
+
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
+{
+ int apic, i, best_guess = -1;
+
+ Dprintk("querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+ bus, slot, pin);
+ if ((mp_bus_id_to_pci_bus==NULL) || (mp_bus_id_to_pci_bus[bus] == -1)) {
+ printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+ return -1;
+ }
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].mpc_srcbus;
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
+ mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
+ break;
+
+ if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
+ !mp_irqs[i].mpc_irqtype &&
+ (bus == lbus) &&
+ (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
+ int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
+
+ if (!(apic || IO_APIC_IRQ(irq)))
+ continue;
+
+ if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
+ return irq;
+ /*
+ * Use the first all-but-pin matching entry as a
+ * best-guess fuzzy result for broken mptables.
+ */
+ if (best_guess < 0)
+ best_guess = irq;
+ }
+ }
+ return best_guess;
+}
+
+/*
+ * EISA Edge/Level control register, ELCR
+ */
+static int __init EISA_ELCR(unsigned int irq)
+{
+ if (irq < 16) {
+ unsigned int port = 0x4d0 + (irq >> 3);
+ return (inb(port) >> (irq & 7)) & 1;
+ }
+ printk(KERN_INFO "Broken MPtable reports ISA irq %d\n", irq);
+ return 0;
+}
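+
+/*
+ * Worked example (illustrative): for IRQ 9 the ELCR byte is read from
+ * port 0x4d0 + (9 >> 3) == 0x4d1 and the trigger bit is
+ * (value >> (9 & 7)) & 1, i.e. bit 1 -- 1 means level, 0 means edge.
+ */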
+
+/* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value. If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR */
+
+#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
+#define default_EISA_polarity(idx) (0)
+
+/* ISA interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_ISA_trigger(idx) (0)
+#define default_ISA_polarity(idx) (0)
+
+/* PCI interrupts are always polarity one level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_PCI_trigger(idx) (1)
+#define default_PCI_polarity(idx) (1)
+
+/* MCA interrupts are always polarity zero level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_MCA_trigger(idx) (1)
+#define default_MCA_polarity(idx) (0)
+
+static int __init MPBIOS_polarity(int idx)
+{
+ int bus = mp_irqs[idx].mpc_srcbus;
+ int polarity;
+
+ /*
+ * Determine IRQ line polarity (high active or low active):
+ */
+ switch (mp_irqs[idx].mpc_irqflag & 3)
+ {
+ case 0: /* conforms, ie. bus-type dependent polarity */
+ {
+ switch (mp_bus_id_to_type[bus])
+ {
+ case MP_BUS_ISA: /* ISA pin */
+ {
+ polarity = default_ISA_polarity(idx);
+ break;
+ }
+ case MP_BUS_EISA: /* EISA pin */
+ {
+ polarity = default_EISA_polarity(idx);
+ break;
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+ polarity = default_PCI_polarity(idx);
+ break;
+ }
+ case MP_BUS_MCA: /* MCA pin */
+ {
+ polarity = default_MCA_polarity(idx);
+ break;
+ }
+ default:
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ }
+ break;
+ }
+ case 1: /* high active */
+ {
+ polarity = 0;
+ break;
+ }
+ case 2: /* reserved */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ case 3: /* low active */
+ {
+ polarity = 1;
+ break;
+ }
+ default: /* invalid */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ }
+ return polarity;
+}
+
+static int __init MPBIOS_trigger(int idx)
+{
+ int bus = mp_irqs[idx].mpc_srcbus;
+ int trigger;
+
+ /*
+ * Determine IRQ trigger mode (edge or level sensitive):
+ */
+ switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
+ {
+ case 0: /* conforms, ie. bus-type dependent */
+ {
+ switch (mp_bus_id_to_type[bus])
+ {
+ case MP_BUS_ISA: /* ISA pin */
+ {
+ trigger = default_ISA_trigger(idx);
+ break;
+ }
+ case MP_BUS_EISA: /* EISA pin */
+ {
+ trigger = default_EISA_trigger(idx);
+ break;
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+ trigger = default_PCI_trigger(idx);
+ break;
+ }
+ case MP_BUS_MCA: /* MCA pin */
+ {
+ trigger = default_MCA_trigger(idx);
+ break;
+ }
+ default:
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 1;
+ break;
+ }
+ }
+ break;
+ }
+ case 1: /* edge */
+ {
+ trigger = 0;
+ break;
+ }
+ case 2: /* reserved */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 1;
+ break;
+ }
+ case 3: /* level */
+ {
+ trigger = 1;
+ break;
+ }
+ default: /* invalid */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 0;
+ break;
+ }
+ }
+ return trigger;
+}
+
+static inline int irq_polarity(int idx)
+{
+ return MPBIOS_polarity(idx);
+}
+
+static inline int irq_trigger(int idx)
+{
+ return MPBIOS_trigger(idx);
+}
+
+static int pin_2_irq(int idx, int apic, int pin)
+{
+ int irq, i;
+ int bus = mp_irqs[idx].mpc_srcbus;
+
+ /*
+ * Debugging check, we are in big trouble if this message pops up!
+ */
+ if (mp_irqs[idx].mpc_dstirq != pin)
+ printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+
+ switch (mp_bus_id_to_type[bus])
+ {
+ case MP_BUS_ISA: /* ISA pin */
+ case MP_BUS_EISA:
+ case MP_BUS_MCA:
+ {
+ irq = mp_irqs[idx].mpc_srcbusirq;
+ break;
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+ /*
+ * PCI IRQs are mapped in order
+ */
+ i = irq = 0;
+ while (i < apic)
+ irq += nr_ioapic_registers[i++];
+ irq += pin;
+ break;
+ }
+ default:
+ {
+ printk(KERN_ERR "unknown bus type %d.\n",bus);
+ irq = 0;
+ break;
+ }
+ }
+
+ /*
+ * PCI IRQ command line redirection. Yes, limits are hardcoded.
+ */
+ if ((pin >= 16) && (pin <= 23)) {
+ if (pirq_entries[pin-16] != -1) {
+ if (!pirq_entries[pin-16]) {
+ printk(KERN_DEBUG "disabling PIRQ%d\n", pin-16);
+ } else {
+ irq = pirq_entries[pin-16];
+ printk(KERN_DEBUG "using PIRQ%d -> IRQ %d\n",
+ pin-16, irq);
+ }
+ }
+ }
+ return irq;
+}
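+
+/*
+ * Illustrative example (hypothetical configuration): with two 24-pin
+ * IO-APICs, a PCI interrupt on apic 1, pin 3 becomes IRQ 24 + 3 == 27,
+ * since PCI pins are numbered sequentially after all pins of the
+ * preceding IO-APICs.
+ */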
+
+static inline int IO_APIC_irq_trigger(int irq)
+{
+ int apic, idx, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ idx = find_irq_entry(apic,pin,mp_INT);
+ if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
+ return irq_trigger(idx);
+ }
+ }
+ /*
+ * nonexistent IRQs are edge default
+ */
+ return 0;
+}
+
+int irq_vector[NR_IRQS] = { FIRST_DEVICE_VECTOR , 0 };
+
+static int __init assign_irq_vector(int irq)
+{
+ static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
+ if (IO_APIC_VECTOR(irq) > 0)
+ return IO_APIC_VECTOR(irq);
+next:
+ current_vector += 8;
+
+ /* XXX Skip the guestOS -> Xen syscall vector! XXX */
+ if (current_vector == HYPERVISOR_CALL_VECTOR) goto next;
+ /* XXX Skip the Linux/BSD fast-trap vector! XXX */
+ if (current_vector == 0x80) goto next;
+
+ if (current_vector > FIRST_SYSTEM_VECTOR) {
+ offset++;
+ current_vector = FIRST_DEVICE_VECTOR + offset;
+ }
+
+ if (current_vector == FIRST_SYSTEM_VECTOR)
+ panic("ran out of interrupt sources!");
+
+ IO_APIC_VECTOR(irq) = current_vector;
+ return current_vector;
+}
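+
+/*
+ * Illustrative example: successive calls hand out FIRST_DEVICE_VECTOR+8,
+ * FIRST_DEVICE_VECTOR+16, ... so that consecutive IRQs land in different
+ * interrupt-priority levels (the level is vector >> 4).  The walk skips
+ * HYPERVISOR_CALL_VECTOR and 0x80, and once past FIRST_SYSTEM_VECTOR it
+ * restarts at FIRST_DEVICE_VECTOR + offset to reuse the gaps.
+ */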
+
+extern void (*interrupt[NR_IRQS])(void);
+
+/*
+ * Level and edge triggered IO-APIC interrupts need different handling,
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+ * handled with the level-triggered descriptor, but that one has slightly
+ * more overhead. Level-triggered interrupts cannot be handled with the
+ * edge-triggered handler, without risking IRQ storms and other ugly
+ * races.
+ */
+
+static unsigned int startup_edge_ioapic_irq(unsigned int irq);
+#define shutdown_edge_ioapic_irq disable_edge_ioapic_irq
+#define enable_edge_ioapic_irq unmask_IO_APIC_irq
+static void disable_edge_ioapic_irq (unsigned int irq);
+static void ack_edge_ioapic_irq(unsigned int irq);
+static void end_edge_ioapic_irq (unsigned int i);
+static struct hw_interrupt_type ioapic_edge_irq_type = {
+ "IO-APIC-edge",
+ startup_edge_ioapic_irq,
+ shutdown_edge_ioapic_irq,
+ enable_edge_ioapic_irq,
+ disable_edge_ioapic_irq,
+ ack_edge_ioapic_irq,
+ end_edge_ioapic_irq,
+ set_ioapic_affinity,
+};
+
+static unsigned int startup_level_ioapic_irq (unsigned int irq);
+#define shutdown_level_ioapic_irq mask_IO_APIC_irq
+#define enable_level_ioapic_irq unmask_IO_APIC_irq
+#define disable_level_ioapic_irq mask_IO_APIC_irq
+static void mask_and_ack_level_ioapic_irq (unsigned int irq);
+static void end_level_ioapic_irq (unsigned int irq);
+static struct hw_interrupt_type ioapic_level_irq_type = {
+ "IO-APIC-level",
+ startup_level_ioapic_irq,
+ shutdown_level_ioapic_irq,
+ enable_level_ioapic_irq,
+ disable_level_ioapic_irq,
+ mask_and_ack_level_ioapic_irq,
+ end_level_ioapic_irq,
+ set_ioapic_affinity,
+};
+
+void __init setup_IO_APIC_irqs(void)
+{
+ struct IO_APIC_route_entry entry;
+ int apic, pin, idx, irq, first_notcon = 1, vector;
+ unsigned long flags;
+
+ printk(KERN_DEBUG "init IO_APIC IRQs\n");
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+
+ /*
+ * add it to the IO-APIC irq-routing table:
+ */
+ memset(&entry,0,sizeof(entry));
+
+ entry.delivery_mode = INT_DELIVERY_MODE;
+ entry.dest_mode = (INT_DEST_ADDR_MODE != 0);
+ entry.mask = 0; /* enable IRQ */
+ entry.dest.logical.logical_dest = target_cpus();
+
+ idx = find_irq_entry(apic,pin,mp_INT);
+ if (idx == -1) {
+ if (first_notcon) {
+ printk(KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+ first_notcon = 0;
+ } else
+ printk(", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+ continue;
+ }
+
+ entry.trigger = irq_trigger(idx);
+ entry.polarity = irq_polarity(idx);
+
+ if (irq_trigger(idx)) {
+ entry.trigger = 1;
+ entry.mask = 1;
+ }
+
+ irq = pin_2_irq(idx, apic, pin);
+ /*
+ * skip adding the timer int on secondary nodes, which causes
+ * a small but painful rift in the time-space continuum
+ */
+ if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
+ && (apic != 0) && (irq == 0))
+ continue;
+ else
+ add_pin_to_irq(irq, apic, pin);
+
+ if (!apic && !IO_APIC_IRQ(irq))
+ continue;
+
+ if (IO_APIC_IRQ(irq)) {
+ vector = assign_irq_vector(irq);
+ entry.vector = vector;
+
+ if (IO_APIC_irq_trigger(irq))
+ irq_desc[irq].handler = &ioapic_level_irq_type;
+ else
+ irq_desc[irq].handler = &ioapic_edge_irq_type;
+
+ set_intr_gate(vector, interrupt[irq]);
+
+ if (!apic && (irq < 16))
+ disable_8259A_irq(irq);
+ }
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+ }
+
+ if (!first_notcon)
+ printk(" not connected.\n");
+}
+
+/*
+ * Set up the 8259A-master output pin as broadcast to all
+ * CPUs.
+ */
+void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
+{
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ memset(&entry,0,sizeof(entry));
+
+ disable_8259A_irq(0);
+
+ /* mask LVT0 */
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+
+ /*
+ * We use logical delivery to get the timer IRQ
+ * to the first CPU.
+ */
+ entry.dest_mode = (INT_DEST_ADDR_MODE != 0);
+ entry.mask = 0; /* unmask IRQ now */
+ entry.dest.logical.logical_dest = target_cpus();
+ entry.delivery_mode = INT_DELIVERY_MODE;
+ entry.polarity = 0;
+ entry.trigger = 0;
+ entry.vector = vector;
+
+ /*
+ * The timer IRQ doesn't have to know that behind the
+ * scene we have a 8259A-master in AEOI mode ...
+ */
+ irq_desc[0].handler = &ioapic_edge_irq_type;
+
+ /*
+ * Add it to the IO-APIC irq-routing table:
+ */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ enable_8259A_irq(0);
+}
+
+void __init UNEXPECTED_IO_APIC(void)
+{
+ printk(KERN_WARNING
+ "An unexpected IO-APIC was found. If this kernel release is less than\n"
+ "three months old please report this to linux-smp@vger.kernel.org\n");
+}
+
+void __init print_IO_APIC(void)
+{
+#ifndef NDEBUG
+ int apic, i;
+ struct IO_APIC_reg_00 reg_00;
+ struct IO_APIC_reg_01 reg_01;
+ struct IO_APIC_reg_02 reg_02;
+ struct IO_APIC_reg_03 reg_03;
+ unsigned long flags;
+
+ printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+ for (i = 0; i < nr_ioapics; i++)
+ printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
+ mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
+
+ /*
+ * We are a bit conservative about what we expect. We have to
+ * know about every hardware change ASAP.
+ */
+ printk(KERN_INFO "testing the IO APIC.......................\n");
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(int *)&reg_00 = io_apic_read(apic, 0);
+ *(int *)&reg_01 = io_apic_read(apic, 1);
+ if (reg_01.version >= 0x10)
+ *(int *)&reg_02 = io_apic_read(apic, 2);
+ if (reg_01.version >= 0x20)
+ *(int *)&reg_03 = io_apic_read(apic, 3);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ printk("\n");
+ printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
+ printk(KERN_DEBUG ".... register #00: %08X\n", *(int *)®_00);
+ printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.ID);
+ printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.delivery_type);
+ printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.LTS);
+ if (reg_00.__reserved_0 || reg_00.__reserved_1 || reg_00.__reserved_2)
+ UNEXPECTED_IO_APIC();
+
+ printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01);
+ printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.entries);
+ if ( (reg_01.entries != 0x0f) && /* older (Neptune) boards */
+ (reg_01.entries != 0x17) && /* typical ISA+PCI boards */
+ (reg_01.entries != 0x1b) && /* Compaq Proliant boards */
+ (reg_01.entries != 0x1f) && /* dual Xeon boards */
+ (reg_01.entries != 0x22) && /* bigger Xeon boards */
+ (reg_01.entries != 0x2E) &&
+ (reg_01.entries != 0x3F)
+ )
+ UNEXPECTED_IO_APIC();
+
+ printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.PRQ);
+ printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.version);
+ if ( (reg_01.version != 0x01) && /* 82489DX IO-APICs */
+ (reg_01.version != 0x02) && /* VIA */
+ (reg_01.version != 0x03) && /* later VIA */
+ (reg_01.version != 0x10) && /* oldest IO-APICs */
+ (reg_01.version != 0x11) && /* Pentium/Pro IO-APICs */
+ (reg_01.version != 0x13) && /* Xeon IO-APICs */
+ (reg_01.version != 0x20) /* Intel P64H (82806 AA) */
+ )
+ UNEXPECTED_IO_APIC();
+ if (reg_01.__reserved_1 || reg_01.__reserved_2)
+ UNEXPECTED_IO_APIC();
+
+ /*
+ * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
+ * but the value of reg_02 is read as the previous read register
+ * value, so ignore it if reg_02 == reg_01.
+ */
+ if (reg_01.version >= 0x10 && *(int *)&reg_02 != *(int *)&reg_01) {
+ printk(KERN_DEBUG ".... register #02: %08X\n", *(int *)&reg_02);
+ printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.arbitration);
+ if (reg_02.__reserved_1 || reg_02.__reserved_2)
+ UNEXPECTED_IO_APIC();
+ }
+
+ /*
+ * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
+ * or reg_03, but the value of reg_0[23] is read as the previous read
+ * register value, so ignore it if reg_03 == reg_0[12].
+ */
+ if (reg_01.version >= 0x20 && *(int *)&reg_03 != *(int *)&reg_02 &&
+ *(int *)&reg_03 != *(int *)&reg_01) {
+ printk(KERN_DEBUG ".... register #03: %08X\n", *(int *)&reg_03);
+ printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.boot_DT);
+ if (reg_03.__reserved_1)
+ UNEXPECTED_IO_APIC();
+ }
+
+ printk(KERN_DEBUG ".... IRQ redirection table:\n");
+
+ printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
+ " Stat Dest Deli Vect: \n");
+
+ for (i = 0; i <= reg_01.entries; i++) {
+ struct IO_APIC_route_entry entry;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
+ *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ printk(KERN_DEBUG " %02x %03X %02X ",
+ i,
+ entry.dest.logical.logical_dest,
+ entry.dest.physical.physical_dest
+ );
+
+ printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
+ entry.mask,
+ entry.trigger,
+ entry.irr,
+ entry.polarity,
+ entry.delivery_status,
+ entry.dest_mode,
+ entry.delivery_mode,
+ entry.vector
+ );
+ }
+ }
+ printk(KERN_DEBUG "IRQ to pin mappings:\n");
+ for (i = 0; i < NR_IRQS; i++) {
+ struct irq_pin_list *entry = irq_2_pin + i;
+ if (entry->pin < 0)
+ continue;
+ printk(KERN_DEBUG "IRQ%d ", i);
+ for (;;) {
+ printk("-> %d:%d", entry->apic, entry->pin);
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+ printk("\n");
+ }
+
+ printk(KERN_INFO ".................................... done.\n");
+#endif
+}
+
+
+#if 0 /* Maybe useful for debugging, but not currently used anywhere. */
+
+static void print_APIC_bitfield (int base)
+{
+ unsigned int v;
+ int i, j;
+
+ printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
+ for (i = 0; i < 8; i++) {
+ v = apic_read(base + i*0x10);
+ for (j = 0; j < 32; j++) {
+ if (v & (1<<j))
+ printk("1");
+ else
+ printk("0");
+ }
+ printk("\n");
+ }
+}
+
+
+void /*__init*/ print_local_APIC(void * dummy)
+{
+ unsigned int v, ver, maxlvt;
+
+ printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+ smp_processor_id(), hard_smp_processor_id());
+ v = apic_read(APIC_ID);
+ printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v));
+ v = apic_read(APIC_LVR);
+ printk(KERN_INFO "... APIC VERSION: %08x\n", v);
+ ver = GET_APIC_VERSION(v);
+ maxlvt = get_maxlvt();
+
+ v = apic_read(APIC_TASKPRI);
+ printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
+
+ if (APIC_INTEGRATED(ver)) { /* !82489DX */
+ v = apic_read(APIC_ARBPRI);
+ printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+ v & APIC_ARBPRI_MASK);
+ v = apic_read(APIC_PROCPRI);
+ printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+ }
+
+ v = apic_read(APIC_EOI);
+ printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
+ v = apic_read(APIC_RRR);
+ printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+ v = apic_read(APIC_LDR);
+ printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
+ v = apic_read(APIC_DFR);
+ printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+ v = apic_read(APIC_SPIV);
+ printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
+
+ printk(KERN_DEBUG "... APIC ISR field:\n");
+ print_APIC_bitfield(APIC_ISR);
+ printk(KERN_DEBUG "... APIC TMR field:\n");
+ print_APIC_bitfield(APIC_TMR);
+ printk(KERN_DEBUG "... APIC IRR field:\n");
+ print_APIC_bitfield(APIC_IRR);
+
+ if (APIC_INTEGRATED(ver)) { /* !82489DX */
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
+ v = apic_read(APIC_ESR);
+ printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+ }
+
+ v = apic_read(APIC_ICR);
+ printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
+ v = apic_read(APIC_ICR2);
+ printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+
+ v = apic_read(APIC_LVTT);
+ printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
+
+ if (maxlvt > 3) { /* PC is LVT#4. */
+ v = apic_read(APIC_LVTPC);
+ printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
+ }
+ v = apic_read(APIC_LVT0);
+ printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
+ v = apic_read(APIC_LVT1);
+ printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
+
+ if (maxlvt > 2) { /* ERR is LVT#3. */
+ v = apic_read(APIC_LVTERR);
+ printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
+ }
+
+ v = apic_read(APIC_TMICT);
+ printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
+ v = apic_read(APIC_TMCCT);
+ printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
+ v = apic_read(APIC_TDCR);
+ printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+ printk("\n");
+}
+
+void print_all_local_APICs (void)
+{
+ smp_call_function(print_local_APIC, NULL, 1, 1);
+ print_local_APIC(NULL);
+}
+
+void /*__init*/ print_PIC(void)
+{
+ extern spinlock_t i8259A_lock;
+ unsigned int v;
+ unsigned long flags; /* flags for spin_lock_irqsave() */
+
+ printk(KERN_DEBUG "\nprinting PIC contents\n");
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+
+ v = inb(0xa1) << 8 | inb(0x21);
+ printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
+
+ v = inb(0xa0) << 8 | inb(0x20);
+ printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
+
+ outb(0x0b,0xa0);
+ outb(0x0b,0x20);
+ v = inb(0xa0) << 8 | inb(0x20);
+ outb(0x0a,0xa0);
+ outb(0x0a,0x20);
+
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+
+ printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
+
+ v = inb(0x4d1) << 8 | inb(0x4d0);
+ printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
+}
+
+#endif /* 0 */
+
+
+static void __init enable_IO_APIC(void)
+{
+ struct IO_APIC_reg_01 reg_01;
+ int i;
+ unsigned long flags;
+
+ for (i = 0; i < PIN_MAP_SIZE; i++) {
+ irq_2_pin[i].pin = -1;
+ irq_2_pin[i].next = 0;
+ }
+ if (!pirqs_enabled)
+ for (i = 0; i < MAX_PIRQS; i++)
+ pirq_entries[i] = -1;
+
+ /*
+ * The number of IO-APIC IRQ registers (== #pins):
+ */
+ for (i = 0; i < nr_ioapics; i++) {
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(int *)&reg_01 = io_apic_read(i, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ nr_ioapic_registers[i] = reg_01.entries+1;
+ }
+
+ /*
+ * Do not trust the IO-APIC being empty at bootup
+ */
+ clear_IO_APIC();
+}
+
+/*
+ * Not an __init, needed by the reboot code
+ */
+void disable_IO_APIC(void)
+{
+ /*
+ * Clear the IO-APIC before rebooting:
+ */
+ clear_IO_APIC();
+
+ disconnect_bsp_APIC();
+}
+
+/*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
+ */
+
+static void __init setup_ioapic_ids_from_mpc (void)
+{
+ struct IO_APIC_reg_00 reg_00;
+ unsigned long phys_id_present_map = phys_cpu_present_map;
+ int apic;
+ int i;
+ unsigned char old_id;
+ unsigned long flags;
+
+ if (clustered_apic_mode)
+ /* We don't have a good way to do this yet - hack */
+ phys_id_present_map = (u_long) 0xf;
+ /*
+ * Set the IOAPIC ID to the value stored in the MPC table.
+ */
+ for (apic = 0; apic < nr_ioapics; apic++) {
+
+ /* Read the register 0 value */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(int *)&reg_00 = io_apic_read(apic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ old_id = mp_ioapics[apic].mpc_apicid;
+
+ if (mp_ioapics[apic].mpc_apicid >= apic_broadcast_id) {
+ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
+ apic, mp_ioapics[apic].mpc_apicid);
+ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+ reg_00.ID);
+ mp_ioapics[apic].mpc_apicid = reg_00.ID;
+ }
+
+ /*
+ * Sanity check, is the ID really free? Every APIC in a
+ * system must have a unique ID or we get lots of nice
+ * 'stuck on smp_invalidate_needed IPI wait' messages.
+ * I/O APIC IDs no longer have any meaning for xAPICs and SAPICs.
+ */
+ if ((clustered_apic_mode != CLUSTERED_APIC_XAPIC) &&
+ (phys_id_present_map & (1 << mp_ioapics[apic].mpc_apicid))) {
+ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
+ apic, mp_ioapics[apic].mpc_apicid);
+ for (i = 0; i < 0xf; i++)
+ if (!(phys_id_present_map & (1 << i)))
+ break;
+ if (i >= apic_broadcast_id)
+ panic("Max APIC ID exceeded!\n");
+ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+ i);
+ phys_id_present_map |= 1 << i;
+ mp_ioapics[apic].mpc_apicid = i;
+ } else {
+ printk("Setting %d in the phys_id_present_map\n", mp_ioapics[apic].mpc_apicid);
+ phys_id_present_map |= 1 << mp_ioapics[apic].mpc_apicid;
+ }
+
+
+ /*
+ * We need to adjust the IRQ routing table
+ * if the ID changed.
+ */
+ if (old_id != mp_ioapics[apic].mpc_apicid)
+ for (i = 0; i < mp_irq_entries; i++)
+ if (mp_irqs[i].mpc_dstapic == old_id)
+ mp_irqs[i].mpc_dstapic
+ = mp_ioapics[apic].mpc_apicid;
+
+ /*
+ * Read the right value from the MPC table and
+ * write it into the ID register.
+ */
+ printk(KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
+ mp_ioapics[apic].mpc_apicid);
+
+ reg_00.ID = mp_ioapics[apic].mpc_apicid;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0, *(int *)&reg_00);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ /*
+ * Sanity check
+ */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(int *)&reg_00 = io_apic_read(apic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ if (reg_00.ID != mp_ioapics[apic].mpc_apicid)
+ panic("could not set ID!\n");
+ else
+ printk(" ok.\n");
+ }
+}
+
+/*
+ * There is a nasty bug in some older SMP boards: their mptable lies
+ * about the timer IRQ. We do the following to work around the situation:
+ *
+ * - timer IRQ defaults to IO-APIC IRQ
+ * - if this function detects that timer IRQs are defunct, then we fall
+ * back to ISA timer IRQs
+ */
+static int __init timer_irq_works(void)
+{
+ unsigned int t1 = jiffies;
+
+ sti();
+ /* Let ten ticks pass... */
+ mdelay((10 * 1000) / HZ);
+
+ /*
+ * Expect a few ticks at least, to be sure some possible
+ * glue logic does not lock up after one or two first
+ * ticks in a non-ExtINT mode. Also the local APIC
+ * might have cached one ExtINT interrupt. Finally, at
+ * least one tick may be lost due to delays.
+ */
+ if (jiffies - t1 > 4)
+ return 1;
+
+ return 0;
+}
+
+static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ }
+
+/*
+ * Starting up an edge-triggered IO-APIC interrupt is
+ * nasty - we need to make sure that we get the edge.
+ * If it is already asserted for some reason, we need
+ * to return 1 to indicate that it was pending.
+ *
+ * This is not complete - we should be able to fake
+ * an edge even if it isn't on the 8259A...
+ */
+
+static unsigned int startup_edge_ioapic_irq(unsigned int irq)
+{
+ int was_pending = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ if (irq < 16) {
+ disable_8259A_irq(irq);
+ if (i8259A_irq_pending(irq))
+ was_pending = 1;
+ }
+ __unmask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return was_pending;
+}
+
+/*
+ * Once we have recorded IRQ_PENDING already, we can mask the
+ * interrupt for real. This prevents IRQ storms from unhandled
+ * devices.
+ */
+static void ack_edge_ioapic_irq(unsigned int irq)
+{
+ balance_irq(irq);
+ if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
+ == (IRQ_PENDING | IRQ_DISABLED))
+ mask_IO_APIC_irq(irq);
+ ack_APIC_irq();
+}
+
+static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ }
+
+
+/*
+ * Level triggered interrupts can just be masked,
+ * and shutting down and starting up the interrupt
+ * is the same as enabling and disabling them -- except
+ * with a startup need to return a "was pending" value.
+ *
+ * Level triggered interrupts are special because we
+ * do not touch any IO-APIC register while handling
+ * them. We ack the APIC in the end-IRQ handler, not
+ * in the start-IRQ-handler. Protection against reentrance
+ * from the same interrupt is still provided, both by the
+ * generic IRQ layer and by the fact that an unacked local
+ * APIC does not accept IRQs.
+ */
+static unsigned int startup_level_ioapic_irq (unsigned int irq)
+{
+ unmask_IO_APIC_irq(irq);
+
+ return 0; /* don't check for pending */
+}
+
+static void mask_and_ack_level_ioapic_irq(unsigned int irq)
+{
+ unsigned long v;
+ int i;
+
+ balance_irq(irq);
+
+ mask_IO_APIC_irq(irq);
+
+/*
+ * It appears there is an erratum which affects at least version 0x11
+ * of I/O APIC (that's the 82093AA and cores integrated into various
+ * chipsets). Under certain conditions a level-triggered interrupt is
+ * erroneously delivered as edge-triggered one but the respective IRR
+ * bit gets set nevertheless. As a result the I/O unit expects an EOI
+ * message but it will never arrive and further interrupts are blocked
+ * from the source. The exact reason is so far unknown, but the
+ * phenomenon was observed when two consecutive interrupt requests
+ * from a given source get delivered to the same CPU and the source is
+ * temporarily disabled in between.
+ *
+ * A workaround is to simulate an EOI message manually. We achieve it
+ * by setting the trigger mode to edge and then to level when the edge
+ * trigger mode gets detected in the TMR of a local APIC for a
+ * level-triggered interrupt. We mask the source for the time of the
+ * operation to prevent an edge-triggered interrupt escaping meanwhile.
+ * The idea is from Manfred Spraul. --macro
+ */
+ i = IO_APIC_VECTOR(irq);
+ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+
+ ack_APIC_irq();
+
+ if (!(v & (1 << (i & 0x1f)))) {
+#ifdef APIC_LOCKUP_DEBUG
+ struct irq_pin_list *entry;
+#endif
+
+#ifdef APIC_MISMATCH_DEBUG
+ atomic_inc(&irq_mis_count);
+#endif
+ spin_lock(&ioapic_lock);
+ __edge_IO_APIC_irq(irq);
+#ifdef APIC_LOCKUP_DEBUG
+ for (entry = irq_2_pin + irq;;) {
+ unsigned int reg;
+
+ if (entry->pin == -1)
+ break;
+ reg = io_apic_read(entry->apic, 0x10 + entry->pin * 2);
+ if (reg & 0x00004000)
+ printk(KERN_CRIT "Aieee!!! Remote IRR"
+ " still set after unlock!\n");
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+#endif
+ __level_IO_APIC_irq(irq);
+ spin_unlock(&ioapic_lock);
+ }
+}
+
+static void end_level_ioapic_irq(unsigned int irq)
+{
+ unmask_IO_APIC_irq(irq);
+}
+
+static inline void init_IO_APIC_traps(void)
+{
+ int irq;
+
+ /*
+ * NOTE! The local APIC isn't very good at handling
+ * multiple interrupts at the same interrupt level.
+ * As the interrupt level is determined by taking the
+ * vector number and shifting that right by 4, we
+ * want to spread these out a bit so that they don't
+ * all fall in the same interrupt level.
+ *
+ * Also, we've got to be careful not to trash gate
+ * 0x80, because int 0x80 is hm, kind of importantish. ;)
+ */
+ for (irq = 0; irq < NR_IRQS ; irq++) {
+ if (IO_APIC_IRQ(irq) && !IO_APIC_VECTOR(irq)) {
+ /*
+ * Hmm.. We don't have an entry for this,
+ * so default to an old-fashioned 8259
+ * interrupt if we can..
+ */
+ if (irq < 16)
+ make_8259A_irq(irq);
+ else
+ /* Strange. Oh, well.. */
+ irq_desc[irq].handler = &no_irq_type;
+ }
+ }
+}
+
+static void enable_lapic_irq (unsigned int irq)
+{
+ unsigned long v;
+
+ v = apic_read(APIC_LVT0);
+ apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+}
+
+static void disable_lapic_irq (unsigned int irq)
+{
+ unsigned long v;
+
+ v = apic_read(APIC_LVT0);
+ apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+}
+
+static void ack_lapic_irq (unsigned int irq)
+{
+ ack_APIC_irq();
+}
+
+static void end_lapic_irq (unsigned int i) { /* nothing */ }
+
+static struct hw_interrupt_type lapic_irq_type = {
+ "local-APIC-edge",
+ NULL, /* startup_irq() not used for IRQ0 */
+ NULL, /* shutdown_irq() not used for IRQ0 */
+ enable_lapic_irq,
+ disable_lapic_irq,
+ ack_lapic_irq,
+ end_lapic_irq
+};
+
+/*
+ * This looks a bit hackish but it's about the only way of sending
+ * a few INTA cycles to 8259As and any associated glue logic. ICR does
+ * not support the ExtINT mode, unfortunately. We need to send these
+ * cycles as some i82489DX-based boards have glue logic that keeps the
+ * 8259A interrupt line asserted until INTA. --macro
+ */
+static inline void unlock_ExtINT_logic(void)
+{
+ int pin, i;
+ struct IO_APIC_route_entry entry0, entry1;
+ unsigned char save_control, save_freq_select;
+ unsigned long flags;
+
+ pin = find_isa_irq_pin(8, mp_INT);
+ if (pin == -1)
+ return;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin);
+ *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ clear_IO_APIC_pin(0, pin);
+
+ memset(&entry1, 0, sizeof(entry1));
+
+ entry1.dest_mode = 0; /* physical delivery */
+ entry1.mask = 0; /* unmask IRQ now */
+ entry1.dest.physical.physical_dest = hard_smp_processor_id();
+ entry1.delivery_mode = dest_ExtINT;
+ entry1.polarity = entry0.polarity;
+ entry1.trigger = 0;
+ entry1.vector = 0;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
+ io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ save_control = CMOS_READ(RTC_CONTROL);
+ save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+ CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
+ RTC_FREQ_SELECT);
+ CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
+
+ i = 100;
+ while (i-- > 0) {
+ mdelay(10);
+ if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
+ i -= 10;
+ }
+
+ CMOS_WRITE(save_control, RTC_CONTROL);
+ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+ clear_IO_APIC_pin(0, pin);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
+ io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * This code may look a bit paranoid, but it's supposed to cooperate with
+ * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
+ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
+ * fanatically on his truly buggy board.
+ */
+static inline void check_timer(void)
+{
+ extern int timer_ack;
+ int pin1, pin2;
+ int vector;
+
+ /*
+ * get/set the timer IRQ vector:
+ */
+ disable_8259A_irq(0);
+ vector = assign_irq_vector(0);
+ set_intr_gate(vector, interrupt[0]);
+
+ /*
+ * Subtle, code in do_timer_interrupt() expects an AEOI
+ * mode for the 8259A whenever interrupts are routed
+ * through I/O APICs. Also IRQ0 has to be enabled in
+ * the 8259A which implies the virtual wire has to be
+ * disabled in the local APIC.
+ */
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+ init_8259A(1);
+ timer_ack = 1;
+ enable_8259A_irq(0);
+
+ pin1 = find_isa_irq_pin(0, mp_INT);
+ pin2 = find_isa_irq_pin(0, mp_ExtINT);
+
+ printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2);
+
+ if (pin1 != -1) {
+ /*
+ * Ok, does IRQ0 through the IOAPIC work?
+ */
+ unmask_IO_APIC_irq(0);
+ if (timer_irq_works())
+ return;
+ clear_IO_APIC_pin(0, pin1);
+ printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
+ }
+
+ printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
+ if (pin2 != -1) {
+ printk("\n..... (found pin %d) ...", pin2);
+ /*
+ * legacy devices should be connected to IO APIC #0
+ */
+ setup_ExtINT_IRQ0_pin(pin2, vector);
+ if (timer_irq_works()) {
+ printk("works.\n");
+ if (pin1 != -1)
+ replace_pin_at_irq(0, 0, pin1, 0, pin2);
+ else
+ add_pin_to_irq(0, 0, pin2);
+ return;
+ }
+ /*
+ * Cleanup, just in case ...
+ */
+ clear_IO_APIC_pin(0, pin2);
+ }
+ printk(" failed.\n");
+
+ printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
+
+ disable_8259A_irq(0);
+ irq_desc[0].handler = &lapic_irq_type;
+ apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
+ enable_8259A_irq(0);
+
+ if (timer_irq_works()) {
+ printk(" works.\n");
+ return;
+ }
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
+ printk(" failed.\n");
+
+ printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
+
+ init_8259A(0);
+ make_8259A_irq(0);
+ apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
+
+ unlock_ExtINT_logic();
+
+ if (timer_irq_works()) {
+ printk(" works.\n");
+ return;
+ }
+ printk(" failed :(.\n");
+ panic("IO-APIC + timer doesn't work! pester mingo@redhat.com");
+}
+
+/*
+ * IRQs that are handled by the old PIC in all cases:
+ * - IRQ2 is the cascade IRQ, and cannot be an IO-APIC IRQ.
+ * Linux doesn't really care, as it's not actually used
+ * for any interrupt handling anyway.
+ * - There used to be IRQ13 here as well, but MPS-compliant
+ * systems must not use it for FPU coupling and we want to
+ * use exception 16 anyway. And there are systems that
+ * connect it to an I/O APIC for other uses.
+ * Thus we don't mark it special any longer.
+ *
+ * Additionally, something is definitely wrong with irq9
+ * on PIIX4 boards.
+ */
+#define PIC_IRQS (1<<2)
+
+void __init setup_IO_APIC(void)
+{
+ enable_IO_APIC();
+
+ io_apic_irqs = ~PIC_IRQS;
+ printk("ENABLING IO-APIC IRQs\n");
+
+ /*
+ * Set up IO-APIC IRQ routing.
+ */
+ if (!acpi_ioapic)
+ setup_ioapic_ids_from_mpc();
+ sync_Arb_IDs();
+ setup_IO_APIC_irqs();
+ init_IO_APIC_traps();
+ check_timer();
+ if (!acpi_ioapic)
+ print_IO_APIC();
+}
+
+#endif /* CONFIG_X86_IO_APIC */
+
+
+
+/* --------------------------------------------------------------------------
+ ACPI-based IOAPIC Configuration
+ -------------------------------------------------------------------------- */
+
+#ifdef CONFIG_ACPI_BOOT
+
+#define IO_APIC_MAX_ID 15
+
+int __init io_apic_get_unique_id (int ioapic, int apic_id)
+{
+ struct IO_APIC_reg_00 reg_00;
+ static unsigned long apic_id_map = 0;
+ unsigned long flags;
+ int i = 0;
+
+ /*
+ * The P4 platform supports up to 256 APIC IDs on two separate APIC
+ * buses (one for LAPICs, one for IOAPICs), whereas its predecessors
+ * only support up to 16 on one shared APIC bus.
+ *
+ * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
+ * advantage of new APIC bus architecture.
+ */
+
+ if (!apic_id_map)
+ apic_id_map = phys_cpu_present_map;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(int *)&reg_00 = io_apic_read(ioapic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ if (apic_id >= IO_APIC_MAX_ID) {
+ printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
+ "%d\n", ioapic, apic_id, reg_00.ID);
+ apic_id = reg_00.ID;
+ }
+
+ /* XAPICs do not need unique IDs */
+ if (clustered_apic_mode == CLUSTERED_APIC_XAPIC){
+ printk(KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n",
+ ioapic, apic_id);
+ return apic_id;
+ }
+
+ /*
+ * Every APIC in a system must have a unique ID or we get lots of nice
+ * 'stuck on smp_invalidate_needed IPI wait' messages.
+ */
+ if (apic_id_map & (1 << apic_id)) {
+
+ for (i = 0; i < IO_APIC_MAX_ID; i++) {
+ if (!(apic_id_map & (1 << i)))
+ break;
+ }
+
+ if (i == IO_APIC_MAX_ID)
+ panic("Max apic_id exceeded!\n");
+
+ printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
+ "trying %d\n", ioapic, apic_id, i);
+
+ apic_id = i;
+ }
+
+ apic_id_map |= (1 << apic_id);
+
+ if (reg_00.ID != apic_id) {
+ reg_00.ID = apic_id;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(ioapic, 0, *(int *)&reg_00);
+ *(int *)&reg_00 = io_apic_read(ioapic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ /* Sanity check */
+ if (reg_00.ID != apic_id)
+ panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
+ }
+
+ printk(KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+
+ return apic_id;
+}
+
+
+int __init io_apic_get_version (int ioapic)
+{
+ struct IO_APIC_reg_01 reg_01;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(int *)&reg_01 = io_apic_read(ioapic, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return reg_01.version;
+}
+
+
+int __init io_apic_get_redir_entries (int ioapic)
+{
+ struct IO_APIC_reg_01 reg_01;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(int *)&reg_01 = io_apic_read(ioapic, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return reg_01.entries;
+}
+
+
+int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
+{
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ if (!IO_APIC_IRQ(irq)) {
+ printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0/n",
+ ioapic);
+ return -EINVAL;
+ }
+
+ /*
+ * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
+ * Note that we mask (disable) IRQs now -- these get enabled when the
+ * corresponding device driver registers for this IRQ.
+ */
+
+ memset(&entry,0,sizeof(entry));
+
+ entry.delivery_mode = dest_LowestPrio;
+ entry.dest_mode = INT_DELIVERY_MODE;
+ entry.dest.logical.logical_dest = target_cpus();
+ entry.mask = 1; /* Disabled (masked) */
+ entry.trigger = edge_level;
+ entry.polarity = active_high_low;
+
+ add_pin_to_irq(irq, ioapic, pin);
+
+ entry.vector = assign_irq_vector(irq);
+
+ printk(KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
+ "IRQ %d Mode:%i Active:%i)\n", ioapic,
+ mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low);
+
+ if (edge_level) {
+ irq_desc[irq].handler = &ioapic_level_irq_type;
+ } else {
+ irq_desc[irq].handler = &ioapic_edge_irq_type;
+ }
+
+ set_intr_gate(entry.vector, interrupt[irq]);
+
+ if (!ioapic && (irq < 16))
+ disable_8259A_irq(irq);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return 0;
+}
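+
+/*
+ * Illustrative call (values hypothetical; not compiled): route pin 19
+ * of IOAPIC 0 to IRQ 19, level-triggered and active-low -- the usual
+ * setting for a PCI interrupt taken from an ACPI _PRT entry.
+ */
+#if 0
+ io_apic_set_pci_routing(0, 19, 19, 1 /* level */, 1 /* active low */);
+#endif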
+
+#endif /*CONFIG_ACPI_BOOT*/
+
+extern char opt_leveltrigger[], opt_edgetrigger[];
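+
+/*
+ * Each option string is a comma-separated list of decimal IRQ
+ * numbers, e.g. "18,21" (values purely illustrative); parsing stops
+ * at the first out-of-range or malformed entry.
+ */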
+
+static int __init ioapic_trigger_setup(void)
+{
+ char *p;
+ irq_desc_t *desc;
+ long irq;
+
+ p = opt_leveltrigger;
+ while ( *p != '\0' )
+ {
+ irq = simple_strtol(p, &p, 10);
+ if ( (irq <= 0) || (irq >= NR_IRQS) )
+ {
+ printk("IRQ '%ld' out of range in level-trigger list '%s'\n",
+ irq, opt_leveltrigger);
+ break;
+ }
+
+ printk("Forcing IRQ %ld to level-trigger: ", irq);
+
+ desc = &irq_desc[irq];
+ spin_lock_irq(&desc->lock);
+
+ if ( desc->handler == &ioapic_level_irq_type )
+ {
+ printk("already level-triggered (no force applied).\n");
+ }
+ else if ( desc->handler != &ioapic_edge_irq_type )
+ {
+ printk("cannot force (can only force IO-APIC-edge IRQs).\n");
+ }
+ else
+ {
+ desc->handler = &ioapic_level_irq_type;
+ __mask_IO_APIC_irq(irq);
+ __level_IO_APIC_irq(irq);
+ printk("done.\n");
+ }
+
+ spin_unlock_irq(&desc->lock);
+
+ if ( *p == '\0' )
+ break;
+
+ if ( *p != ',' )
+ {
+ printk("Unexpected character '%c' in level-trigger list '%s'\n",
+ *p, opt_leveltrigger);
+ break;
+ }
+
+ p++;
+ }
+
+ p = opt_edgetrigger;
+ while ( *p != '\0' )
+ {
+ irq = simple_strtol(p, &p, 10);
+ if ( (irq <= 0) || (irq >= NR_IRQS) )
+ {
+ printk("IRQ '%ld' out of range in edge-trigger list '%s'\n",
+ irq, opt_edgetrigger);
+ break;
+ }
+
+ printk("Forcing IRQ %ld to edge-trigger: ", irq);
+
+ desc = &irq_desc[irq];
+ spin_lock_irq(&desc->lock);
+
+ if ( desc->handler == &ioapic_edge_irq_type )
+ {
+ printk("already edge-triggered (no force applied).\n");
+ }
+ else if ( desc->handler != &ioapic_level_irq_type )
+ {
+ printk("cannot force (can only force IO-APIC-level IRQs).\n");
+ }
+ else
+ {
+ desc->handler = &ioapic_edge_irq_type;
+ __edge_IO_APIC_irq(irq);
+ desc->status |= IRQ_PENDING; /* may have lost a masked edge */
+ printk("done.\n");
+ }
+
+ spin_unlock_irq(&desc->lock);
+
+ if ( *p == '\0' )
+ break;
+
+ if ( *p != ',' )
+ {
+ printk("Unexpected character '%c' in edge-trigger list '%s'\n",
+ *p, opt_edgetrigger);
+ break;
+ }
+
+ p++;
+ }
+
+ return 0;
+}
+
+__initcall(ioapic_trigger_setup);
--- /dev/null
+/*
+ * linux/arch/i386/kernel/irq.c
+ *
+ * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setup_irqs with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ */
+
+/*
+ * (mostly architecture independent, will move to kernel/irq.c in 2.5.)
+ *
+ * IRQs are in fact implemented a bit like signal handlers for the kernel.
+ * Naturally it's not a 1:1 relation, but there are similarities.
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/interrupt.h>
+#include <xen/irq.h>
+#include <xen/slab.h>
+#include <xen/event.h>
+#include <asm/mpspec.h>
+#include <asm/io_apic.h>
+#include <asm/msr.h>
+#include <asm/hardirq.h>
+#include <asm/ptrace.h>
+#include <asm/atomic.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <asm/flushtlb.h>
+#include <xen/delay.h>
+#include <xen/timex.h>
+#include <xen/perfc.h>
+#include <asm/smpboot.h>
+
+/*
+ * Linux has a controller-independent x86 interrupt architecture.
+ * Every controller has a 'controller-template' that is used
+ * by the main code to do the right thing. Each driver-visible
+ * interrupt source is transparently wired to the appropriate
+ * controller. Thus drivers need not be aware of the
+ * interrupt controller.
+ *
+ * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
+ * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
+ * (IO-APICs assumed to be messaging to Pentium local-APICs)
+ *
+ * The code is designed to be easily extended with new/different
+ * interrupt controllers, without having to do assembly magic.
+ */
+
+/*
+ * Controller mappings for all interrupt sources:
+ */
+irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned =
+{ [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}};
+
+#ifdef CONFIG_SMP
+/* NB. XXX We'll want some way of fiddling with this from DOM0. */
+unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL };
+#endif
+
+static void __do_IRQ_guest(int irq);
+
+/*
+ * Special irq handlers.
+ */
+
+void no_action(int cpl, void *dev_id, struct pt_regs *regs) { }
+
+/*
+ * Generic no controller code
+ */
+
+static void enable_none(unsigned int irq) { }
+static unsigned int startup_none(unsigned int irq) { return 0; }
+static void disable_none(unsigned int irq) { }
+static void ack_none(unsigned int irq)
+{
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * Each architecture has to answer this itself; it doesn't deserve
+ * a generic callback, I think.
+ */
+#if CONFIG_X86
+ printk("unexpected IRQ trap at vector %02x\n", irq);
+#ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * Currently unexpected vectors happen only on SMP and APIC.
+ * We _must_ ack these because every local APIC has only N
+ * irq slots per priority level, and a 'hanging, unacked' IRQ
+ * holds up an irq slot - in excessive cases (when multiple
+ * unexpected vectors occur) that might lock up the APIC
+ * completely.
+ */
+ ack_APIC_irq();
+#endif
+#endif
+}
+
+/* startup is the same as "enable", shutdown is same as "disable" */
+#define shutdown_none disable_none
+#define end_none enable_none
+
+struct hw_interrupt_type no_irq_type = {
+ "none",
+ startup_none,
+ shutdown_none,
+ enable_none,
+ disable_none,
+ ack_none,
+ end_none
+};
+
+atomic_t irq_err_count;
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+atomic_t irq_mis_count;
+#endif
+#endif
+
+/*
+ * Generic, controller-independent functions:
+ */
+
+/*
+ * Global interrupt locks for SMP. Allow interrupts to come in on any
+ * CPU, yet make cli/sti act globally to protect critical regions..
+ */
+
+#ifdef CONFIG_SMP
+unsigned char global_irq_holder = 0xff;
+unsigned volatile long global_irq_lock; /* pedantic: long for set_bit --RR */
+
+#define MAXCOUNT 100000000
+
+/*
+ * I had a lockup scenario where a tight loop doing
+ * spin_unlock()/spin_lock() on CPU#1 was racing with
+ * spin_lock() on CPU#0. CPU#0 should have noticed spin_unlock(), but
+ * apparently the spin_unlock() information did not make it
+ * through to CPU#0 ... nasty, is this by design, do we have to limit
+ * 'memory update oscillation frequency' artificially like here?
+ *
+ * Such 'high frequency update' races can be avoided by careful design, but
+ * some of our major constructs like spinlocks use similar techniques,
+ * it would be nice to clarify this issue. Set this define to 0 if you
+ * want to check whether your system freezes. I suspect the delay done
+ * by SYNC_OTHER_CORES() is in correlation with 'snooping latency', but
+ * I thought that such things are guaranteed by design, since we use
+ * the 'LOCK' prefix.
+ */
+#define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 0
+
+#if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND
+# define SYNC_OTHER_CORES(x) udelay(x+1)
+#else
+/*
+ * We have to allow irqs to arrive between __sti and __cli
+ */
+# define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop")
+#endif
+
+static inline void wait_on_irq(int cpu)
+{
+ for (;;) {
+
+ /*
+ * Wait until all interrupts are gone. Wait
+ * for bottom half handlers unless we're
+ * already executing in one..
+ */
+ if (!irqs_running())
+ if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock))
+ break;
+
+ /* Duh, we have to loop. Release the lock to avoid deadlocks */
+ clear_bit(0,&global_irq_lock);
+
+ for (;;) {
+ __sti();
+ SYNC_OTHER_CORES(cpu);
+ __cli();
+ if (irqs_running())
+ continue;
+ if (global_irq_lock)
+ continue;
+ if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock))
+ continue;
+ if (!test_and_set_bit(0,&global_irq_lock))
+ break;
+ }
+ }
+}
+
+/*
+ * This is called when we want to synchronize with
+ * interrupts. We may for example tell a device to
+ * stop sending interrupts: but to make sure there
+ * are no interrupts that are executing on another
+ * CPU we need to call this function.
+ */
+void synchronize_irq(void)
+{
+ if (irqs_running()) {
+ /* Stupid approach */
+ cli();
+ sti();
+ }
+}
+
+static inline void get_irqlock(int cpu)
+{
+ if (test_and_set_bit(0,&global_irq_lock)) {
+ /* do we already hold the lock? */
+ if ((unsigned char) cpu == global_irq_holder)
+ return;
+ /* Uhhuh.. Somebody else got it. Wait.. */
+ do {
+ do {
+ rep_nop();
+ } while (test_bit(0,&global_irq_lock));
+ } while (test_and_set_bit(0,&global_irq_lock));
+ }
+ /*
+ * We also need to make sure that nobody else is running
+ * in an interrupt context.
+ */
+ wait_on_irq(cpu);
+
+ /*
+ * Ok, finally..
+ */
+ global_irq_holder = cpu;
+}
+
+#define EFLAGS_IF_SHIFT 9
+
+/*
+ * A global "cli()" while in an interrupt context
+ * turns into just a local cli(). Interrupts
+ * should use spinlocks for the (very unlikely)
+ * case that they ever want to protect against
+ * each other.
+ *
+ * If we already have local interrupts disabled,
+ * this will not turn a local disable into a
+ * global one (problems with spinlocks: this makes
+ * save_flags+cli+sti usable inside a spinlock).
+ */
+void __global_cli(void)
+{
+ unsigned int flags;
+
+ __save_flags(flags);
+ if (flags & (1 << EFLAGS_IF_SHIFT)) {
+ int cpu = smp_processor_id();
+ __cli();
+ if (!local_irq_count(cpu))
+ get_irqlock(cpu);
+ }
+}
+
+void __global_sti(void)
+{
+ int cpu = smp_processor_id();
+
+ if (!local_irq_count(cpu))
+ release_irqlock(cpu);
+ __sti();
+}
+
+/*
+ * SMP flags value to restore to:
+ * 0 - global cli
+ * 1 - global sti
+ * 2 - local cli
+ * 3 - local sti
+ */
+unsigned long __global_save_flags(void)
+{
+ int retval;
+ int local_enabled;
+ unsigned long flags;
+ int cpu = smp_processor_id();
+
+ __save_flags(flags);
+ local_enabled = (flags >> EFLAGS_IF_SHIFT) & 1;
+ /* default to local */
+ retval = 2 + local_enabled;
+
+ /* check for global flags if we're not in an interrupt */
+ if (!local_irq_count(cpu)) {
+ if (local_enabled)
+ retval = 1;
+ if (global_irq_holder == cpu)
+ retval = 0;
+ }
+ return retval;
+}
+
+void __global_restore_flags(unsigned long flags)
+{
+ switch (flags) {
+ case 0:
+ __global_cli();
+ break;
+ case 1:
+ __global_sti();
+ break;
+ case 2:
+ __cli();
+ break;
+ case 3:
+ __sti();
+ break;
+ default:
+ printk("global_restore_flags: %08lx (%08lx)\n",
+ flags, (&flags)[-1]);
+ }
+}
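+
+/*
+ * Illustrative sketch (function name hypothetical; not compiled): on
+ * SMP, save_flags()/cli()/restore_flags() typically map onto the
+ * __global_* routines above, so a critical region written this way
+ * works whether or not the caller is in interrupt context.
+ */
+#if 0
+static void example_protected_region(void)
+{
+ unsigned long flags;
+
+ flags = __global_save_flags(); /* one of 0-3 as documented above */
+ __global_cli(); /* global or local cli as appropriate */
+ /* ... touch state shared with interrupt handlers ... */
+ __global_restore_flags(flags); /* undo exactly what cli did */
+}
+#endif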
+
+#endif
+
+/*
+ * This should really return information about whether
+ * we should do bottom half handling etc. Right now we
+ * end up _always_ checking the bottom half, which is a
+ * waste of time and is not what some drivers would
+ * prefer.
+ */
+static int handle_IRQ_event(unsigned int irq,
+ struct pt_regs * regs,
+ struct irqaction * action)
+{
+ int status;
+ int cpu = smp_processor_id();
+
+ irq_enter(cpu, irq);
+
+ status = 1; /* Force the "do bottom halves" bit */
+
+ if (!(action->flags & SA_INTERRUPT))
+ __sti();
+
+ do {
+ status |= action->flags;
+ action->handler(irq, action->dev_id, regs);
+ action = action->next;
+ } while (action);
+
+ __cli();
+
+ irq_exit(cpu, irq);
+
+ return status;
+}
+
+/*
+ * Generic enable/disable code: this just calls
+ * down into the PIC-specific version for the actual
+ * hardware disable after having gotten the irq
+ * controller lock.
+ */
+
+/**
+ * disable_irq_nosync - disable an irq without waiting
+ * @irq: Interrupt to disable
+ *
+ * Disable the selected interrupt line. Disables and Enables are
+ * nested.
+ * Unlike disable_irq(), this function does not ensure existing
+ * instances of the IRQ handler have completed before returning.
+ *
+ * This function may be called from IRQ context.
+ */
+
+inline void disable_irq_nosync(unsigned int irq)
+{
+ irq_desc_t *desc = irq_desc + irq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ if (!desc->depth++) {
+ desc->status |= IRQ_DISABLED;
+ desc->handler->disable(irq);
+ }
+ spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+/**
+ * disable_irq - disable an irq and wait for completion
+ * @irq: Interrupt to disable
+ *
+ * Disable the selected interrupt line. Enables and Disables are
+ * nested.
+ * This function waits for any pending IRQ handlers for this interrupt
+ * to complete before returning. If you use this function while
+ * holding a resource the IRQ handler may need you will deadlock.
+ *
+ * This function may be called - with care - from IRQ context.
+ */
+
+void disable_irq(unsigned int irq)
+{
+ disable_irq_nosync(irq);
+
+ if (!local_irq_count(smp_processor_id())) {
+ do {
+ barrier();
+ cpu_relax();
+ } while (irq_desc[irq].status & IRQ_INPROGRESS);
+ }
+}
+
+/**
+ * enable_irq - enable handling of an irq
+ * @irq: Interrupt to enable
+ *
+ * Undoes the effect of one call to disable_irq(). If this
+ * matches the last disable, processing of interrupts on this
+ * IRQ line is re-enabled.
+ *
+ * This function may be called from IRQ context.
+ */
+
+void enable_irq(unsigned int irq)
+{
+ irq_desc_t *desc = irq_desc + irq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ switch (desc->depth) {
+ case 1: {
+ unsigned int status = desc->status & ~IRQ_DISABLED;
+ desc->status = status;
+ if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
+ desc->status = status | IRQ_REPLAY;
+ hw_resend_irq(desc->handler,irq);
+ }
+ desc->handler->enable(irq);
+ /* fall-through */
+ }
+ default:
+ desc->depth--;
+ break;
+ case 0:
+ printk("enable_irq(%u) unbalanced from %p\n", irq,
+ __builtin_return_address(0));
+ }
+ spin_unlock_irqrestore(&desc->lock, flags);
+}
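+
+/*
+ * Illustrative sketch of the nesting rule (function name and IRQ
+ * hypothetical; not compiled): every disable_irq() must be balanced
+ * by one enable_irq(); the line is only unmasked when the depth
+ * count returns to zero.
+ */
+#if 0
+static void example_nested_disable(unsigned int irq)
+{
+ disable_irq(irq); /* depth 0 -> 1: line masked */
+ disable_irq(irq); /* depth 1 -> 2: still masked */
+ enable_irq(irq); /* depth 2 -> 1: still masked */
+ enable_irq(irq); /* depth 1 -> 0: unmasked, replay if pending */
+}
+#endif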
+
+/*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+asmlinkage unsigned int do_IRQ(struct pt_regs regs)
+{
+ /*
+ * We ack quickly, we don't want the irq controller
+ * thinking we're snobs just because some other CPU has
+ * disabled global interrupts (we have already done the
+ * INT_ACK cycles, it's too late to try to pretend to the
+ * controller that we aren't taking the interrupt).
+ *
+ * 0 return value means that this irq is already being
+ * handled by some other CPU. (or is disabled)
+ */
+ int irq = regs.orig_eax & 0xff; /* high bits used in ret_from_ code */
+ irq_desc_t *desc = irq_desc + irq;
+ struct irqaction * action;
+ unsigned int status;
+
+#ifdef PERF_COUNTERS
+ int cpu = smp_processor_id();
+ u32 cc_start, cc_end;
+
+ perfc_incra(irqs, cpu);
+ rdtscl(cc_start);
+#endif
+
+ spin_lock(&desc->lock);
+ desc->handler->ack(irq);
+
+ /*
+ REPLAY is when Linux resends an IRQ that was dropped earlier
+ WAITING is used by probe to mark irqs that are being tested
+ */
+ status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
+ status |= IRQ_PENDING; /* we _want_ to handle it */
+
+ /* We hook off guest-bound IRQs for special handling. */
+ if ( status & IRQ_GUEST )
+ {
+ __do_IRQ_guest(irq);
+ spin_unlock(&desc->lock);
+ return 1;
+ }
+
+ /*
+ * If the IRQ is disabled for whatever reason, we cannot use the action we
+ * have.
+ */
+ action = NULL;
+ if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
+ action = desc->action;
+ status &= ~IRQ_PENDING; /* we commit to handling */
+ status |= IRQ_INPROGRESS; /* we are handling it */
+ }
+ desc->status = status;
+
+ /*
+ * If there is no IRQ handler or it was disabled, exit early. Since we set
+ * PENDING, if another processor is handling a different instance of this
+ * same irq, the other processor will take care of it.
+ */
+ if (!action)
+ goto out;
+
+ /*
+ * Edge triggered interrupts need to remember pending events. This applies
+ * to any hw interrupts that allow a second instance of the same irq to
+ * arrive while we are in do_IRQ or in the handler. But the code here only
+ * handles the _second_ instance of the irq, not the third or fourth. So
+ * it is mostly useful for irq hardware that does not mask cleanly in an
+ * SMP environment.
+ */
+ for (;;) {
+ spin_unlock(&desc->lock);
+ handle_IRQ_event(irq, &regs, action);
+ spin_lock(&desc->lock);
+
+ if (!(desc->status & IRQ_PENDING))
+ break;
+ desc->status &= ~IRQ_PENDING;
+ }
+ desc->status &= ~IRQ_INPROGRESS;
+ out:
+ /*
+ * The ->end() handler has to deal with interrupts which got disabled
+ * while the handler was running.
+ */
+ desc->handler->end(irq);
+ spin_unlock(&desc->lock);
+
+#ifdef PERF_COUNTERS
+ rdtscl(cc_end);
+
+ if ( !action || (!(action->flags & SA_NOPROFILE)) )
+ {
+ perfc_adda(irq_time, cpu, cc_end - cc_start);
+#ifndef NDEBUG
+ if ( (cc_end - cc_start) > (cpu_khz * 100) )
+ printk("Long interrupt %08x -> %08x\n", cc_start, cc_end);
+#endif
+ }
+#endif
+
+ return 1;
+}
+
+/**
+ * request_irq - allocate an interrupt line
+ * @irq: Interrupt line to allocate
+ * @handler: Function to be called when the IRQ occurs
+ * @irqflags: Interrupt type flags
+ * @devname: An ascii name for the claiming device
+ * @dev_id: A cookie passed back to the handler function
+ *
+ * This call allocates interrupt resources and enables the
+ * interrupt line and IRQ handling. From the point this
+ * call is made your handler function may be invoked. Since
+ * your handler function must clear any interrupt the board
+ * raises, you must take care both to initialise your hardware
+ * and to set up the interrupt handler in the right order.
+ *
+ * Dev_id must be globally unique. Normally the address of the
+ * device data structure is used as the cookie. Since the handler
+ * receives this value it makes sense to use it.
+ *
+ * If your interrupt is shared you must pass a non NULL dev_id
+ * as this is required when freeing the interrupt.
+ *
+ * Flags:
+ *
+ * SA_SHIRQ Interrupt is shared
+ *
+ * SA_INTERRUPT Disable local interrupts while processing
+ */
+
+int request_irq(unsigned int irq,
+ void (*handler)(int, void *, struct pt_regs *),
+ unsigned long irqflags,
+ const char * devname,
+ void *dev_id)
+{
+ int retval;
+ struct irqaction * action;
+
+ if (irq >= NR_IRQS)
+ return -EINVAL;
+ if (!handler)
+ return -EINVAL;
+
+ action = (struct irqaction *)
+ kmalloc(sizeof(struct irqaction), GFP_KERNEL);
+ if (!action)
+ return -ENOMEM;
+
+ action->handler = handler;
+ action->flags = irqflags;
+ action->mask = 0;
+ action->name = devname;
+ action->next = NULL;
+ action->dev_id = dev_id;
+
+ retval = setup_irq(irq, action);
+ if (retval)
+ kfree(action);
+
+ return retval;
+}
+
+/**
+ * free_irq - free an interrupt
+ * @irq: Interrupt line to free
+ * @dev_id: Device identity to free
+ *
+ * Remove an interrupt handler. The handler is removed and if the
+ * interrupt line is no longer in use by any driver it is disabled.
+ * On a shared IRQ the caller must ensure the interrupt is disabled
+ * on the card it drives before calling this function. The function
+ * does not return until any executing interrupts for this IRQ
+ * have completed.
+ *
+ * This function may be called from interrupt context.
+ *
+ * Bugs: Attempting to free an irq in a handler for the same irq hangs
+ * the machine.
+ */
+
+void free_irq(unsigned int irq, void *dev_id)
+{
+ irq_desc_t *desc;
+ struct irqaction **p;
+ unsigned long flags;
+
+ if (irq >= NR_IRQS)
+ return;
+
+ desc = irq_desc + irq;
+ spin_lock_irqsave(&desc->lock,flags);
+ p = &desc->action;
+ for (;;) {
+ struct irqaction * action = *p;
+ if (action) {
+ struct irqaction **pp = p;
+ p = &action->next;
+ if (action->dev_id != dev_id)
+ continue;
+
+ /* Found it - now remove it from the list of entries */
+ *pp = action->next;
+ if (!desc->action) {
+ desc->status |= IRQ_DISABLED;
+ desc->handler->shutdown(irq);
+ }
+ spin_unlock_irqrestore(&desc->lock,flags);
+
+#ifdef CONFIG_SMP
+ /* Wait to make sure it's not being used on another CPU */
+ while (desc->status & IRQ_INPROGRESS) {
+ barrier();
+ cpu_relax();
+ }
+#endif
+ kfree(action);
+ return;
+ }
+ printk("Trying to free free IRQ%d\n",irq);
+ spin_unlock_irqrestore(&desc->lock,flags);
+ return;
+ }
+}
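+
+/*
+ * Illustrative driver usage (device structure, name and handler are
+ * hypothetical; not compiled): the same dev_id cookie is passed to
+ * request_irq() and free_irq(), which is what makes teardown work on
+ * shared lines.
+ */
+#if 0
+static void mydev_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct mydev *dev = (struct mydev *)dev_id;
+ /* acknowledge the board, then process the event */
+}
+
+static int mydev_attach(struct mydev *dev)
+{
+ /* Shared line: pass SA_SHIRQ and a unique dev_id. */
+ int err = request_irq(dev->irq, mydev_interrupt, SA_SHIRQ,
+ "mydev", dev);
+ if (err)
+ return err;
+ /* ... only now let the hardware raise interrupts ... */
+ return 0;
+}
+
+static void mydev_detach(struct mydev *dev)
+{
+ free_irq(dev->irq, dev); /* same dev_id as request_irq() */
+}
+#endif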
+
+/*
+ * IRQ autodetection code..
+ *
+ * This depends on the fact that any interrupt that
+ * comes in on to an unassigned handler will get stuck
+ * with "IRQ_WAITING" cleared and the interrupt
+ * disabled.
+ */
+
+static spinlock_t probe_sem = SPIN_LOCK_UNLOCKED;
+
+/**
+ * probe_irq_on - begin an interrupt autodetect
+ *
+ * Commence probing for an interrupt. The interrupts are scanned
+ * and a mask of potential interrupt lines is returned.
+ *
+ */
+
+unsigned long probe_irq_on(void)
+{
+ unsigned int i;
+ irq_desc_t *desc;
+ unsigned long val;
+ unsigned long s=0, e=0;
+
+ spin_lock(&probe_sem);
+ /*
+ * something may have generated an irq long ago and we want to
+ * flush such a longstanding irq before considering it as spurious.
+ */
+ for (i = NR_IRQS-1; i > 0; i--) {
+ desc = irq_desc + i;
+
+ spin_lock_irq(&desc->lock);
+ if (!irq_desc[i].action)
+ irq_desc[i].handler->startup(i);
+ spin_unlock_irq(&desc->lock);
+ }
+
+ /* Wait for longstanding interrupts to trigger (20ms delay). */
+ rdtscl(s);
+ do {
+ synchronize_irq();
+ rdtscl(e);
+ } while ( ((e-s)/ticks_per_usec) < 20000 );
+
+ /*
+ * enable any unassigned irqs
+ * (we must startup again here because if a longstanding irq
+ * happened in the previous stage, it may have masked itself)
+ */
+ for (i = NR_IRQS-1; i > 0; i--) {
+ desc = irq_desc + i;
+
+ spin_lock_irq(&desc->lock);
+ if (!desc->action) {
+ desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
+ if (desc->handler->startup(i))
+ desc->status |= IRQ_PENDING;
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+
+ /*
+ * Wait for spurious interrupts to trigger (100ms delay).
+ */
+ rdtscl(s);
+ do {
+ synchronize_irq();
+ rdtscl(e);
+ } while ( ((e-s)/ticks_per_usec) < 100000 );
+
+ /*
+ * Now filter out any obviously spurious interrupts
+ */
+ val = 0;
+ for (i = 0; i < NR_IRQS; i++) {
+ irq_desc_t *desc = irq_desc + i;
+ unsigned int status;
+
+ spin_lock_irq(&desc->lock);
+ status = desc->status;
+
+ if (status & IRQ_AUTODETECT) {
+ /* It triggered already - consider it spurious. */
+ if (!(status & IRQ_WAITING)) {
+ desc->status = status & ~IRQ_AUTODETECT;
+ desc->handler->shutdown(i);
+ } else
+ if (i < 32)
+ val |= 1 << i;
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+
+ return val;
+}
+
+/*
+ * Return a mask of triggered interrupts (this
+ * can handle only legacy ISA interrupts).
+ */
+
+/**
+ * probe_irq_mask - scan a bitmap of interrupt lines
+ * @val: mask of interrupts to consider
+ *
+ * Scan the ISA bus interrupt lines and return a bitmap of
+ * active interrupts. The interrupt probe logic state is then
+ * returned to its previous value.
+ *
+ * Note: we need to scan all the irq's even though we will
+ * only return ISA irq numbers - just so that we reset them
+ * all to a known state.
+ */
+unsigned int probe_irq_mask(unsigned long val)
+{
+ int i;
+ unsigned int mask;
+
+ mask = 0;
+ for (i = 0; i < NR_IRQS; i++) {
+ irq_desc_t *desc = irq_desc + i;
+ unsigned int status;
+
+ spin_lock_irq(&desc->lock);
+ status = desc->status;
+
+ if (status & IRQ_AUTODETECT) {
+ if (i < 16 && !(status & IRQ_WAITING))
+ mask |= 1 << i;
+
+ desc->status = status & ~IRQ_AUTODETECT;
+ desc->handler->shutdown(i);
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+ spin_unlock(&probe_sem);
+
+ return mask & val;
+}
+
+/*
+ * Return the one interrupt that triggered (this can
+ * handle any interrupt source).
+ */
+
+/**
+ * probe_irq_off - end an interrupt autodetect
+ * @val: mask of potential interrupts (unused)
+ *
+ * Scans the unused interrupt lines and returns the line which
+ * appears to have triggered the interrupt. If no interrupt was
+ * found then zero is returned. If more than one interrupt is
+ * found then minus the first candidate is returned to indicate
+ * there is doubt.
+ *
+ * The interrupt probe logic state is returned to its previous
+ * value.
+ *
+ * BUGS: When used in a module (which arguably shouldn't happen)
+ * nothing prevents two IRQ probe callers from overlapping. The
+ * results of this are non-optimal.
+ */
+
+int probe_irq_off(unsigned long val)
+{
+ int i, irq_found, nr_irqs;
+
+ nr_irqs = 0;
+ irq_found = 0;
+ for (i = 0; i < NR_IRQS; i++) {
+ irq_desc_t *desc = irq_desc + i;
+ unsigned int status;
+
+ spin_lock_irq(&desc->lock);
+ status = desc->status;
+
+ if (status & IRQ_AUTODETECT) {
+ if (!(status & IRQ_WAITING)) {
+ if (!nr_irqs)
+ irq_found = i;
+ nr_irqs++;
+ }
+ desc->status = status & ~IRQ_AUTODETECT;
+ desc->handler->shutdown(i);
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+ spin_unlock(&probe_sem);
+
+ if (nr_irqs > 1)
+ irq_found = -irq_found;
+ return irq_found;
+}
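+
+/*
+ * Illustrative probe sequence (device poke elided; not compiled):
+ * the calls must bracket the device poke exactly like this, since
+ * probe_irq_on() takes probe_sem and probe_irq_off() releases it.
+ */
+#if 0
+static int example_probe(void)
+{
+ unsigned long mask;
+ int irq;
+
+ mask = probe_irq_on(); /* arm autodetect on unused lines */
+ /* ... make the device raise exactly one interrupt ... */
+ irq = probe_irq_off(mask); /* >0 found, 0 none, <0 ambiguous */
+ return irq;
+}
+#endif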
+
+/* this was setup_x86_irq but it seems pretty generic */
+int setup_irq(unsigned int irq, struct irqaction * new)
+{
+ int shared = 0;
+ unsigned long flags;
+ struct irqaction *old, **p;
+ irq_desc_t *desc = irq_desc + irq;
+
+ /*
+ * The following block of code has to be executed atomically
+ */
+ spin_lock_irqsave(&desc->lock,flags);
+
+ if ( desc->status & IRQ_GUEST )
+ {
+ spin_unlock_irqrestore(&desc->lock,flags);
+ return -EBUSY;
+ }
+
+ p = &desc->action;
+ if ((old = *p) != NULL) {
+ /* Can't share interrupts unless both agree to */
+ if (!(old->flags & new->flags & SA_SHIRQ)) {
+ spin_unlock_irqrestore(&desc->lock,flags);
+ return -EBUSY;
+ }
+
+ /* add new interrupt at end of irq queue */
+ do {
+ p = &old->next;
+ old = *p;
+ } while (old);
+ shared = 1;
+ }
+
+ *p = new;
+
+ if (!shared) {
+ desc->depth = 0;
+ desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING);
+ desc->handler->startup(irq);
+ }
+
+ spin_unlock_irqrestore(&desc->lock,flags);
+
+ return 0;
+}
+
+
+
+/*
+ * HANDLING OF GUEST-BOUND PHYSICAL IRQS
+ */
+
+#define IRQ_MAX_GUESTS 7
+typedef struct {
+ u8 nr_guests;
+ u8 in_flight;
+ u8 shareable;
+ struct task_struct *guest[IRQ_MAX_GUESTS];
+} irq_guest_action_t;
+
+static void __do_IRQ_guest(int irq)
+{
+ irq_desc_t *desc = &irq_desc[irq];
+ irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
+ struct task_struct *p;
+ int i;
+
+ for ( i = 0; i < action->nr_guests; i++ )
+ {
+ p = action->guest[i];
+ if ( !test_and_set_bit(irq, &p->pirq_mask) )
+ action->in_flight++;
+ send_guest_pirq(p, irq);
+ }
+}
+
+int pirq_guest_unmask(struct task_struct *p)
+{
+ irq_desc_t *desc;
+ int i, j, pirq;
+ u32 m;
+ shared_info_t *s = p->shared_info;
+
+ for ( i = 0; i < 2; i++ )
+ {
+ m = p->pirq_mask[i];
+ while ( (j = ffs(m)) != 0 )
+ {
+ m &= ~(1 << --j);
+ pirq = (i << 5) + j;
+ desc = &irq_desc[pirq];
+ spin_lock_irq(&desc->lock);
+ if ( !test_bit(p->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) &&
+ test_and_clear_bit(pirq, &p->pirq_mask) &&
+ (--((irq_guest_action_t *)desc->action)->in_flight == 0) )
+ desc->handler->end(pirq);
+ spin_unlock_irq(&desc->lock);
+ }
+ }
+
+ return 0;
+}
+
+int pirq_guest_bind(struct task_struct *p, int irq, int will_share)
+{
+ unsigned long flags;
+ irq_desc_t *desc = &irq_desc[irq];
+ irq_guest_action_t *action;
+ int rc = 0;
+
+ if ( !IS_CAPABLE_PHYSDEV(p) )
+ return -EPERM;
+
+ spin_lock_irqsave(&desc->lock, flags);
+
+ action = (irq_guest_action_t *)desc->action;
+
+ if ( !(desc->status & IRQ_GUEST) )
+ {
+ if ( desc->action != NULL )
+ {
+ DPRINTK("Cannot bind IRQ %d to guest. In use by '%s'.\n",
+ irq, desc->action->name);
+ rc = -EBUSY;
+ goto out;
+ }
+
+ action = kmalloc(sizeof(irq_guest_action_t), GFP_KERNEL);
+ if ( (desc->action = (struct irqaction *)action) == NULL )
+ {
+ DPRINTK("Cannot bind IRQ %d to guest. Out of memory.\n", irq);
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ action->nr_guests = 0;
+ action->in_flight = 0;
+ action->shareable = will_share;
+
+ desc->depth = 0;
+ desc->status |= IRQ_GUEST;
+ desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING);
+ desc->handler->startup(irq);
+
+ /* Attempt to bind the interrupt target to the correct CPU. */
+ if ( desc->handler->set_affinity != NULL )
+ desc->handler->set_affinity(
+ irq, apicid_to_phys_cpu_present(p->processor));
+ }
+ else if ( !will_share || !action->shareable )
+ {
+ DPRINTK("Cannot bind IRQ %d to guest. Will not share with others.\n",
+ irq);
+ rc = -EBUSY;
+ goto out;
+ }
+
+ if ( action->nr_guests == IRQ_MAX_GUESTS )
+ {
+ DPRINTK("Cannot bind IRQ %d to guest. Already at max share.\n", irq);
+ rc = -EBUSY;
+ goto out;
+ }
+
+ action->guest[action->nr_guests++] = p;
+
+ out:
+ spin_unlock_irqrestore(&desc->lock, flags);
+ return rc;
+}
+
+int pirq_guest_unbind(struct task_struct *p, int irq)
+{
+ unsigned long flags;
+ irq_desc_t *desc = &irq_desc[irq];
+ irq_guest_action_t *action;
+ int i;
+
+ spin_lock_irqsave(&desc->lock, flags);
+
+ action = (irq_guest_action_t *)desc->action;
+
+ if ( test_and_clear_bit(irq, &p->pirq_mask) &&
+ (--action->in_flight == 0) )
+ desc->handler->end(irq);
+
+ if ( action->nr_guests == 1 )
+ {
+ desc->action = NULL;
+ kfree(action);
+ desc->status |= IRQ_DISABLED;
+ desc->status &= ~IRQ_GUEST;
+ desc->handler->shutdown(irq);
+ }
+ else
+ {
+ i = 0;
+ while ( action->guest[i] != p )
+ i++;
+ memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1);
+ action->nr_guests--;
+ }
+
+ spin_unlock_irqrestore(&desc->lock, flags);
+ return 0;
+}
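+
+/*
+ * Illustrative lifecycle (domain pointer and IRQ number hypothetical;
+ * not compiled): a privileged domain binds a shareable line, each
+ * interrupt is forwarded as a PIRQ event by __do_IRQ_guest(), and
+ * handler->end() is deferred until pirq_guest_unmask() sees
+ * in_flight drop back to zero.
+ */
+#if 0
+ rc = pirq_guest_bind(p, 18, 1 /* will_share */);
+ /* ... guest handles events and unmasks its event channel, which
+ * leads to pirq_guest_unmask(p) being called ... */
+ rc = pirq_guest_unbind(p, 18);
+#endif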
--- /dev/null
+/******************************************************************************
+ * arch/i386/mm.c
+ *
+ * Modifications to Linux original are copyright (c) 2002-2003, K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/init.h>
+#include <xen/mm.h>
+#include <asm/page.h>
+#include <asm/flushtlb.h>
+#include <asm/fixmap.h>
+#include <asm/domain_page.h>
+
+static inline void set_pte_phys(unsigned long vaddr,
+ l1_pgentry_t entry)
+{
+ l2_pgentry_t *l2ent;
+ l1_pgentry_t *l1ent;
+
+ l2ent = &idle_pg_table[l2_table_offset(vaddr)];
+ l1ent = l2_pgentry_to_l1(*l2ent) + l1_table_offset(vaddr);
+ *l1ent = entry;
+
+ /* It's enough to flush this one mapping. */
+ __flush_tlb_one(vaddr);
+}
+
+
+void __set_fixmap(enum fixed_addresses idx,
+ l1_pgentry_t entry)
+{
+ unsigned long address = __fix_to_virt(idx);
+
+ if ( likely(idx < __end_of_fixed_addresses) )
+ set_pte_phys(address, entry);
+ else
+ printk("Invalid __set_fixmap\n");
+}
+
+
+static void __init fixrange_init(unsigned long start,
+ unsigned long end,
+ l2_pgentry_t *pg_base)
+{
+ l2_pgentry_t *l2e;
+ int i;
+ unsigned long vaddr, page;
+
+ vaddr = start;
+ i = l2_table_offset(vaddr);
+ l2e = pg_base + i;
+
+ for ( ; (i < ENTRIES_PER_L2_PAGETABLE) && (vaddr != end); l2e++, i++ )
+ {
+ if ( !l2_pgentry_empty(*l2e) )
+ continue;
+ page = (unsigned long)get_free_page(GFP_KERNEL);
+ clear_page(page);
+ *l2e = mk_l2_pgentry(__pa(page) | __PAGE_HYPERVISOR);
+ vaddr += 1 << L2_PAGETABLE_SHIFT;
+ }
+}
+
+void __init paging_init(void)
+{
+ unsigned long addr;
+ void *ioremap_pt;
+ int i;
+
+ /* Idle page table 1:1 maps the first part of physical memory. */
+ for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
+ idle_pg_table[i] =
+ mk_l2_pgentry((i << L2_PAGETABLE_SHIFT) |
+ __PAGE_HYPERVISOR | _PAGE_PSE);
+
+ /*
+ * Fixed mappings, only the page table structure has to be
+ * created - mappings will be set by set_fixmap():
+ */
+ addr = FIXADDR_START & ~((1<<L2_PAGETABLE_SHIFT)-1);
+ fixrange_init(addr, 0, idle_pg_table);
+
+ /* Create page table for ioremap(). */
+ ioremap_pt = (void *)get_free_page(GFP_KERNEL);
+ clear_page(ioremap_pt);
+ idle_pg_table[IOREMAP_VIRT_START >> L2_PAGETABLE_SHIFT] =
+ mk_l2_pgentry(__pa(ioremap_pt) | __PAGE_HYPERVISOR);
+
+ /* Create read-only mapping of MPT for guest-OS use. */
+ idle_pg_table[READONLY_MPT_VIRT_START >> L2_PAGETABLE_SHIFT] =
+ idle_pg_table[RDWR_MPT_VIRT_START >> L2_PAGETABLE_SHIFT];
+ mk_l2_readonly(idle_pg_table +
+ (READONLY_MPT_VIRT_START >> L2_PAGETABLE_SHIFT));
+
+ /* Set up mapping cache for domain pages. */
+ mapcache = (unsigned long *)get_free_page(GFP_KERNEL);
+ clear_page(mapcache);
+ idle_pg_table[MAPCACHE_VIRT_START >> L2_PAGETABLE_SHIFT] =
+ mk_l2_pgentry(__pa(mapcache) | __PAGE_HYPERVISOR);
+
+ /* Set up linear page table mapping. */
+ idle_pg_table[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
+ mk_l2_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR);
+
+}
+
+void __init zap_low_mappings(void)
+{
+ int i;
+ for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
+ idle_pg_table[i] = mk_l2_pgentry(0);
+ flush_tlb_all_pge();
+}
+
+
+long do_stack_switch(unsigned long ss, unsigned long esp)
+{
+ int nr = smp_processor_id();
+ struct tss_struct *t = &init_tss[nr];
+
+ /* We need to do this check as we load and use SS on guest's behalf. */
+ if ( (ss & 3) == 0 )
+ return -EPERM;
+
+ current->thread.guestos_ss = ss;
+ current->thread.guestos_sp = esp;
+ t->ss1 = ss;
+ t->esp1 = esp;
+
+ return 0;
+}
+
+
+/* Returns TRUE if given descriptor is valid for GDT or LDT. */
+int check_descriptor(unsigned long a, unsigned long b)
+{
+ unsigned long base, limit;
+
+ /* A not-present descriptor will always fault, so is safe. */
+ if ( !(b & _SEGMENT_P) )
+ goto good;
+
+ /*
+ * We don't allow a DPL of zero. There is no legitimate reason for
+ * specifying DPL==0, and it gets rather dangerous if we also accept call
+ * gates (consider a call gate pointing at another guestos descriptor with
+ * DPL 0 -- this would get the OS ring-0 privileges).
+ */
+ if ( (b & _SEGMENT_DPL) == 0 )
+ goto bad;
+
+ if ( !(b & _SEGMENT_S) )
+ {
+ /*
+ * System segment:
+ * 1. Don't allow interrupt or trap gates as they belong in the IDT.
+ * 2. Don't allow TSS descriptors or task gates as we don't
+ * virtualise x86 tasks.
+ * 3. Don't allow LDT descriptors because they're unnecessary and
+ * I'm uneasy about allowing an LDT page to contain LDT
+ * descriptors. In any case, Xen automatically creates the
+ * required descriptor when reloading the LDT register.
+ * 4. We allow call gates but they must not jump to a private segment.
+ */
+
+ /* Disallow everything but call gates. */
+ if ( (b & _SEGMENT_TYPE) != 0xc00 )
+ goto bad;
+
+ /* Can't allow far jump to a Xen-private segment. */
+ if ( !VALID_CODESEL(a>>16) )
+ goto bad;
+
+ /* Reserved bits must be zero. */
+ if ( (b & 0xe0) != 0 )
+ goto bad;
+
+ /* No base/limit check is needed for a call gate. */
+ goto good;
+ }
+
+ /* Check that base/limit do not overlap Xen-private space. */
+ base = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16);
+ limit = (b&0xf0000) | (a&0xffff);
+ limit++; /* We add one because limit is inclusive. */
+ if ( (b & _SEGMENT_G) )
+ limit <<= 12;
+ if ( ((base + limit) <= base) ||
+ ((base + limit) > PAGE_OFFSET) )
+ goto bad;
+
+ good:
+ return 1;
+ bad:
+ return 0;
+}
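+
+/*
+ * Worked example of the base/limit decode above (descriptor words
+ * hypothetical): a = 0x0000FFFF, b = 0x00CFBA00 is a flat ring-1
+ * code segment:
+ * base = (b & 0xff000000) | ((b & 0xff) << 16) | (a >> 16) = 0
+ * limit = ((b & 0xf0000) | (a & 0xffff)) + 1 = 0x100000 pages (G set)
+ * Scaled by 4kB the limit wraps past PAGE_OFFSET, so the check
+ * rejects it: guests must keep their segments clear of the
+ * Xen-private area.
+ */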
+
+
+long set_gdt(struct task_struct *p,
+ unsigned long *frames,
+ unsigned int entries)
+{
+ /* NB. There are 512 8-byte entries per GDT page. */
+ int i, nr_pages = (entries + 511) / 512;
+ unsigned long pfn;
+ struct desc_struct *vgdt;
+
+ /* Check the new GDT. */
+ for ( i = 0; i < nr_pages; i++ )
+ {
+ if ( unlikely(frames[i] >= max_page) ||
+ unlikely(!get_page_and_type(&frame_table[frames[i]],
+ p, PGT_gdt_page)) )
+ goto fail;
+ }
+
+ /* Copy reserved GDT entries to the new GDT. */
+ vgdt = map_domain_mem(frames[0] << PAGE_SHIFT);
+ memcpy(vgdt + FIRST_RESERVED_GDT_ENTRY,
+ gdt_table + FIRST_RESERVED_GDT_ENTRY,
+ NR_RESERVED_GDT_ENTRIES*8);
+ unmap_domain_mem(vgdt);
+
+ /* Tear down the old GDT. */
+ for ( i = 0; i < 16; i++ )
+ {
+ if ( (pfn = l1_pgentry_to_pagenr(p->mm.perdomain_pt[i])) != 0 )
+ put_page_and_type(&frame_table[pfn]);
+ p->mm.perdomain_pt[i] = mk_l1_pgentry(0);
+ }
+
+ /* Install the new GDT. */
+ for ( i = 0; i < nr_pages; i++ )
+ p->mm.perdomain_pt[i] =
+ mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR);
+
+ SET_GDT_ADDRESS(p, GDT_VIRT_START);
+ SET_GDT_ENTRIES(p, (entries*8)-1);
+
+ return 0;
+
+ fail:
+ while ( i-- > 0 )
+ put_page_and_type(&frame_table[frames[i]]);
+ return -EINVAL;
+}
+
+
+long do_set_gdt(unsigned long *frame_list, unsigned int entries)
+{
+ int nr_pages = (entries + 511) / 512;
+ unsigned long frames[16];
+ long ret;
+
+ if ( (entries <= LAST_RESERVED_GDT_ENTRY) || (entries > 8192) )
+ return -EINVAL;
+
+ if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) )
+ return -EFAULT;
+
+ if ( (ret = set_gdt(current, frames, entries)) == 0 )
+ {
+ local_flush_tlb();
+ __asm__ __volatile__ ("lgdt %0" : "=m" (*current->mm.gdt));
+ }
+
+ return ret;
+}
+
+
+long do_update_descriptor(
+ unsigned long pa, unsigned long word1, unsigned long word2)
+{
+ unsigned long *gdt_pent, pfn = pa >> PAGE_SHIFT;
+ struct pfn_info *page;
+ long ret = -EINVAL;
+
+ if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(word1, word2) )
+ return -EINVAL;
+
+ page = &frame_table[pfn];
+ if ( unlikely(!get_page(page, current)) )
+ goto out;
+
+ /* Check if the given frame is in use in an unsafe context. */
+ switch ( page->type_and_flags & PGT_type_mask )
+ {
+ case PGT_gdt_page:
+ /* Disallow updates of Xen-reserved descriptors in the current GDT. */
+ if ( (l1_pgentry_to_pagenr(current->mm.perdomain_pt[0]) == pfn) &&
+ (((pa&(PAGE_SIZE-1))>>3) >= FIRST_RESERVED_GDT_ENTRY) &&
+ (((pa&(PAGE_SIZE-1))>>3) <= LAST_RESERVED_GDT_ENTRY) )
+ goto out;
+ if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
+ goto out;
+ break;
+ case PGT_ldt_page:
+ if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
+ goto out;
+ break;
+ default:
+ if ( unlikely(!get_page_type(page, PGT_writeable_page)) )
+ goto out;
+ break;
+ }
+
+ /* All is good so make the update. */
+ gdt_pent = map_domain_mem(pa);
+ gdt_pent[0] = word1;
+ gdt_pent[1] = word2;
+ unmap_domain_mem(gdt_pent);
+
+ put_page_type(page);
+
+ ret = 0; /* success */
+
+ out:
+ put_page(page);
+ return ret;
+}
+
+#ifdef MEMORY_GUARD
+
+void *memguard_init(void *heap_start)
+{
+ l1_pgentry_t *l1;
+ int i, j;
+
+ /* Round the allocation pointer up to a page boundary. */
+ heap_start = (void *)(((unsigned long)heap_start + (PAGE_SIZE-1)) &
+ PAGE_MASK);
+
+ /* Memory guarding is incompatible with super pages. */
+ for ( i = 0; i < (MAX_MONITOR_ADDRESS >> L2_PAGETABLE_SHIFT); i++ )
+ {
+ l1 = (l1_pgentry_t *)heap_start;
+ heap_start = (void *)((unsigned long)heap_start + PAGE_SIZE);
+ for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
+ l1[j] = mk_l1_pgentry((i << L2_PAGETABLE_SHIFT) |
+ (j << L1_PAGETABLE_SHIFT) |
+ __PAGE_HYPERVISOR);
+ idle_pg_table[i] = idle_pg_table[i + l2_table_offset(PAGE_OFFSET)] =
+ mk_l2_pgentry(virt_to_phys(l1) | __PAGE_HYPERVISOR);
+ }
+
+ return heap_start;
+}
+
+static void __memguard_change_range(void *p, unsigned long l, int guard)
+{
+ l1_pgentry_t *l1;
+ l2_pgentry_t *l2;
+ unsigned long _p = (unsigned long)p;
+ unsigned long _l = (unsigned long)l;
+
+ /* Ensure we are dealing with a page-aligned whole number of pages. */
+ ASSERT((_p&PAGE_MASK) != 0);
+ ASSERT((_l&PAGE_MASK) != 0);
+ ASSERT((_p&~PAGE_MASK) == 0);
+ ASSERT((_l&~PAGE_MASK) == 0);
+
+ while ( _l != 0 )
+ {
+ l2 = &idle_pg_table[l2_table_offset(_p)];
+ l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
+ if ( guard )
+ *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) & ~_PAGE_PRESENT);
+ else
+ *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) | _PAGE_PRESENT);
+ _p += PAGE_SIZE;
+ _l -= PAGE_SIZE;
+ }
+}
+
+void memguard_guard_range(void *p, unsigned long l)
+{
+ __memguard_change_range(p, l, 1);
+ local_flush_tlb();
+}
+
+void memguard_unguard_range(void *p, unsigned long l)
+{
+ __memguard_change_range(p, l, 0);
+}
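+
+/*
+ * Illustrative use (pointer and length hypothetical; not compiled):
+ * guarding a range turns any stray access into a page fault until
+ * the range is unguarded again.
+ */
+#if 0
+ memguard_guard_range(p, 4*PAGE_SIZE); /* clears _PAGE_PRESENT */
+ /* ... any access through 'p' now faults ... */
+ memguard_unguard_range(p, 4*PAGE_SIZE); /* restores _PAGE_PRESENT */
+#endif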
+
+int memguard_is_guarded(void *p)
+{
+ l1_pgentry_t *l1;
+ l2_pgentry_t *l2;
+ unsigned long _p = (unsigned long)p;
+ l2 = &idle_pg_table[l2_table_offset(_p)];
+ l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
+ return !(l1_pgentry_val(*l1) & _PAGE_PRESENT);
+}
+
+#endif
--- /dev/null
+/*
+ * Intel Multiprocessor Specification 1.1 and 1.4
+ * compliant MP-table parsing routines.
+ *
+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ * Fixes
+ * Erich Boleyn : MP v1.4 and additional changes.
+ * Alan Cox : Added EBDA scanning
+ * Ingo Molnar : various cleanups and rewrites
+ * Maciej W. Rozycki: Bits for default MP configurations
+ * Paul Diefenbaugh: Added full ACPI support
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/kernel.h>
+#include <xen/irq.h>
+#include <xen/smp.h>
+#include <xen/mm.h>
+#include <xen/acpi.h>
+#include <asm/acpi.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/mpspec.h>
+#include <asm/flushtlb.h>
+#include <asm/smpboot.h>
+
+int numnodes = 1; /* XXX Xen */
+
+/* Have we found an MP table */
+int smp_found_config;
+
+/*
+ * Various Linux-internal data structures created from the
+ * MP-table.
+ */
+int apic_version [MAX_APICS];
+int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+int mp_current_pci_id;
+int *mp_bus_id_to_type;
+int *mp_bus_id_to_node;
+int *mp_bus_id_to_local;
+int *mp_bus_id_to_pci_bus;
+int max_mp_busses;
+int max_irq_sources;
+
+/* I/O APIC entries */
+struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+
+/* # of MP IRQ source entries */
+struct mpc_config_intsrc *mp_irqs;
+
+/* MP IRQ source entries */
+int mp_irq_entries;
+
+int nr_ioapics;
+
+int pic_mode;
+unsigned long mp_lapic_addr;
+
+/* Processor that is doing the boot up */
+unsigned int boot_cpu_physical_apicid = -1U;
+unsigned int boot_cpu_logical_apicid = -1U;
+/* Internal processor count */
+static unsigned int num_processors;
+
+/* Bitmask of physically existing CPUs */
+unsigned long phys_cpu_present_map;
+unsigned long logical_cpu_present_map;
+
+#ifdef CONFIG_X86_CLUSTERED_APIC
+unsigned char esr_disable = 0;
+unsigned char clustered_apic_mode = CLUSTERED_APIC_NONE;
+unsigned int apic_broadcast_id = APIC_BROADCAST_ID_APIC;
+#endif
+unsigned char raw_phys_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+/*
+ * Intel MP BIOS table parsing routines:
+ */
+
+#ifndef CONFIG_X86_VISWS_APIC
+/*
+ * Checksum an MP configuration block.
+ */
+
+static int __init mpf_checksum(unsigned char *mp, int len)
+{
+ int sum = 0;
+
+ while (len--)
+ sum += *mp++;
+
+ return sum & 0xFF;
+}
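+
+/*
+ * A well-formed MP structure sums to zero modulo 256 over all of its
+ * bytes, checksum field included, so callers treat any non-zero
+ * return as a corrupt table.
+ */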
+
+/*
+ * Processor encoding in an MP configuration block
+ */
+
+static char __init *mpc_family(int family,int model)
+{
+ static char n[32];
+ static char *model_defs[]=
+ {
+ "80486DX","80486DX",
+ "80486SX","80486DX/2 or 80487",
+ "80486SL","80486SX/2",
+ "Unknown","80486DX/2-WB",
+ "80486DX/4","80486DX/4-WB"
+ };
+
+ switch (family) {
+ case 0x04:
+ if (model < 10)
+ return model_defs[model];
+ break;
+
+ case 0x05:
+ return("Pentium(tm)");
+
+ case 0x06:
+ return("Pentium(tm) Pro");
+
+ case 0x0F:
+ if (model == 0x00)
+ return("Pentium 4(tm)");
+ if (model == 0x01)
+ return("Pentium 4(tm)");
+ if (model == 0x02)
+ return("Pentium 4(tm) XEON(tm)");
+ if (model == 0x0F)
+ return("Special controller");
+ }
+ sprintf(n,"Unknown CPU [%d:%d]",family, model);
+ return n;
+}
+
+/*
+ * We have to match translation table entries to main table entries by
+ * counter, hence the mpc_record variable ... can't see a less disgusting
+ * way of doing this ...
+ */
+
+static int mpc_record;
+static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata;
+
+void __init MP_processor_info (struct mpc_config_processor *m)
+{
+ int ver, quad, logical_apicid;
+
+ if (!(m->mpc_cpuflag & CPU_ENABLED))
+ return;
+
+ logical_apicid = m->mpc_apicid;
+ if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
+ quad = translation_table[mpc_record]->trans_quad;
+ logical_apicid = (quad << 4) +
+ (m->mpc_apicid ? m->mpc_apicid << 1 : 1);
+ printk("Processor #%d %s APIC version %d (quad %d, apic %d)\n",
+ m->mpc_apicid,
+ mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 ,
+ (m->mpc_cpufeature & CPU_MODEL_MASK)>>4),
+ m->mpc_apicver, quad, logical_apicid);
+ } else {
+ printk("Processor #%d %s APIC version %d\n",
+ m->mpc_apicid,
+ mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 ,
+ (m->mpc_cpufeature & CPU_MODEL_MASK)>>4),
+ m->mpc_apicver);
+ }
+
+ if (m->mpc_featureflag&(1<<0))
+ Dprintk(" Floating point unit present.\n");
+ if (m->mpc_featureflag&(1<<7))
+ Dprintk(" Machine Exception supported.\n");
+ if (m->mpc_featureflag&(1<<8))
+ Dprintk(" 64 bit compare & exchange supported.\n");
+ if (m->mpc_featureflag&(1<<9))
+ Dprintk(" Internal APIC present.\n");
+ if (m->mpc_featureflag&(1<<11))
+ Dprintk(" SEP present.\n");
+ if (m->mpc_featureflag&(1<<12))
+ Dprintk(" MTRR present.\n");
+ if (m->mpc_featureflag&(1<<13))
+ Dprintk(" PGE present.\n");
+ if (m->mpc_featureflag&(1<<14))
+ Dprintk(" MCA present.\n");
+ if (m->mpc_featureflag&(1<<15))
+ Dprintk(" CMOV present.\n");
+ if (m->mpc_featureflag&(1<<16))
+ Dprintk(" PAT present.\n");
+ if (m->mpc_featureflag&(1<<17))
+ Dprintk(" PSE present.\n");
+ if (m->mpc_featureflag&(1<<18))
+ Dprintk(" PSN present.\n");
+ if (m->mpc_featureflag&(1<<19))
+ Dprintk(" Cache Line Flush Instruction present.\n");
+ /* 20 Reserved */
+ if (m->mpc_featureflag&(1<<21))
+ Dprintk(" Debug Trace and EMON Store present.\n");
+ if (m->mpc_featureflag&(1<<22))
+ Dprintk(" ACPI Thermal Throttle Registers present.\n");
+ if (m->mpc_featureflag&(1<<23))
+ Dprintk(" MMX present.\n");
+ if (m->mpc_featureflag&(1<<24))
+ Dprintk(" FXSR present.\n");
+ if (m->mpc_featureflag&(1<<25))
+ Dprintk(" XMM present.\n");
+ if (m->mpc_featureflag&(1<<26))
+ Dprintk(" Willamette New Instructions present.\n");
+ if (m->mpc_featureflag&(1<<27))
+ Dprintk(" Self Snoop present.\n");
+ if (m->mpc_featureflag&(1<<28))
+ Dprintk(" HT present.\n");
+ if (m->mpc_featureflag&(1<<29))
+ Dprintk(" Thermal Monitor present.\n");
+ /* 30, 31 Reserved */
+
+
+ if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+ Dprintk(" Bootup CPU\n");
+ boot_cpu_physical_apicid = m->mpc_apicid;
+ boot_cpu_logical_apicid = logical_apicid;
+ }
+
+ if (num_processors >= NR_CPUS) {
+ printk(KERN_WARNING "NR_CPUS limit of %i reached. Cannot "
+ "boot CPU(apicid 0x%x).\n", NR_CPUS, m->mpc_apicid);
+ return;
+ }
+ num_processors++;
+
+ if (m->mpc_apicid >= MAX_APICS) {
+ printk("Processor #%d INVALID. (Max ID: %d).\n",
+ m->mpc_apicid, MAX_APICS - 1);
+ --num_processors;
+ return;
+ }
+ ver = m->mpc_apicver;
+
+ logical_cpu_present_map |= 1 << (num_processors-1);
+ phys_cpu_present_map |= apicid_to_phys_cpu_present(m->mpc_apicid);
+
+ /*
+ * Validate version
+ */
+ if (ver == 0x0) {
+ printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
+ ver = 0x10;
+ }
+ apic_version[m->mpc_apicid] = ver;
+ raw_phys_apicid[num_processors - 1] = m->mpc_apicid;
+}
+
+static void __init MP_bus_info (struct mpc_config_bus *m)
+{
+ char str[7];
+ int quad;
+
+ memcpy(str, m->mpc_bustype, 6);
+ str[6] = 0;
+
+ if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
+ quad = translation_table[mpc_record]->trans_quad;
+ mp_bus_id_to_node[m->mpc_busid] = quad;
+ mp_bus_id_to_local[m->mpc_busid] = translation_table[mpc_record]->trans_local;
+ quad_local_to_mp_bus_id[quad][translation_table[mpc_record]->trans_local] = m->mpc_busid;
+ printk("Bus #%d is %s (node %d)\n", m->mpc_busid, str, quad);
+ } else {
+ Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
+ }
+
+ if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
+ } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
+ } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
+ mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
+ mp_current_pci_id++;
+ } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
+ } else {
+ printk("Unknown bustype %s - ignoring\n", str);
+ }
+}
+
+static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
+{
+ if (!(m->mpc_flags & MPC_APIC_USABLE))
+ return;
+
+ printk("I/O APIC #%d Version %d at 0x%X.\n",
+ m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
+ if (nr_ioapics >= MAX_IO_APICS) {
+ printk("Max # of I/O APICs (%d) exceeded (found %d).\n",
+ MAX_IO_APICS, nr_ioapics);
+ panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
+ }
+ if (!m->mpc_apicaddr) {
+ printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
+ " found in MP table, skipping!\n");
+ return;
+ }
+ mp_ioapics[nr_ioapics] = *m;
+ nr_ioapics++;
+}
+
+static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
+{
+ mp_irqs [mp_irq_entries] = *m;
+ Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
+ " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+ m->mpc_irqtype, m->mpc_irqflag & 3,
+ (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
+ m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+ if (++mp_irq_entries == max_irq_sources)
+ panic("Max # of irq sources exceeded!!\n");
+}
+
+static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
+{
+ Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
+ " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
+ m->mpc_irqtype, m->mpc_irqflag & 3,
+ (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
+ m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+ /*
+ * Well it seems all SMP boards in existence
+ * use ExtINT/LVT1 == LINT0 and
+ * NMI/LVT2 == LINT1 - the following check
+ * will show us if this assumption is false.
+ * Until then we do not have to add baggage.
+ */
+ if ((m->mpc_irqtype == mp_ExtINT) &&
+ (m->mpc_destapiclint != 0))
+ BUG();
+ if ((m->mpc_irqtype == mp_NMI) &&
+ (m->mpc_destapiclint != 1))
+ BUG();
+}
+
+static void __init MP_translation_info (struct mpc_config_translation *m)
+{
+ printk("Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
+
+ if (mpc_record >= MAX_MPC_ENTRY)
+ printk("MAX_MPC_ENTRY exceeded!\n");
+ else
+ translation_table[mpc_record] = m; /* stash this for later */
+ if (m->trans_quad+1 > numnodes)
+ numnodes = m->trans_quad+1;
+}
+
+/*
+ * Read/parse the MPC oem tables
+ */
+
+static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
+ unsigned short oemsize)
+{
+ int count = sizeof (*oemtable); /* the header size */
+ unsigned char *oemptr = ((unsigned char *)oemtable)+count;
+
+ printk("Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
+ if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
+ {
+ printk("SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
+ oemtable->oem_signature[0],
+ oemtable->oem_signature[1],
+ oemtable->oem_signature[2],
+ oemtable->oem_signature[3]);
+ return;
+ }
+ if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
+ {
+ printk("SMP oem mptable: checksum error!\n");
+ return;
+ }
+ while (count < oemtable->oem_length) {
+ switch (*oemptr) {
+ case MP_TRANSLATION:
+ {
+ struct mpc_config_translation *m=
+ (struct mpc_config_translation *)oemptr;
+ MP_translation_info(m);
+ oemptr += sizeof(*m);
+ count += sizeof(*m);
+ ++mpc_record;
+ break;
+ }
+ default:
+ {
+ printk("Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
+ return;
+ }
+ }
+ }
+}
+
+/*
+ * Read/parse the MPC
+ */
+
+static int __init smp_read_mpc(struct mp_config_table *mpc)
+{
+ char oem[16], prod[14];
+ int count=sizeof(*mpc);
+ unsigned char *mpt=((unsigned char *)mpc)+count;
+ int num_bus = 0;
+ int num_irq = 0;
+ unsigned char *bus_data;
+
+ if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
+ panic("SMP mptable: bad signature [%c%c%c%c]!\n",
+ mpc->mpc_signature[0],
+ mpc->mpc_signature[1],
+ mpc->mpc_signature[2],
+ mpc->mpc_signature[3]);
+ return 0;
+ }
+ if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
+ panic("SMP mptable: checksum error!\n");
+ return 0;
+ }
+ if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
+ printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
+ mpc->mpc_spec);
+ return 0;
+ }
+ if (!mpc->mpc_lapic) {
+ printk(KERN_ERR "SMP mptable: null local APIC address!\n");
+ return 0;
+ }
+ memcpy(oem,mpc->mpc_oem,8);
+ oem[8]=0;
+ printk("OEM ID: %s ",oem);
+
+ memcpy(prod,mpc->mpc_productid,12);
+ prod[12]=0;
+ printk("Product ID: %s ",prod);
+
+ detect_clustered_apic(oem, prod);
+
+ printk("APIC at: 0x%X\n",mpc->mpc_lapic);
+
+ /*
+ * Save the local APIC address (it might be non-default) -- but only
+ * if we're not using ACPI.
+ */
+ if (!acpi_lapic)
+ mp_lapic_addr = mpc->mpc_lapic;
+
+ if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ) && mpc->mpc_oemptr) {
+ /* We need to process the oem mpc tables to tell us which quad things are in ... */
+ mpc_record = 0;
+ smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr, mpc->mpc_oemsize);
+ mpc_record = 0;
+ }
+
+ /*
+ * Pre-scan to determine the number of bus and interrupt source
+ * records we have.
+ */
+ while (count < mpc->mpc_length) {
+ switch (*mpt) {
+ case MP_PROCESSOR:
+ mpt += sizeof(struct mpc_config_processor);
+ count += sizeof(struct mpc_config_processor);
+ break;
+ case MP_BUS:
+ ++num_bus;
+ mpt += sizeof(struct mpc_config_bus);
+ count += sizeof(struct mpc_config_bus);
+ break;
+ case MP_INTSRC:
+ ++num_irq;
+ mpt += sizeof(struct mpc_config_intsrc);
+ count += sizeof(struct mpc_config_intsrc);
+ break;
+ case MP_IOAPIC:
+ mpt += sizeof(struct mpc_config_ioapic);
+ count += sizeof(struct mpc_config_ioapic);
+ break;
+ case MP_LINTSRC:
+ mpt += sizeof(struct mpc_config_lintsrc);
+ count += sizeof(struct mpc_config_lintsrc);
+ break;
+ default:
+ count = mpc->mpc_length;
+ break;
+ }
+ }
+ /*
+ * Paranoia: Allocate one extra of both the number of busses and number
+ * of irqs, and make sure that we have at least 4 interrupts per PCI
+ * slot. But some machines do not report very many busses, so we need
+ * to fall back on the older defaults.
+ */
+ ++num_bus;
+ max_mp_busses = max(num_bus, MAX_MP_BUSSES);
+ if (num_irq < (4 * max_mp_busses))
+ num_irq = 4 * num_bus; /* 4 intr/PCI slot */
+ ++num_irq;
+ max_irq_sources = max(num_irq, MAX_IRQ_SOURCES);
+
+ count = (max_mp_busses * sizeof(int)) * 4;
+ count += (max_irq_sources * sizeof(struct mpc_config_intsrc));
+ bus_data = (void *)__get_free_pages(GFP_KERNEL, get_order(count));
+ if (!bus_data) {
+ printk(KERN_ERR "SMP mptable: out of memory!\n");
+ return 0;
+ }
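+ /*
+ * Carve the single allocation into four bus-indexed int arrays
+ * followed by the interrupt source array; the offsets below mirror
+ * the sizes added into 'count' above.
+ */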
+ mp_bus_id_to_type = (int *)&bus_data[0];
+ mp_bus_id_to_node = (int *)&bus_data[(max_mp_busses * sizeof(int))];
+ mp_bus_id_to_local = (int *)&bus_data[(max_mp_busses * sizeof(int)) * 2];
+ mp_bus_id_to_pci_bus = (int *)&bus_data[(max_mp_busses * sizeof(int)) * 3];
+ mp_irqs = (struct mpc_config_intsrc *)&bus_data[(max_mp_busses * sizeof(int)) * 4];
+ memset(mp_bus_id_to_pci_bus, -1, max_mp_busses * sizeof(int));
+
+ /*
+ * Now process the configuration blocks.
+ */
+ count = sizeof(*mpc);
+ mpt = ((unsigned char *)mpc)+count;
+ while (count < mpc->mpc_length) {
+ switch(*mpt) {
+ case MP_PROCESSOR:
+ {
+ struct mpc_config_processor *m=
+ (struct mpc_config_processor *)mpt;
+ /* ACPI may have already provided this data */
+ if (!acpi_lapic)
+ MP_processor_info(m);
+ mpt += sizeof(*m);
+ count += sizeof(*m);
+ break;
+ }
+ case MP_BUS:
+ {
+ struct mpc_config_bus *m=
+ (struct mpc_config_bus *)mpt;
+ MP_bus_info(m);
+ mpt += sizeof(*m);
+ count += sizeof(*m);
+ break;
+ }
+ case MP_IOAPIC:
+ {
+ struct mpc_config_ioapic *m=
+ (struct mpc_config_ioapic *)mpt;
+ MP_ioapic_info(m);
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
+ }
+ case MP_INTSRC:
+ {
+ struct mpc_config_intsrc *m=
+ (struct mpc_config_intsrc *)mpt;
+
+ MP_intsrc_info(m);
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
+ }
+ case MP_LINTSRC:
+ {
+ struct mpc_config_lintsrc *m=
+ (struct mpc_config_lintsrc *)mpt;
+ MP_lintsrc_info(m);
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
+ }
+ default:
+ {
+ count = mpc->mpc_length;
+ break;
+ }
+ }
+ ++mpc_record;
+ }
+
+ if (clustered_apic_mode) {
+ phys_cpu_present_map = logical_cpu_present_map;
+ }
+
+
+ printk("Enabling APIC mode: ");
+ if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
+ printk("Clustered Logical. ");
+ else if (clustered_apic_mode == CLUSTERED_APIC_XAPIC)
+ printk("Physical. ");
+ else
+ printk("Flat. ");
+ printk("Using %d I/O APICs\n", nr_ioapics);
+
+ if (!num_processors)
+ printk(KERN_ERR "SMP mptable: no processors registered!\n");
+ return num_processors;
+}
+
+static int __init ELCR_trigger(unsigned int irq)
+{
+ unsigned int port;
+
+ port = 0x4d0 + (irq >> 3);
+ return (inb(port) >> (irq & 7)) & 1;
+}
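+
+/*
+ * Worked example of the ELCR addressing above: the two ELCR registers
+ * at I/O ports 0x4d0/0x4d1 hold one trigger bit per ISA IRQ (1 = level,
+ * 0 = edge), so ELCR_trigger(9) reads port 0x4d0 + (9 >> 3) = 0x4d1 and
+ * tests bit (9 & 7) = 1.
+ */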
+
+static void __init construct_default_ioirq_mptable(int mpc_default_type)
+{
+ struct mpc_config_intsrc intsrc;
+ int i;
+ int ELCR_fallback = 0;
+
+ intsrc.mpc_type = MP_INTSRC;
+ intsrc.mpc_irqflag = 0; /* conforming */
+ intsrc.mpc_srcbus = 0;
+ intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
+
+ intsrc.mpc_irqtype = mp_INT;
+
+ /*
+ * If true, we have an ISA/PCI system with no IRQ entries
+ * in the MP table. To prevent the PCI interrupts from being set up
+ * incorrectly, we try to use the ELCR. The sanity check to see if
+ * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
+ * never be level sensitive, so we simply see if the ELCR agrees.
+ * If it does, we assume it's valid.
+ */
+ if (mpc_default_type == 5) {
+ printk("ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
+
+ if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
+ printk("ELCR contains invalid data... not using ELCR\n");
+ else {
+ printk("Using ELCR to identify PCI interrupts\n");
+ ELCR_fallback = 1;
+ }
+ }
+
+ for (i = 0; i < 16; i++) {
+ switch (mpc_default_type) {
+ case 2:
+ if (i == 0 || i == 13)
+ continue; /* IRQ0 & IRQ13 not connected */
+ /* fall through */
+ default:
+ if (i == 2)
+ continue; /* IRQ2 is never connected */
+ }
+
+ if (ELCR_fallback) {
+ /*
+ * If the ELCR indicates a level-sensitive interrupt, we
+ * copy that information over to the MP table in the
+ * irqflag field (level sensitive, active high polarity).
+ */
+ if (ELCR_trigger(i))
+ intsrc.mpc_irqflag = 13;
+ else
+ intsrc.mpc_irqflag = 0;
+ }
+
+ intsrc.mpc_srcbusirq = i;
+ intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
+ MP_intsrc_info(&intsrc);
+ }
+
+ intsrc.mpc_irqtype = mp_ExtINT;
+ intsrc.mpc_srcbusirq = 0;
+ intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
+ MP_intsrc_info(&intsrc);
+}
+
+static inline void __init construct_default_ISA_mptable(int mpc_default_type)
+{
+ struct mpc_config_processor processor;
+ struct mpc_config_bus bus;
+ struct mpc_config_ioapic ioapic;
+ struct mpc_config_lintsrc lintsrc;
+ int linttypes[2] = { mp_ExtINT, mp_NMI };
+ int i;
+ struct {
+ int mp_bus_id_to_type[MAX_MP_BUSSES];
+ int mp_bus_id_to_node[MAX_MP_BUSSES];
+ int mp_bus_id_to_local[MAX_MP_BUSSES];
+ int mp_bus_id_to_pci_bus[MAX_MP_BUSSES];
+ struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+ } *bus_data;
+
+ bus_data = (void *)__get_free_pages(GFP_KERNEL, get_order(sizeof(*bus_data)));
+ if (!bus_data)
+ panic("SMP mptable: out of memory!\n");
+ mp_bus_id_to_type = bus_data->mp_bus_id_to_type;
+ mp_bus_id_to_node = bus_data->mp_bus_id_to_node;
+ mp_bus_id_to_local = bus_data->mp_bus_id_to_local;
+ mp_bus_id_to_pci_bus = bus_data->mp_bus_id_to_pci_bus;
+ mp_irqs = bus_data->mp_irqs;
+ for (i = 0; i < MAX_MP_BUSSES; ++i)
+ mp_bus_id_to_pci_bus[i] = -1;
+
+ /*
+ * local APIC has default address
+ */
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+ /*
+ * 2 CPUs, numbered 0 & 1.
+ */
+ processor.mpc_type = MP_PROCESSOR;
+ /* Either an integrated APIC or a discrete 82489DX. */
+ processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+ processor.mpc_cpuflag = CPU_ENABLED;
+ processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+ (boot_cpu_data.x86_model << 4) |
+ boot_cpu_data.x86_mask;
+ processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+ processor.mpc_reserved[0] = 0;
+ processor.mpc_reserved[1] = 0;
+ for (i = 0; i < 2; i++) {
+ processor.mpc_apicid = i;
+ MP_processor_info(&processor);
+ }
+
+ bus.mpc_type = MP_BUS;
+ bus.mpc_busid = 0;
+ switch (mpc_default_type) {
+ default:
+ printk("???\nUnknown standard configuration %d\n",
+ mpc_default_type);
+ /* fall through */
+ case 1:
+ case 5:
+ memcpy(bus.mpc_bustype, "ISA ", 6);
+ break;
+ case 2:
+ case 6:
+ case 3:
+ memcpy(bus.mpc_bustype, "EISA ", 6);
+ break;
+ case 4:
+ case 7:
+ memcpy(bus.mpc_bustype, "MCA ", 6);
+ }
+ MP_bus_info(&bus);
+ if (mpc_default_type > 4) {
+ bus.mpc_busid = 1;
+ memcpy(bus.mpc_bustype, "PCI ", 6);
+ MP_bus_info(&bus);
+ }
+
+ ioapic.mpc_type = MP_IOAPIC;
+ ioapic.mpc_apicid = 2;
+ ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+ ioapic.mpc_flags = MPC_APIC_USABLE;
+ ioapic.mpc_apicaddr = 0xFEC00000;
+ MP_ioapic_info(&ioapic);
+
+ /*
+ * We set up most of the low 16 IO-APIC pins according to MPS rules.
+ */
+ construct_default_ioirq_mptable(mpc_default_type);
+
+ lintsrc.mpc_type = MP_LINTSRC;
+ lintsrc.mpc_irqflag = 0; /* conforming */
+ lintsrc.mpc_srcbusid = 0;
+ lintsrc.mpc_srcbusirq = 0;
+ lintsrc.mpc_destapic = MP_APIC_ALL;
+ for (i = 0; i < 2; i++) {
+ lintsrc.mpc_irqtype = linttypes[i];
+ lintsrc.mpc_destapiclint = i;
+ MP_lintsrc_info(&lintsrc);
+ }
+}
+
+static struct intel_mp_floating *mpf_found;
+
+/*
+ * Scan the memory blocks for an SMP configuration block.
+ */
+void __init get_smp_config (void)
+{
+ struct intel_mp_floating *mpf = mpf_found;
+
+ /*
+ * ACPI may be used to obtain the entire SMP configuration or just to
+ * enumerate/configure processors (CONFIG_ACPI_HT_ONLY). Note that
+ * ACPI supports both logical (e.g. Hyper-Threading) and physical
+ * processors, where MPS only supports physical.
+ */
+ if (acpi_lapic && acpi_ioapic) {
+ printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
+ return;
+ }
+ else if (acpi_lapic)
+ printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
+
+ printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
+ if (mpf->mpf_feature2 & (1<<7)) {
+ printk(" IMCR and PIC compatibility mode.\n");
+ pic_mode = 1;
+ } else {
+ printk(" Virtual Wire compatibility mode.\n");
+ pic_mode = 0;
+ }
+
+ /*
+ * Now see if we need to read further.
+ */
+ if (mpf->mpf_feature1 != 0) {
+
+ printk("Default MP configuration #%d\n", mpf->mpf_feature1);
+ construct_default_ISA_mptable(mpf->mpf_feature1);
+
+ } else if (mpf->mpf_physptr) {
+
+ /*
+ * Read the physical hardware table. Anything here will
+ * override the defaults.
+ */
+ if (!smp_read_mpc((void *)mpf->mpf_physptr)) {
+ smp_found_config = 0;
+ printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
+ printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
+ return;
+ }
+ /*
+ * If there are no explicit MP IRQ entries, then we are
+ * broken. We set up most of the low 16 IO-APIC pins to
+ * ISA defaults and hope it will work.
+ */
+ if (!mp_irq_entries) {
+ struct mpc_config_bus bus;
+
+ printk("BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
+
+ bus.mpc_type = MP_BUS;
+ bus.mpc_busid = 0;
+ memcpy(bus.mpc_bustype, "ISA ", 6);
+ MP_bus_info(&bus);
+
+ construct_default_ioirq_mptable(0);
+ }
+
+ } else
+ BUG();
+
+ printk("Processors: %d\n", num_processors);
+ /*
+ * Only use the first configuration found.
+ */
+}
+
+static int __init smp_scan_config (unsigned long base, unsigned long length)
+{
+ unsigned long *bp = phys_to_virt(base);
+ struct intel_mp_floating *mpf;
+
+ Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
+ if (sizeof(*mpf) != 16)
+ printk("Error: MPF size\n");
+
+ while (length > 0) {
+ mpf = (struct intel_mp_floating *)bp;
+ if ((*bp == SMP_MAGIC_IDENT) &&
+ (mpf->mpf_length == 1) &&
+ !mpf_checksum((unsigned char *)bp, 16) &&
+ ((mpf->mpf_specification == 1)
+ || (mpf->mpf_specification == 4)) ) {
+
+ smp_found_config = 1;
+ printk("found SMP MP-table at %08lx\n",
+ virt_to_phys(mpf));
+ reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
+ if (mpf->mpf_physptr)
+ reserve_bootmem((unsigned long)mpf->mpf_physptr, PAGE_SIZE);
+ mpf_found = mpf;
+ return 1;
+ }
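+ /* Advance 16 bytes: bp is an unsigned long pointer, 4 bytes on x86-32. */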
+ bp += 4;
+ length -= 16;
+ }
+ return 0;
+}
+
+void __init find_intel_smp (void)
+{
+ unsigned int address;
+
+ /*
+ * FIXME: Linux assumes you have 640K of base ram..
+ * this continues the error...
+ *
+ * 1) Scan the bottom 1K for a signature
+ * 2) Scan the top 1K of base RAM
+ * 3) Scan the 64K of bios
+ */
+ if (smp_scan_config(0x0,0x400) ||
+ smp_scan_config(639*0x400,0x400) ||
+ smp_scan_config(0xF0000,0x10000))
+ return;
+ /*
+ * If it is an SMP machine we should know now, unless the
+ * configuration is in an EISA/MCA bus machine with an
+ * extended bios data area.
+ *
+ * There is a real-mode segmented pointer to the 4K EBDA area at
+ * 0x40E; calculate and scan it here.
+ *
+ * NOTE! Some Linux loaders used to corrupt the EBDA area, and as
+ * such this kind of SMP config may be less trustworthy, simply
+ * because the SMP table may have been stomped on during early boot.
+ * Thankfully the bootloaders now honour the EBDA.
+ */
+
+ address = *(unsigned short *)phys_to_virt(0x40E);
+ address <<= 4;
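+ /* e.g. segment 0x9fc0 -> EBDA base 0x9fc00, just below 640K (illustrative) */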
+ smp_scan_config(address, 0x1000);
+}
+
+#else
+
+/*
+ * The Visual Workstation is Intel MP compliant in the hardware
+ * sense, but it doesn't have a BIOS(-configuration table).
+ * No problem for Linux.
+ */
+void __init find_visws_smp(void)
+{
+ smp_found_config = 1;
+
+ phys_cpu_present_map |= 2; /* or in id 1 */
+ apic_version[1] |= 0x10; /* integrated APIC */
+ apic_version[0] |= 0x10;
+
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+}
+
+#endif
+
+/*
+ * - Intel MP Configuration Table
+ * - or SGI Visual Workstation configuration
+ */
+void __init find_smp_config (void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+ find_intel_smp();
+#endif
+#ifdef CONFIG_VISWS
+ find_visws_smp();
+#endif
+}
+
+
+/* --------------------------------------------------------------------------
+ ACPI-based MP Configuration
+ -------------------------------------------------------------------------- */
+
+#ifdef CONFIG_ACPI_BOOT
+
+void __init mp_register_lapic_address (
+ u64 address)
+{
+ mp_lapic_addr = (unsigned long) address;
+
+ set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
+
+ if (boot_cpu_physical_apicid == -1U)
+ boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+
+ Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
+}
+
+
+void __init mp_register_lapic (
+ u8 id,
+ u8 enabled)
+{
+ struct mpc_config_processor processor;
+ int boot_cpu = 0;
+
+ if (id >= MAX_APICS) {
+ printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
+ id, MAX_APICS);
+ return;
+ }
+
+ if (id == boot_cpu_physical_apicid)
+ boot_cpu = 1;
+
+ processor.mpc_type = MP_PROCESSOR;
+ processor.mpc_apicid = id;
+
+ /*
+ * mp_register_lapic_address(), which is called before this
+ * function, sets up the fixmap of FIX_APIC_BASE, so we can
+ * read the correct APIC version from there.
+ */
+ processor.mpc_apicver = apic_read(APIC_LVR);
+
+ processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
+ processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
+ processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+ (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
+ processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+ processor.mpc_reserved[0] = 0;
+ processor.mpc_reserved[1] = 0;
+
+ MP_processor_info(&processor);
+}
+
+#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
+
+#define MP_ISA_BUS 0
+#define MP_MAX_IOAPIC_PIN 127
+
+struct mp_ioapic_routing {
+ int apic_id;
+ int irq_start;
+ int irq_end;
+ u32 pin_programmed[4];
+} mp_ioapic_routing[MAX_IO_APICS];
+
+
+static int __init mp_find_ioapic (
+ int irq)
+{
+ int i = 0;
+
+ /* Find the IOAPIC that manages this IRQ. */
+ for (i = 0; i < nr_ioapics; i++) {
+ if ((irq >= mp_ioapic_routing[i].irq_start)
+ && (irq <= mp_ioapic_routing[i].irq_end))
+ return i;
+ }
+
+ printk(KERN_ERR "ERROR: Unable to locate IOAPIC for IRQ %d\n", irq);
+
+ return -1;
+}
+
+
+void __init mp_register_ioapic (
+ u8 id,
+ u32 address,
+ u32 irq_base)
+{
+ int idx = 0;
+
+ if (nr_ioapics >= MAX_IO_APICS) {
+ printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
+ "(found %d)\n", MAX_IO_APICS, nr_ioapics);
+ panic("Recompile kernel with bigger MAX_IO_APICS!\n");
+ }
+ if (!address) {
+ printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
+ " found in MADT table, skipping!\n");
+ return;
+ }
+
+ idx = nr_ioapics++;
+
+ mp_ioapics[idx].mpc_type = MP_IOAPIC;
+ mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
+ mp_ioapics[idx].mpc_apicaddr = address;
+
+ set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+ mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id);
+ mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
+
+ /*
+ * Build basic IRQ lookup table to facilitate irq->io_apic lookups
+ * and to prevent reprogramming of IOAPIC pins (PCI IRQs).
+ */
+ mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
+ mp_ioapic_routing[idx].irq_start = irq_base;
+ mp_ioapic_routing[idx].irq_end = irq_base +
+ io_apic_get_redir_entries(idx);
+
+ printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
+ "IRQ %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
+ mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
+ mp_ioapic_routing[idx].irq_start,
+ mp_ioapic_routing[idx].irq_end);
+
+ return;
+}
+
+
+void __init mp_override_legacy_irq (
+ u8 bus_irq,
+ u8 polarity,
+ u8 trigger,
+ u32 global_irq)
+{
+ struct mpc_config_intsrc intsrc;
+ int i = 0;
+ int found = 0;
+ int ioapic = -1;
+ int pin = -1;
+
+ /*
+ * Convert 'global_irq' to 'ioapic.pin'.
+ */
+ ioapic = mp_find_ioapic(global_irq);
+ if (ioapic < 0)
+ return;
+ pin = global_irq - mp_ioapic_routing[ioapic].irq_start;
+
+ /*
+ * TBD: This check is for faulty timer entries, where the override
+ * erroneously sets the trigger to level, resulting in a HUGE
+ * increase of timer interrupts!
+ */
+ if ((bus_irq == 0) && (global_irq == 2) && (trigger == 3))
+ trigger = 1;
+
+ intsrc.mpc_type = MP_INTSRC;
+ intsrc.mpc_irqtype = mp_INT;
+ intsrc.mpc_irqflag = (trigger << 2) | polarity;
+ intsrc.mpc_srcbus = MP_ISA_BUS;
+ intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
+ intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
+ intsrc.mpc_dstirq = pin; /* INTIN# */
+
+ Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
+ intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
+ (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
+ intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
+
+ /*
+ * If an [IOAPIC.PIN -> IRQ] routing entry already exists, we override it.
+ * Otherwise create a new entry (e.g. global_irq == 2).
+ */
+ for (i = 0; i < mp_irq_entries; i++) {
+ if ((mp_irqs[i].mpc_dstapic == intsrc.mpc_dstapic)
+ && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) {
+ mp_irqs[i] = intsrc;
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ mp_irqs[mp_irq_entries] = intsrc;
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
+ panic("Max # of irq sources exceeded!\n");
+ }
+
+ return;
+}
+
+
+void __init mp_config_acpi_legacy_irqs (void)
+{
+ int i = 0;
+ int ioapic = -1;
+
+ /*
+ * Initialize mp_irqs for IRQ configuration.
+ */
+ unsigned char *bus_data;
+ int count;
+
+ count = (MAX_MP_BUSSES * sizeof(int)) * 4;
+ count += (MAX_IRQ_SOURCES * sizeof(int)) * 4;
+ bus_data = (void *)__get_free_pages(GFP_KERNEL, get_order(count));
+ if (!bus_data) {
+ panic("Fatal: can't allocate bus memory for ACPI legacy IRQ!");
+ }
+ mp_bus_id_to_type = (int *)&bus_data[0];
+ mp_bus_id_to_node = (int *)&bus_data[(MAX_MP_BUSSES * sizeof(int))];
+ mp_bus_id_to_local = (int *)&bus_data[(MAX_MP_BUSSES * sizeof(int)) * 2];
+ mp_bus_id_to_pci_bus = (int *)&bus_data[(MAX_MP_BUSSES * sizeof(int)) * 3];
+ mp_irqs = (struct mpc_config_intsrc *)&bus_data[(MAX_MP_BUSSES * sizeof(int)) * 4];
+ for (i = 0; i < MAX_MP_BUSSES; ++i)
+ mp_bus_id_to_pci_bus[i] = -1;
+
+ /*
+ * Fabricate the legacy ISA bus (bus MP_ISA_BUS).
+ */
+ mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
+ Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
+
+ /*
+ * Locate the IOAPIC that manages the ISA IRQs (0-15).
+ */
+ ioapic = mp_find_ioapic(0);
+ if (ioapic < 0)
+ return;
+
+ /*
+ * Use the default configuration for the IRQs 0-15. These may be
+ * overridden by (MADT) interrupt source override entries.
+ */
+ for (i = 0; i < 16; i++) {
+
+ if (i == 2) continue; /* Don't connect IRQ2 */
+
+ mp_irqs[mp_irq_entries].mpc_type = MP_INTSRC;
+ mp_irqs[mp_irq_entries].mpc_irqflag = 0; /* Conforming */
+ mp_irqs[mp_irq_entries].mpc_srcbus = MP_ISA_BUS;
+ mp_irqs[mp_irq_entries].mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
+ mp_irqs[mp_irq_entries].mpc_irqtype = i ? mp_INT : mp_ExtINT; /* 8259A to #0 */
+ mp_irqs[mp_irq_entries].mpc_srcbusirq = i; /* Identity mapped */
+ mp_irqs[mp_irq_entries].mpc_dstirq = i;
+
+ Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
+ "%d-%d\n",
+ mp_irqs[mp_irq_entries].mpc_irqtype,
+ mp_irqs[mp_irq_entries].mpc_irqflag & 3,
+ (mp_irqs[mp_irq_entries].mpc_irqflag >> 2) & 3,
+ mp_irqs[mp_irq_entries].mpc_srcbus,
+ mp_irqs[mp_irq_entries].mpc_srcbusirq,
+ mp_irqs[mp_irq_entries].mpc_dstapic,
+ mp_irqs[mp_irq_entries].mpc_dstirq);
+
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
+ panic("Max # of irq sources exceeded!\n");
+ }
+}
+
+/*extern FADT_DESCRIPTOR acpi_fadt;*/
+
+void __init mp_config_ioapic_for_sci(int irq)
+{
+ int ioapic;
+ int ioapic_pin;
+ struct acpi_table_madt* madt;
+ struct acpi_table_int_src_ovr *entry = NULL;
+ acpi_interrupt_flags flags;
+ void *madt_end;
+ acpi_status status;
+
+ /*
+ * Ensure that if there is an interrupt source override entry
+ * for the ACPI SCI, we leave it as is. Unfortunately this involves
+ * walking the MADT again.
+ */
+ status = acpi_get_firmware_table("APIC", 1, ACPI_LOGICAL_ADDRESSING,
+ (struct acpi_table_header **) &madt);
+ if (ACPI_SUCCESS(status)) {
+ madt_end = (void *) (unsigned long)madt + madt->header.length;
+
+ entry = (struct acpi_table_int_src_ovr *)
+ ((unsigned long) madt + sizeof(struct acpi_table_madt));
+
+ while ((void *) entry < madt_end) {
+ if (entry->header.type == ACPI_MADT_INT_SRC_OVR &&
+ acpi_fadt.sci_int == entry->bus_irq)
+ goto found;
+
+ entry = (struct acpi_table_int_src_ovr *)
+ ((unsigned long) entry + entry->header.length);
+ }
+ }
+ /*
+ * Although the ACPI spec says that the SCI should be level/low,
+ * don't reprogram it unless there is an explicit MADT OVR entry
+ * instructing us to do so -- otherwise we break Tyan boards which
+ * have the SCI wired edge/high but no MADT OVR.
+ */
+ return;
+
+found:
+ /*
+ * See the note at the end of ACPI 2.0b section
+ * 5.2.10.8 for what this is about.
+ */
+ flags = entry->flags;
+ acpi_fadt.sci_int = entry->global_irq;
+ irq = entry->global_irq;
+
+ ioapic = mp_find_ioapic(irq);
+
+ ioapic_pin = irq - mp_ioapic_routing[ioapic].irq_start;
+
+ /*
+ * MPS INTI flags:
+ * trigger: 0=default, 1=edge, 3=level
+ * polarity: 0=default, 1=high, 3=low
+ * Per ACPI spec, default for SCI means level/low.
+ */
+ io_apic_set_pci_routing(ioapic, ioapic_pin, irq,
+ (flags.trigger == 1 ? 0 : 1), (flags.polarity == 1 ? 0 : 1));
+}
+
+
+#ifdef CONFIG_ACPI_PCI
+
+void __init mp_parse_prt (void)
+{
+ struct list_head *node = NULL;
+ struct acpi_prt_entry *entry = NULL;
+ int ioapic = -1;
+ int ioapic_pin = 0;
+ int irq = 0;
+ int idx, bit = 0;
+ int edge_level = 0;
+ int active_high_low = 0;
+
+ /*
+ * Parse through the PCI Interrupt Routing Table (PRT) and program
+ * routing for all its entries.
+ */
+ list_for_each(node, &acpi_prt.entries) {
+ entry = list_entry(node, struct acpi_prt_entry, node);
+
+ /* Need to get irq for dynamic entry */
+ if (entry->link.handle) {
+ irq = acpi_pci_link_get_irq(entry->link.handle, entry->link.index, &edge_level, &active_high_low);
+ if (!irq)
+ continue;
+ }
+ else {
+ /* Hardwired IRQ. Assume PCI standard settings */
+ irq = entry->link.index;
+ edge_level = 1;
+ active_high_low = 1;
+ }
+
+ /* Don't set up the ACPI SCI because it's already set up */
+ if (acpi_fadt.sci_int == irq) {
+ entry->irq = irq; /*we still need to set entry's irq*/
+ continue;
+ }
+
+ ioapic = mp_find_ioapic(irq);
+ if (ioapic < 0)
+ continue;
+ ioapic_pin = irq - mp_ioapic_routing[ioapic].irq_start;
+
+ /*
+ * Avoid pin reprogramming. PRTs typically include entries
+ * with redundant pin->irq mappings (but unique PCI devices);
+ * we only program the IOAPIC on the first.
+ */
+ bit = ioapic_pin % 32;
+ idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
+ if (idx > 3) {
+ printk(KERN_ERR "Invalid reference to IOAPIC pin "
+ "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
+ ioapic_pin);
+ continue;
+ }
+ if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
+ printk(KERN_DEBUG "Pin %d-%d already programmed\n",
+ mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+ entry->irq = irq;
+ continue;
+ }
+
+ mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
+
+ if (!io_apic_set_pci_routing(ioapic, ioapic_pin, irq, edge_level, active_high_low))
+ entry->irq = irq;
+
+ printk(KERN_DEBUG "%02x:%02x:%02x[%c] -> %d-%d -> IRQ %d\n",
+ entry->id.segment, entry->id.bus,
+ entry->id.device, ('A' + entry->pin),
+ mp_ioapic_routing[ioapic].apic_id, ioapic_pin,
+ entry->irq);
+ }
+
+ print_IO_APIC();
+
+ return;
+}
+
+#endif /*CONFIG_ACPI_PCI*/
+
+#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/
+
+#endif /*CONFIG_ACPI_BOOT*/
--- /dev/null
+/*
+ * linux/arch/i386/nmi.c
+ *
+ * NMI watchdog support on APIC systems
+ *
+ * Started by Ingo Molnar <mingo@redhat.com>
+ *
+ * Fixes:
+ * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
+ * Mikael Pettersson : Power Management for local APIC NMI watchdog.
+ * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
+ * Keir Fraser : Pentium 4 Hyperthreading support
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/mm.h>
+#include <xen/irq.h>
+#include <xen/delay.h>
+#include <xen/interrupt.h>
+#include <xen/time.h>
+#include <xen/timex.h>
+#include <xen/sched.h>
+
+#include <asm/mc146818rtc.h>
+#include <asm/smp.h>
+#include <asm/msr.h>
+#include <asm/mpspec.h>
+
+unsigned int nmi_watchdog = NMI_NONE;
+unsigned int watchdog_on = 0;
+static unsigned int nmi_hz = HZ;
+unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
+extern void show_registers(struct pt_regs *regs);
+
+extern int logical_proc_id[];
+
+#define K7_EVNTSEL_ENABLE (1 << 22)
+#define K7_EVNTSEL_INT (1 << 20)
+#define K7_EVNTSEL_OS (1 << 17)
+#define K7_EVNTSEL_USR (1 << 16)
+#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
+#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+
+#define P6_EVNTSEL0_ENABLE (1 << 22)
+#define P6_EVNTSEL_INT (1 << 20)
+#define P6_EVNTSEL_OS (1 << 17)
+#define P6_EVNTSEL_USR (1 << 16)
+#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
+#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
+
+#define MSR_P4_MISC_ENABLE 0x1A0
+#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
+#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
+#define MSR_P4_PERFCTR0 0x300
+#define MSR_P4_CCCR0 0x360
+#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
+#define P4_ESCR_OS0 (1<<3)
+#define P4_ESCR_USR0 (1<<2)
+#define P4_ESCR_OS1 (1<<1)
+#define P4_ESCR_USR1 (1<<0)
+#define P4_CCCR_OVF_PMI0 (1<<26)
+#define P4_CCCR_OVF_PMI1 (1<<27)
+#define P4_CCCR_THRESHOLD(N) ((N)<<20)
+#define P4_CCCR_COMPLEMENT (1<<19)
+#define P4_CCCR_COMPARE (1<<18)
+#define P4_CCCR_REQUIRED (3<<16)
+#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
+#define P4_CCCR_ENABLE (1<<12)
+/*
+ * Set up IQ_COUNTER{0,1} to behave like a clock, by having IQ_CCCR{0,1} filter
+ * CRU_ESCR0 (with any non-null event selector) through a complemented
+ * max threshold. [IA32-Vol3, Section 14.9.9]
+ */
+#define MSR_P4_IQ_COUNTER0 0x30C
+#define MSR_P4_IQ_COUNTER1 0x30D
+#define MSR_P4_IQ_CCCR0 0x36C
+#define MSR_P4_IQ_CCCR1 0x36D
+#define MSR_P4_CRU_ESCR0 0x3B8 /* ESCR no. 4 */
+#define P4_NMI_CRU_ESCR0 \
+ (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS0|P4_ESCR_USR0| \
+ P4_ESCR_OS1|P4_ESCR_USR1)
+#define P4_NMI_IQ_CCCR0 \
+ (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
+ P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
+#define P4_NMI_IQ_CCCR1 \
+ (P4_CCCR_OVF_PMI1|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
+ P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
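+
+/*
+ * The watchdog counters are reloaded below with -(cpu_khz/nmi_hz*1000),
+ * i.e. minus the number of unhalted clock cycles in one watchdog period,
+ * so each counter overflows -- raising a PMI, delivered as an NMI via
+ * APIC_LVTPC -- once every 1/nmi_hz seconds. Illustrative numbers: a
+ * 2GHz CPU with nmi_hz == 1 reloads the counter with -2000000000.
+ */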
+
+int __init check_nmi_watchdog (void)
+{
+ unsigned int prev_nmi_count[NR_CPUS];
+ int j, cpu;
+
+ if ( !nmi_watchdog )
+ return 0;
+
+ printk("Testing NMI watchdog --- ");
+
+ for ( j = 0; j < smp_num_cpus; j++ )
+ {
+ cpu = cpu_logical_map(j);
+ prev_nmi_count[cpu] = irq_stat[cpu].__nmi_count;
+ }
+ sti();
+ mdelay((10*1000)/nmi_hz); /* wait 10 ticks */
+
+ for ( j = 0; j < smp_num_cpus; j++ )
+ {
+ cpu = cpu_logical_map(j);
+ if ( nmi_count(cpu) - prev_nmi_count[cpu] <= 5 )
+ printk("CPU#%d stuck. ", cpu);
+ else
+ printk("CPU#%d okay. ", cpu);
+ }
+
+ printk("\n");
+
+ /* now that we know it works we can reduce NMI frequency to
+ something more reasonable; makes a difference in some configs */
+ if ( nmi_watchdog == NMI_LOCAL_APIC )
+ nmi_hz = 1;
+
+ return 0;
+}
+
+static inline void nmi_pm_init(void) { }
+#define __pminit __init
+
+/*
+ * Activate the NMI watchdog via the local APIC.
+ * Original code written by Keith Owens.
+ */
+
+static void __pminit clear_msr_range(unsigned int base, unsigned int n)
+{
+ unsigned int i;
+ for ( i = 0; i < n; i++ )
+ wrmsr(base+i, 0, 0);
+}
+
+static void __pminit setup_k7_watchdog(void)
+{
+ unsigned int evntsel;
+
+ nmi_perfctr_msr = MSR_K7_PERFCTR0;
+
+ clear_msr_range(MSR_K7_EVNTSEL0, 4);
+ clear_msr_range(MSR_K7_PERFCTR0, 4);
+
+ evntsel = K7_EVNTSEL_INT
+ | K7_EVNTSEL_OS
+ | K7_EVNTSEL_USR
+ | K7_NMI_EVENT;
+
+ wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
+ Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
+ wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ evntsel |= K7_EVNTSEL_ENABLE;
+ wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
+}
+
+static void __pminit setup_p6_watchdog(void)
+{
+ unsigned int evntsel;
+
+ nmi_perfctr_msr = MSR_P6_PERFCTR0;
+
+ clear_msr_range(MSR_P6_EVNTSEL0, 2);
+ clear_msr_range(MSR_P6_PERFCTR0, 2);
+
+ evntsel = P6_EVNTSEL_INT
+ | P6_EVNTSEL_OS
+ | P6_EVNTSEL_USR
+ | P6_NMI_EVENT;
+
+ wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
+ Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
+ wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0);
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ evntsel |= P6_EVNTSEL0_ENABLE;
+ wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
+}
+
+static int __pminit setup_p4_watchdog(void)
+{
+ unsigned int misc_enable, dummy;
+
+ rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
+ if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
+ return 0;
+
+ nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
+
+ if ( logical_proc_id[smp_processor_id()] == 0 )
+ {
+ if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
+ clear_msr_range(0x3F1, 2);
+ /* MSR 0x3F0 seems to have a default value of 0xFC00, but the
+ current docs don't fully define it, so leave it alone for now. */
+ clear_msr_range(0x3A0, 31);
+ clear_msr_range(0x3C0, 6);
+ clear_msr_range(0x3C8, 6);
+ clear_msr_range(0x3E0, 2);
+ clear_msr_range(MSR_P4_CCCR0, 18);
+ clear_msr_range(MSR_P4_PERFCTR0, 18);
+
+ wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
+ wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
+ Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
+ wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
+ }
+ else if ( logical_proc_id[smp_processor_id()] == 1 )
+ {
+ wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1 & ~P4_CCCR_ENABLE, 0);
+ Dprintk("setting P4_IQ_COUNTER2 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
+ wrmsr(MSR_P4_IQ_COUNTER1, -(cpu_khz/nmi_hz*1000), -1);
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1, 0);
+ }
+ else
+ {
+ return 0;
+ }
+
+ return 1;
+}
+
+void __pminit setup_apic_nmi_watchdog(void)
+{
+ if (!nmi_watchdog)
+ return;
+
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
+ return;
+ setup_k7_watchdog();
+ break;
+ case X86_VENDOR_INTEL:
+ switch (boot_cpu_data.x86) {
+ case 6:
+ setup_p6_watchdog();
+ break;
+ case 15:
+ if (!setup_p4_watchdog())
+ return;
+ break;
+ default:
+ return;
+ }
+ break;
+ default:
+ return;
+ }
+ nmi_pm_init();
+}
+
+
+static unsigned int last_irq_sums[NR_CPUS], alert_counter[NR_CPUS];
+
+void touch_nmi_watchdog (void)
+{
+ int i;
+ for (i = 0; i < smp_num_cpus; i++)
+ alert_counter[i] = 0;
+}
+
+void nmi_watchdog_tick (struct pt_regs * regs)
+{
+ extern spinlock_t console_lock;
+ extern void die(const char * str, struct pt_regs * regs, long err);
+
+ int sum, cpu = smp_processor_id();
+
+ sum = apic_timer_irqs[cpu];
+
+ if ( (last_irq_sums[cpu] == sum) && watchdog_on )
+ {
+ /*
+ * Ayiee, looks like this CPU is stuck ... wait a few IRQs (5 seconds)
+ * before doing the oops ...
+ */
+ alert_counter[cpu]++;
+ if (alert_counter[cpu] == 5*nmi_hz) {
+ console_lock = SPIN_LOCK_UNLOCKED;
+ die("NMI Watchdog detected LOCKUP on CPU", regs, cpu);
+ }
+ }
+ else
+ {
+ last_irq_sums[cpu] = sum;
+ alert_counter[cpu] = 0;
+ }
+
+ if ( nmi_perfctr_msr )
+ {
+ if ( nmi_perfctr_msr == MSR_P4_IQ_COUNTER0 )
+ {
+ if ( logical_proc_id[cpu] == 0 )
+ {
+ wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
+ }
+ else
+ {
+ wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1, 0);
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ wrmsr(MSR_P4_IQ_COUNTER1, -(cpu_khz/nmi_hz*1000), -1);
+ }
+ }
+ else
+ {
+ wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
+ }
+ }
+}
--- /dev/null
+/*
+ * Low-Level PCI Support for PC -- Routing of Interrupts
+ *
+ * (c) 1999--2000 Martin Mares <mj@ucw.cz>
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/kernel.h>
+#include <xen/pci.h>
+#include <xen/init.h>
+#include <xen/slab.h>
+#include <xen/interrupt.h>
+#include <xen/irq.h>
+
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/io_apic.h>
+
+#include "pci-x86.h"
+
+#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
+#define PIRQ_VERSION 0x0100
+
+int broken_hp_bios_irq9;
+
+static struct irq_routing_table *pirq_table;
+
+/*
+ * Never use: 0, 1, 2 (timer, keyboard, and cascade)
+ * Avoid using: 13, 14 and 15 (FP error and IDE).
+ * Penalize: 3, 4, 6, 7, 12 (known ISA uses: serial, floppy, parallel and mouse)
+ */
+unsigned int pcibios_irq_mask = 0xfff8;
+
+static int pirq_penalty[16] = {
+ 1000000, 1000000, 1000000, 1000, 1000, 0, 1000, 1000,
+ 0, 0, 0, 0, 1000, 100000, 100000, 100000
+};
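+
+/*
+ * pirq_penalty is indexed by IRQ number; a higher value makes that IRQ
+ * less attractive when assigning routes. E.g. IRQ 0 (timer) carries
+ * 1000000 and is effectively never chosen, while IRQ 5 carries 0 and
+ * is preferred.
+ */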
+
+struct irq_router {
+ char *name;
+ u16 vendor, device;
+ int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq);
+ int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new);
+};
+
+struct irq_router_handler {
+ u16 vendor;
+ int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device);
+};
+
+/*
+ * Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table.
+ */
+
+static struct irq_routing_table * __init pirq_find_routing_table(void)
+{
+ u8 *addr;
+ struct irq_routing_table *rt;
+ int i;
+ u8 sum;
+
+ for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) {
+ rt = (struct irq_routing_table *) addr;
+ if (rt->signature != PIRQ_SIGNATURE ||
+ rt->version != PIRQ_VERSION ||
+ rt->size % 16 ||
+ rt->size < sizeof(struct irq_routing_table))
+ continue;
+ sum = 0;
+ for(i=0; i<rt->size; i++)
+ sum += addr[i];
+ if (!sum) {
+ DBG("PCI: Interrupt Routing Table found at 0x%p\n", rt);
+ return rt;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * If we have an IRQ routing table, use it to search for peer host
+ * bridges. It's a gross hack, but since there is no other known way
+ * to get a list of buses, we have to go this way.
+ */
+
+static void __init pirq_peer_trick(void)
+{
+ struct irq_routing_table *rt = pirq_table;
+ u8 busmap[256];
+ int i;
+ struct irq_info *e;
+
+ memset(busmap, 0, sizeof(busmap));
+ for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) {
+ e = &rt->slots[i];
+#ifdef DEBUG
+ {
+ int j;
+ DBG("%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot);
+ for(j=0; j<4; j++)
+ DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap);
+ DBG("\n");
+ }
+#endif
+ busmap[e->bus] = 1;
+ }
+ for(i=1; i<256; i++)
+ /*
+ * It might be a secondary bus, but in this case its parent is already
+ * known (ascending bus order) and therefore pci_scan_bus returns immediately.
+ */
+ if (busmap[i] && pci_scan_bus(i, pci_root_bus->ops, NULL))
+ printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i);
+ pcibios_last_bus = -1;
+}
+
+/*
+ * Code for querying and setting of IRQ routes on various interrupt routers.
+ */
+
+void eisa_set_level_irq(unsigned int irq)
+{
+ unsigned char mask = 1 << (irq & 7);
+ unsigned int port = 0x4d0 + (irq >> 3);
+ unsigned char val = inb(port);
+
+ if (!(val & mask)) {
+ DBG(" -> edge");
+ outb(val | mask, port);
+ }
+}
+
+/*
+ * Common IRQ routing practice: nybbles in config space,
+ * offset by some magic constant.
+ */
+static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr)
+{
+ u8 x;
+ unsigned reg = offset + (nr >> 1);
+
+ pci_read_config_byte(router, reg, &x);
+ return (nr & 1) ? (x >> 4) : (x & 0xf);
+}
+
+static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val)
+{
+ u8 x;
+ unsigned reg = offset + (nr >> 1);
+
+ pci_read_config_byte(router, reg, &x);
+ x = (nr & 1) ? ((x & 0x0f) | (val << 4)) : ((x & 0xf0) | val);
+ pci_write_config_byte(router, reg, x);
+}
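+
+/*
+ * Worked example of the nybble addressing above, using the VIA router's
+ * offset of 0x55 from below: read_config_nybble(router, 0x55, 2) selects
+ * config register 0x55 + (2 >> 1) = 0x56 and, 2 being even, its low
+ * nybble; nr == 3 would select the high nybble of the same register.
+ */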
+
+/*
+ * ALI pirq entries are damn ugly, and completely undocumented.
+ * This has been figured out from pirq tables, and it's not a pretty
+ * picture.
+ */
+static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ static unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
+
+ return irqmap[read_config_nybble(router, 0x48, pirq-1)];
+}
+
+static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ static unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
+ unsigned int val = irqmap[irq];
+
+ if (val) {
+ write_config_nybble(router, 0x48, pirq-1, val);
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * The Intel PIIX4 pirq rules are fairly simple: "pirq" is
+ * just a pointer to the config space.
+ */
+static int pirq_piix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ u8 x;
+
+ pci_read_config_byte(router, pirq, &x);
+ return (x < 16) ? x : 0;
+}
+
+static int pirq_piix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ pci_write_config_byte(router, pirq, irq);
+ return 1;
+}
+
+/*
+ * The VIA pirq rules are nibble-based, like ALI,
+ * but without the ugly irq number munging.
+ * However, PIRQD is in the upper instead of lower nibble.
+ */
+static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ return read_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq);
+}
+
+static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ write_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq, irq);
+ return 1;
+}
+
+/*
+ * ITE 8330G pirq rules are nibble-based
+ * FIXME: pirqmap may be { 1, 0, 3, 2 },
+ * 2+3 are both mapped to irq 9 on my system
+ */
+static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ static unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+ return read_config_nybble(router,0x43, pirqmap[pirq-1]);
+}
+
+static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ static unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+ write_config_nybble(router, 0x43, pirqmap[pirq-1], irq);
+ return 1;
+}
+
+/*
+ * OPTI: high four bits are nibble pointer..
+ * I wonder what the low bits do?
+ */
+static int pirq_opti_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ return read_config_nybble(router, 0xb8, pirq >> 4);
+}
+
+static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ write_config_nybble(router, 0xb8, pirq >> 4, irq);
+ return 1;
+}
+
+/*
+ * Cyrix: nibble offset 0x5C
+ */
+static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ return read_config_nybble(router, 0x5C, (pirq-1)^1);
+}
+
+static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ write_config_nybble(router, 0x5C, (pirq-1)^1, irq);
+ return 1;
+}
+
+/*
+ * PIRQ routing for SiS 85C503 router used in several SiS chipsets.
+ * We have to deal with the following issues here:
+ * - vendors have different ideas about the meaning of link values
+ * - some onboard devices (integrated in the chipset) have special
+ * links and are thus routed differently (i.e. not via PCI INTA-INTD)
+ * - different revision of the router have a different layout for
+ * the routing registers, particularly for the onchip devices
+ *
+ * For all routing registers the common thing is we have one byte
+ * per routeable link which is defined as:
+ * bit 7 IRQ mapping enabled (0) or disabled (1)
+ * bits [6:4] reserved (sometimes used for onchip devices)
+ * bits [3:0] IRQ to map to
+ * allowed: 3-7, 9-12, 14-15
+ * reserved: 0, 1, 2, 8, 13
+ *
+ * The config-space registers located at 0x41/0x42/0x43/0x44 are
+ * always used to route the normal PCI INT A/B/C/D respectively.
+ * Apparently there are systems implementing PCI routing table using
+ * link values 0x01-0x04 and others using 0x41-0x44 for PCI INTA..D.
+ * We try our best to handle both link mappings.
+ *
+ * Currently (2003-05-21) it appears most SiS chipsets follow the
+ * definition of routing registers from the SiS-5595 southbridge.
+ * According to the SiS 5595 datasheets the revision id's of the
+ * router (ISA-bridge) should be 0x01 or 0xb0.
+ *
+ * Furthermore we've also seen lspci dumps with revision 0x00 and 0xb1.
+ * Looks like these are used in a number of SiS 5xx/6xx/7xx chipsets.
+ * They seem to work with the current routing code. However there is
+ * some concern because of the two USB-OHCI HCs (original SiS 5595
+ * had only one). YMMV.
+ *
+ * Onchip routing for router rev-id 0x01/0xb0 and probably 0x00/0xb1:
+ *
+ * 0x61: IDEIRQ:
+ * bits [6:5] must be written 01
+ * bit 4 channel-select primary (0), secondary (1)
+ *
+ * 0x62: USBIRQ:
+ * bit 6 OHCI function disabled (0), enabled (1)
+ *
+ * 0x6a: ACPI/SCI IRQ: bits 4-6 reserved
+ *
+ * 0x7e: Data Acq. Module IRQ - bits 4-6 reserved
+ *
+ * We support USBIRQ (in addition to INTA-INTD) and keep the
+ * IDE, ACPI and DAQ routing untouched as set by the BIOS.
+ *
+ * Currently the only reported exception is the new SiS 65x chipset
+ * which includes the SiS 69x southbridge. Here we have the 85C503
+ * router revision 0x04 and there are changes in the register layout
+ * mostly related to the different USB HCs with USB 2.0 support.
+ *
+ * Onchip routing for router rev-id 0x04 (try-and-error observation)
+ *
+ * 0x60/0x61/0x62/0x63: 1xEHCI and 3xOHCI (companion) USB-HCs
+ * bit 6-4 are probably unused, not like 5595
+ */
+
+#define PIRQ_SIS_IRQ_MASK 0x0f
+#define PIRQ_SIS_IRQ_DISABLE 0x80
+#define PIRQ_SIS_USB_ENABLE 0x40
+#define PIRQ_SIS_DETECT_REGISTER 0x40
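+
+/*
+ * Example decode of one SiS routing byte using the masks above
+ * (illustrative value): 0x0b has bit 7 clear and low nybble 0xb, i.e.
+ * mapping enabled, routed to IRQ 11; 0x80 means the link is disabled.
+ */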
+
+/* return value:
+ * -1 on error
+ * 0 for PCI INTA-INTD
+ * 0 or enable bit mask to check or set for onchip functions
+ */
+static inline int pirq_sis5595_onchip(int pirq, int *reg)
+{
+ int ret = -1;
+
+ *reg = pirq;
+ switch(pirq) {
+ case 0x01:
+ case 0x02:
+ case 0x03:
+ case 0x04:
+ *reg += 0x40;
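+ /* fall through */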
+ case 0x41:
+ case 0x42:
+ case 0x43:
+ case 0x44:
+ ret = 0;
+ break;
+
+ case 0x62:
+ ret = PIRQ_SIS_USB_ENABLE; /* documented for 5595 */
+ break;
+
+ case 0x61:
+ case 0x6a:
+ case 0x7e:
+ printk(KERN_INFO "SiS pirq: IDE/ACPI/DAQ mapping not implemented: (%u)\n",
+ (unsigned) pirq);
+ /* fall thru */
+ default:
+ printk(KERN_INFO "SiS router unknown request: (%u)\n",
+ (unsigned) pirq);
+ break;
+ }
+ return ret;
+}
+
+/* return value:
+ * -1 on error
+ * 0 for PCI INTA-INTD
+ * 0 or enable bit mask to check or set for onchip functions
+ */
+static inline int pirq_sis96x_onchip(int pirq, int *reg)
+{
+ int ret = -1;
+
+ *reg = pirq;
+ switch(pirq) {
+ case 0x01:
+ case 0x02:
+ case 0x03:
+ case 0x04:
+ *reg += 0x40;
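+ /* fall through */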
+ case 0x41:
+ case 0x42:
+ case 0x43:
+ case 0x44:
+ case 0x60:
+ case 0x61:
+ case 0x62:
+ case 0x63:
+ ret = 0;
+ break;
+
+ default:
+ printk(KERN_INFO "SiS router unknown request: (%u)\n",
+ (unsigned) pirq);
+ break;
+ }
+ return ret;
+}
+
+
+static int pirq_sis5595_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ u8 x;
+ int reg, check;
+
+ check = pirq_sis5595_onchip(pirq, &reg);
+ if (check < 0)
+ return 0;
+
+ pci_read_config_byte(router, reg, &x);
+ if (check != 0 && !(x & check))
+ return 0;
+
+ return (x & PIRQ_SIS_IRQ_DISABLE) ? 0 : (x & PIRQ_SIS_IRQ_MASK);
+}
+
+static int pirq_sis96x_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ u8 x;
+ int reg, check;
+
+ check = pirq_sis96x_onchip(pirq, &reg);
+ if (check < 0)
+ return 0;
+
+ pci_read_config_byte(router, reg, &x);
+ if (check != 0 && !(x & check))
+ return 0;
+
+ return (x & PIRQ_SIS_IRQ_DISABLE) ? 0 : (x & PIRQ_SIS_IRQ_MASK);
+}
+
+static int pirq_sis5595_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ u8 x;
+ int reg, set;
+
+ set = pirq_sis5595_onchip(pirq, &reg);
+ if (set < 0)
+ return 0;
+
+ x = (irq & PIRQ_SIS_IRQ_MASK);
+ if (x == 0)
+ x = PIRQ_SIS_IRQ_DISABLE;
+ else
+ x |= set;
+
+ pci_write_config_byte(router, reg, x);
+
+ return 1;
+}
+
+static int pirq_sis96x_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ u8 x;
+ int reg, set;
+
+ set = pirq_sis96x_onchip(pirq, &reg);
+ if (set < 0)
+ return 0;
+
+ x = (irq & PIRQ_SIS_IRQ_MASK);
+ if (x == 0)
+ x = PIRQ_SIS_IRQ_DISABLE;
+ else
+ x |= set;
+
+ pci_write_config_byte(router, reg, x);
+
+ return 1;
+}
+
+
+/*
+ * VLSI: nibble offset 0x74 - educated guess due to routing table and
+ * config space of VLSI 82C534 PCI-bridge/router (1004:0102)
+ * Tested on HP OmniBook 800 covering PIRQ 1, 2, 4, 8 for onboard
+ * devices, PIRQ 3 for non-pci(!) soundchip and (untested) PIRQ 6
+ * for the busbridge to the docking station.
+ */
+
+static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ if (pirq > 8) {
+ printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
+ return 0;
+ }
+ return read_config_nybble(router, 0x74, pirq-1);
+}
+
+static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ if (pirq > 8) {
+ printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
+ return 0;
+ }
+ write_config_nybble(router, 0x74, pirq-1, irq);
+ return 1;
+}
+
+/*
+ * ServerWorks: PCI interrupts mapped to system IRQ lines through Index
+ * and Redirect I/O registers (0x0c00 and 0x0c01). The Index register
+ * format is (PCIIRQ## | 0x10), e.g.: PCIIRQ10=0x1a. The Redirect
+ * register is a straight binary coding of desired PIC IRQ (low nibble).
+ *
+ * The 'link' value in the PIRQ table is already in the correct format
+ * for the Index register. There are some special index values:
+ * 0x00 for ACPI (SCI), 0x01 for USB, 0x02 for IDE0, 0x04 for IDE1,
+ * and 0x03 for SMBus.
+ */
+static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ outb_p(pirq, 0xc00);
+ return inb(0xc01) & 0xf;
+}
+
+static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ outb_p(pirq, 0xc00);
+ outb_p(irq, 0xc01);
+ return 1;
+}
+
+/* Support for AMD756 PCI IRQ Routing
+ * Jhon H. Caicedo <jhcaiced@osso.org.co>
+ * Jun/21/2001 0.2.0 Release, fixed to use "nybble" functions... (jhcaiced)
+ * Jun/19/2001 Alpha Release 0.1.0 (jhcaiced)
+ * The AMD756 pirq rules are nibble-based
+ * offset 0x56 0-3 PIRQA 4-7 PIRQB
+ * offset 0x57 0-3 PIRQC 4-7 PIRQD
+ */
+static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ u8 irq;
+ irq = 0;
+ if (pirq <= 4)
+ {
+ irq = read_config_nybble(router, 0x56, pirq - 1);
+ }
+ printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n",
+ dev->vendor, dev->device, pirq, irq);
+ return irq;
+}
+
+static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n",
+ dev->vendor, dev->device, pirq, irq);
+ if (pirq <= 4)
+ {
+ write_config_nybble(router, 0x56, pirq - 1, irq);
+ }
+ return 1;
+}
+
+#ifdef CONFIG_PCI_BIOS
+
+static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ struct pci_dev *bridge;
+ int pin = pci_get_interrupt_pin(dev, &bridge);
+ return pcibios_set_irq_routing(bridge, pin, irq);
+}
+
+#endif
+
+
+static __init int intel_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+ /* We must not touch 440GX even if we have tables. 440GX has
+ different IRQ routing weirdness */
+ if(pci_find_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0, NULL) ||
+ pci_find_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_2, NULL))
+ return 0;
+ switch(device)
+ {
+ case PCI_DEVICE_ID_INTEL_82371FB_0:
+ case PCI_DEVICE_ID_INTEL_82371SB_0:
+ case PCI_DEVICE_ID_INTEL_82371AB_0:
+ case PCI_DEVICE_ID_INTEL_82371MX:
+ case PCI_DEVICE_ID_INTEL_82443MX_0:
+ case PCI_DEVICE_ID_INTEL_82801AA_0:
+ case PCI_DEVICE_ID_INTEL_82801AB_0:
+ case PCI_DEVICE_ID_INTEL_82801BA_0:
+ case PCI_DEVICE_ID_INTEL_82801BA_10:
+ case PCI_DEVICE_ID_INTEL_82801CA_0:
+ case PCI_DEVICE_ID_INTEL_82801CA_12:
+ case PCI_DEVICE_ID_INTEL_82801DB_0:
+ case PCI_DEVICE_ID_INTEL_82801E_0:
+ case PCI_DEVICE_ID_INTEL_82801EB_0:
+ case PCI_DEVICE_ID_INTEL_ESB_0:
+ r->name = "PIIX/ICH";
+ r->get = pirq_piix_get;
+ r->set = pirq_piix_set;
+ return 1;
+ }
+ return 0;
+}
+
+static __init int via_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+ /* FIXME: We should move some of the quirk fixup stuff here */
+ switch(device)
+ {
+ case PCI_DEVICE_ID_VIA_82C586_0:
+ case PCI_DEVICE_ID_VIA_82C596:
+ case PCI_DEVICE_ID_VIA_82C686:
+ case PCI_DEVICE_ID_VIA_8231:
+ /* FIXME: add new ones for 8233/5 */
+ r->name = "VIA";
+ r->get = pirq_via_get;
+ r->set = pirq_via_set;
+ return 1;
+ }
+ return 0;
+}
+
+static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+ switch(device)
+ {
+ case PCI_DEVICE_ID_VLSI_82C534:
+ r->name = "VLSI 82C534";
+ r->get = pirq_vlsi_get;
+ r->set = pirq_vlsi_set;
+ return 1;
+ }
+ return 0;
+}
+
+
+static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+ switch(device)
+ {
+ case PCI_DEVICE_ID_SERVERWORKS_OSB4:
+ case PCI_DEVICE_ID_SERVERWORKS_CSB5:
+ r->name = "ServerWorks";
+ r->get = pirq_serverworks_get;
+ r->set = pirq_serverworks_set;
+ return 1;
+ }
+ return 0;
+}
+
+static __init int sis_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+ u8 reg;
+ u16 devid;
+
+ if (device != PCI_DEVICE_ID_SI_503)
+ return 0;
+
+ /*
+ * In case of SiS south bridge, we need to detect the two
+ * kinds of routing tables we have seen so far (5595 and 96x).
+ * Since they maintain the same device ID, we need to poke
+ * the PCI configuration space to find the router type we are
+ * dealing with.
+ */
+
+ /*
+ * Factoid: setting bit 6 of register 0x40 in the router's config space
+ * makes the south bridge report 0x096x as its device ID. Note that
+ * we need to restore register 0x40 after the device ID poke.
+ */
+
+ pci_read_config_byte(router, PIRQ_SIS_DETECT_REGISTER, &reg);
+ pci_write_config_byte(router, PIRQ_SIS_DETECT_REGISTER, reg | (1 << 6));
+ pci_read_config_word(router, PCI_DEVICE_ID, &devid);
+ pci_write_config_byte(router, PIRQ_SIS_DETECT_REGISTER, reg);
+
+ if ((devid & 0xfff0) == 0x0960) {
+ r->name = "SIS96x";
+ r->get = pirq_sis96x_get;
+ r->set = pirq_sis96x_set;
+ DBG("PCI: Detecting SiS router at %02x:%02x : SiS096x detected\n",
+ rt->rtr_bus, rt->rtr_devfn);
+ } else {
+ r->name = "SIS5595";
+ r->get = pirq_sis5595_get;
+ r->set = pirq_sis5595_set;
+ DBG("PCI: Detecting SiS router at %02x:%02x : SiS5595 detected\n",
+ rt->rtr_bus, rt->rtr_devfn);
+ }
+ return 1;
+}
+
+static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+ switch(device)
+ {
+ case PCI_DEVICE_ID_CYRIX_5520:
+ r->name = "NatSemi";
+ r->get = pirq_cyrix_get;
+ r->set = pirq_cyrix_set;
+ return 1;
+ }
+ return 0;
+}
+
+static __init int opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+ switch(device)
+ {
+ case PCI_DEVICE_ID_OPTI_82C700:
+ r->name = "OPTI";
+ r->get = pirq_opti_get;
+ r->set = pirq_opti_set;
+ return 1;
+ }
+ return 0;
+}
+
+static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+ switch(device)
+ {
+ case PCI_DEVICE_ID_ITE_IT8330G_0:
+ r->name = "ITE";
+ r->get = pirq_ite_get;
+ r->set = pirq_ite_set;
+ return 1;
+ }
+ return 0;
+}
+
+static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+ switch(device)
+ {
+ case PCI_DEVICE_ID_AL_M1533:
+ r->name = "ALI";
+ r->get = pirq_ali_get;
+ r->set = pirq_ali_set;
+ return 1;
+ /* Should add 156x some day */
+ }
+ return 0;
+}
+
+static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+ switch(device)
+ {
+ case PCI_DEVICE_ID_AMD_VIPER_740B:
+ r->name = "AMD756";
+ break;
+ case PCI_DEVICE_ID_AMD_VIPER_7413:
+ r->name = "AMD766";
+ break;
+ case PCI_DEVICE_ID_AMD_VIPER_7443:
+ r->name = "AMD768";
+ break;
+ default:
+ return 0;
+ }
+ r->get = pirq_amd756_get;
+ r->set = pirq_amd756_set;
+ return 1;
+}
+
+static __initdata struct irq_router_handler pirq_routers[] = {
+ { PCI_VENDOR_ID_INTEL, intel_router_probe },
+ { PCI_VENDOR_ID_AL, ali_router_probe },
+ { PCI_VENDOR_ID_ITE, ite_router_probe },
+ { PCI_VENDOR_ID_VIA, via_router_probe },
+ { PCI_VENDOR_ID_OPTI, opti_router_probe },
+ { PCI_VENDOR_ID_SI, sis_router_probe },
+ { PCI_VENDOR_ID_CYRIX, cyrix_router_probe },
+ { PCI_VENDOR_ID_VLSI, vlsi_router_probe },
+ { PCI_VENDOR_ID_SERVERWORKS, serverworks_router_probe },
+ { PCI_VENDOR_ID_AMD, amd_router_probe },
+ /* Someone with docs needs to add the ATI Radeon IGP */
+ { 0, NULL }
+};
+static struct irq_router pirq_router;
+static struct pci_dev *pirq_router_dev;
+
+/*
+ * FIXME: should we have an option to say "generic for
+ * chipset" ?
+ */
+
+static void __init pirq_find_router(struct irq_router *r)
+{
+ struct irq_routing_table *rt = pirq_table;
+ struct irq_router_handler *h;
+
+#ifdef CONFIG_PCI_BIOS
+ if (!rt->signature) {
+ printk(KERN_INFO "PCI: Using BIOS for IRQ routing\n");
+ r->set = pirq_bios_set;
+ r->name = "BIOS";
+ return;
+ }
+#endif
+
+ /* Default unless a driver reloads it */
+ r->name = "default";
+ r->get = NULL;
+ r->set = NULL;
+
+ DBG("PCI: Attempting to find IRQ router for %04x:%04x\n",
+ rt->rtr_vendor, rt->rtr_device);
+
+ pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn);
+ if (!pirq_router_dev) {
+ DBG("PCI: Interrupt router not found at %02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
+ return;
+ }
+
+ for( h = pirq_routers; h->vendor; h++) {
+ /* First look for a router match */
+ if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device))
+ break;
+ /* Fall back to a device match */
+ if (pirq_router_dev->vendor == h->vendor && h->probe(r, pirq_router_dev, pirq_router_dev->device))
+ break;
+ }
+ printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n",
+ pirq_router.name,
+ pirq_router_dev->vendor,
+ pirq_router_dev->device,
+ pirq_router_dev->slot_name);
+}
+
+static struct irq_info *pirq_get_info(struct pci_dev *dev)
+{
+ struct irq_routing_table *rt = pirq_table;
+ int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info);
+ struct irq_info *info;
+
+ for (info = rt->slots; entries--; info++)
+ if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
+ return info;
+ return NULL;
+}
+
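+/* Dummy handler, used only to probe via request_irq() whether an IRQ is free. */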
+static void pcibios_test_irq_handler(int irq, void *dev_id, struct pt_regs *regs)
+{
+}
+
+static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
+{
+ u8 pin;
+ struct irq_info *info;
+ int i, pirq, newirq;
+ int irq = 0;
+ u32 mask;
+ struct irq_router *r = &pirq_router;
+ struct pci_dev *dev2;
+ char *msg = NULL;
+
+ if (!pirq_table)
+ return 0;
+
+ /* Find IRQ routing entry */
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+ if (!pin) {
+ DBG(" -> no interrupt pin\n");
+ return 0;
+ }
+ pin = pin - 1; /* interrupt pins are numbered starting from 1 */
+
+ DBG("IRQ for %s:%d", dev->slot_name, pin);
+ info = pirq_get_info(dev);
+ if (!info) {
+ DBG(" -> not found in routing table\n");
+ return 0;
+ }
+ pirq = info->irq[pin].link;
+ mask = info->irq[pin].bitmap;
+ if (!pirq) {
+ DBG(" -> not routed\n");
+ return 0;
+ }
+ DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs);
+ mask &= pcibios_irq_mask;
+
+ /* Work around broken HP Pavilion Notebooks which assign USB to
+ IRQ 9 even though it is actually wired to IRQ 11 */
+
+ if (broken_hp_bios_irq9 && pirq == 0x59 && dev->irq == 9) {
+ dev->irq = 11;
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE, 11);
+ r->set(pirq_router_dev, dev, pirq, 11);
+ }
+
+ /*
+ * Find the best IRQ to assign: use the one
+ * reported by the device if possible.
+ */
+ newirq = dev->irq;
+ if (!newirq && assign) {
+ for (i = 0; i < 16; i++) {
+ if (!(mask & (1 << i)))
+ continue;
+ if (pirq_penalty[i] < pirq_penalty[newirq] &&
+ !request_irq(i, pcibios_test_irq_handler, SA_SHIRQ, "pci-test", dev)) {
+ free_irq(i, dev);
+ newirq = i;
+ }
+ }
+ }
+ DBG(" -> newirq=%d", newirq);
+
+ /* Check if it is hardcoded: link values 0xf0-0xff carry the IRQ in the low nibble */
+ if ((pirq & 0xf0) == 0xf0) {
+ irq = pirq & 0xf;
+ DBG(" -> hardcoded IRQ %d\n", irq);
+ msg = "Hardcoded";
+ } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq))) {
+ DBG(" -> got IRQ %d\n", irq);
+ msg = "Found";
+ } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
+ DBG(" -> assigning IRQ %d", newirq);
+ if (r->set(pirq_router_dev, dev, pirq, newirq)) {
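+ /* PCI interrupts are level triggered; program the ELCR to match */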
+ eisa_set_level_irq(newirq);
+ DBG(" ... OK\n");
+ msg = "Assigned";
+ irq = newirq;
+ }
+ }
+
+ if (!irq) {
+ DBG(" ... failed\n");
+ if (newirq && mask == (1 << newirq)) {
+ msg = "Guessed";
+ irq = newirq;
+ } else
+ return 0;
+ }
+ printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, dev->slot_name);
+
+ /* Update IRQ for all devices with the same pirq value */
+ pci_for_each_dev(dev2) {
+ pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin);
+ if (!pin)
+ continue;
+ pin--;
+ info = pirq_get_info(dev2);
+ if (!info)
+ continue;
+ if (info->irq[pin].link == pirq) {
+ /* We refuse to override the dev->irq information. Give a warning! */
+ if (dev2->irq && dev2->irq != irq) {
+ printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n",
+ dev2->slot_name, dev2->irq, irq);
+ continue;
+ }
+ dev2->irq = irq;
+ pirq_penalty[irq]++;
+ if (dev != dev2)
+ printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, dev2->slot_name);
+ }
+ }
+ return 1;
+}
+
+void __init pcibios_irq_init(void)
+{
+ DBG("PCI: IRQ init\n");
+ pirq_table = pirq_find_routing_table();
+#ifdef CONFIG_PCI_BIOS
+ if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN))
+ pirq_table = pcibios_get_irq_routing_table();
+#endif
+ if (pirq_table) {
+ pirq_peer_trick();
+ pirq_find_router(&pirq_router);
+ if (pirq_table->exclusive_irqs) {
+ int i;
+ for (i=0; i<16; i++)
+ if (!(pirq_table->exclusive_irqs & (1 << i)))
+ pirq_penalty[i] += 100;
+ }
+ /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */
+ if (io_apic_assign_pci_irqs)
+ pirq_table = NULL;
+ }
+}
+
+void __init pcibios_fixup_irqs(void)
+{
+ struct pci_dev *dev;
+ u8 pin;
+
+ DBG("PCI: IRQ fixup\n");
+ pci_for_each_dev(dev) {
+ /*
+ * If the BIOS has set an out-of-range IRQ number, just ignore it.
+ * Also keep track of which IRQs are already in use.
+ */
+ if (dev->irq >= 16) {
+ DBG("%s: ignoring bogus IRQ %d\n", dev->slot_name, dev->irq);
+ dev->irq = 0;
+ }
+ /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */
+ if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000)
+ pirq_penalty[dev->irq] = 0;
+ pirq_penalty[dev->irq]++;
+ }
+
+ pci_for_each_dev(dev) {
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+#ifdef CONFIG_X86_IO_APIC
+ /*
+ * Recalculate IRQ numbers if we use the I/O APIC.
+ */
+ if (io_apic_assign_pci_irqs)
+ {
+ int irq;
+
+ if (pin) {
+ pin--; /* interrupt pins are numbered starting from 1 */
+ irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
+ /*
+ * Busses behind bridges are typically not listed in the MP-table.
+ * In this case we have to look up the IRQ based on the parent bus,
+ * parent slot, and pin number. The SMP code detects such bridged
+ * busses itself so we should get into this branch reliably.
+ */
+ if (irq < 0 && dev->bus->parent) { /* go back to the bridge */
+ struct pci_dev * bridge = dev->bus->self;
+
+ pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+ irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
+ PCI_SLOT(bridge->devfn), pin);
+ if (irq >= 0)
+ printk(KERN_WARNING "PCI: using PPB(B%d,I%d,P%d) to get irq %d\n",
+ bridge->bus->number, PCI_SLOT(bridge->devfn), pin, irq);
+ }
+ if (irq >= 0) {
+ printk(KERN_INFO "PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %d\n",
+ dev->bus->number, PCI_SLOT(dev->devfn), pin, irq);
+ dev->irq = irq;
+ }
+ }
+ }
+#endif
+ /*
+ * Still no IRQ? Try to lookup one...
+ */
+ if (pin && !dev->irq)
+ pcibios_lookup_irq(dev, 0);
+ }
+}
+
+void pcibios_penalize_isa_irq(int irq)
+{
+ /*
+ * If any ISAPnP device reports an IRQ in its list of possible
+ * IRQs, we try to avoid assigning it to PCI devices.
+ */
+ pirq_penalty[irq] += 100;
+}
+
+void pcibios_enable_irq(struct pci_dev *dev)
+{
+ u8 pin;
+ extern int interrupt_line_quirk;
+
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+ if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
+ char *msg;
+
+ /* With IDE legacy devices the IRQ lookup failure is not a problem. */
+ if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5))
+ return;
+
+ if (io_apic_assign_pci_irqs)
+ msg = " Probably buggy MP table.";
+ else if (pci_probe & PCI_BIOS_IRQ_SCAN)
+ msg = "";
+ else
+ msg = " Please try using pci=biosirq.";
+ printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
+ 'A' + pin - 1, dev->slot_name, msg);
+ }
+ /* VIA bridges use interrupt line for apic/pci steering across
+ the V-Link */
+ else if (interrupt_line_quirk)
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
+
+}
--- /dev/null
+/*
+ * Low-Level PCI Support for PC
+ *
+ * (c) 1999--2000 Martin Mares <mj@ucw.cz>
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/kernel.h>
+#include <xen/sched.h>
+#include <xen/pci.h>
+#include <xen/init.h>
+#include <xen/ioport.h>
+#include <xen/acpi.h>
+
+/*#include <asm/segment.h>*/
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/smpboot.h>
+
+#include "pci-x86.h"
+
+extern int numnodes;
+#define __KERNEL_CS __HYPERVISOR_CS
+#define __KERNEL_DS __HYPERVISOR_DS
+
+unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2;
+
+int pcibios_last_bus = -1;
+struct pci_bus *pci_root_bus = NULL;
+struct pci_ops *pci_root_ops = NULL;
+
+int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value) = NULL;
+int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value) = NULL;
+
+static int pci_using_acpi_prt = 0;
+
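+/*
+ * On NUMA-Q (CONFIG_MULTIQUAD) a global bus number decomposes into a
+ * quad (node) and a bus number local to that quad; on all other
+ * systems the mapping is the identity.
+ */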
+#ifdef CONFIG_MULTIQUAD
+#define BUS2QUAD(global) (mp_bus_id_to_node[global])
+#define BUS2LOCAL(global) (mp_bus_id_to_local[global])
+#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
+#else
+#define BUS2QUAD(global) (0)
+#define BUS2LOCAL(global) (global)
+#define QUADLOCAL2BUS(quad,local) (local)
+#endif
+
+/*
+ * This interrupt-safe spinlock protects all accesses to PCI
+ * configuration space.
+ */
+static spinlock_t pci_config_lock = SPIN_LOCK_UNLOCKED;
+
+
+/*
+ * Functions for accessing PCI configuration space with type 1 accesses
+ */
+
+#ifdef CONFIG_PCI_DIRECT
+
+#ifdef CONFIG_MULTIQUAD
+#define PCI_CONF1_ADDRESS(bus, dev, fn, reg) \
+ (0x80000000 | (BUS2LOCAL(bus) << 16) | (dev << 11) | (fn << 8) | (reg & ~3))
+
+static int pci_conf1_mq_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) /* CONFIG_MULTIQUAD */
+{
+ unsigned long flags;
+
+ if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outl_quad(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8, BUS2QUAD(bus));
+
+ switch (len) {
+ case 1:
+ *value = inb_quad(0xCFC + (reg & 3), BUS2QUAD(bus));
+ break;
+ case 2:
+ *value = inw_quad(0xCFC + (reg & 2), BUS2QUAD(bus));
+ break;
+ case 4:
+ *value = inl_quad(0xCFC, BUS2QUAD(bus));
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+static int pci_conf1_mq_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) /* CONFIG_MULTIQUAD */
+{
+ unsigned long flags;
+
+ if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outl_quad(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8, BUS2QUAD(bus));
+
+ switch (len) {
+ case 1:
+ outb_quad((u8)value, 0xCFC + (reg & 3), BUS2QUAD(bus));
+ break;
+ case 2:
+ outw_quad((u16)value, 0xCFC + (reg & 2), BUS2QUAD(bus));
+ break;
+ case 4:
+ outl_quad((u32)value, 0xCFC, BUS2QUAD(bus));
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+static int pci_conf1_read_mq_config_byte(struct pci_dev *dev, int where, u8 *value)
+{
+ int result;
+ u32 data;
+
+ result = pci_conf1_mq_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, &data);
+
+ *value = (u8)data;
+
+ return result;
+}
+
+static int pci_conf1_read_mq_config_word(struct pci_dev *dev, int where, u16 *value)
+{
+ int result;
+ u32 data;
+
+ result = pci_conf1_mq_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, &data);
+
+ *value = (u16)data;
+
+ return result;
+}
+
+static int pci_conf1_read_mq_config_dword(struct pci_dev *dev, int where, u32 *value)
+{
+ if (!value)
+ return -EINVAL;
+
+ return pci_conf1_mq_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static int pci_conf1_write_mq_config_byte(struct pci_dev *dev, int where, u8 value)
+{
+ return pci_conf1_mq_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, value);
+}
+
+static int pci_conf1_write_mq_config_word(struct pci_dev *dev, int where, u16 value)
+{
+ return pci_conf1_mq_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, value);
+}
+
+static int pci_conf1_write_mq_config_dword(struct pci_dev *dev, int where, u32 value)
+{
+ return pci_conf1_mq_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static struct pci_ops pci_direct_mq_conf1 = {
+ pci_conf1_read_mq_config_byte,
+ pci_conf1_read_mq_config_word,
+ pci_conf1_read_mq_config_dword,
+ pci_conf1_write_mq_config_byte,
+ pci_conf1_write_mq_config_word,
+ pci_conf1_write_mq_config_dword
+};
+
+#endif /* CONFIG_MULTIQUAD */
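+/*
+ * Type 1 configuration address, written to port 0xCF8: bit 31 enable,
+ * bits 23:16 bus, 15:11 device, 10:8 function, 7:2 dword-aligned
+ * register offset.
+ */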
+#define PCI_CONF1_ADDRESS(bus, dev, fn, reg) \
+ (0x80000000 | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3))
+
+static int pci_conf1_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) /* !CONFIG_MULTIQUAD */
+{
+ unsigned long flags;
+
+ if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outl(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8);
+
+ switch (len) {
+ case 1:
+ *value = inb(0xCFC + (reg & 3));
+ break;
+ case 2:
+ *value = inw(0xCFC + (reg & 2));
+ break;
+ case 4:
+ *value = inl(0xCFC);
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+static int pci_conf1_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) /* !CONFIG_MULTIQUAD */
+{
+ unsigned long flags;
+
+ if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outl(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8);
+
+ switch (len) {
+ case 1:
+ outb((u8)value, 0xCFC + (reg & 3));
+ break;
+ case 2:
+ outw((u16)value, 0xCFC + (reg & 2));
+ break;
+ case 4:
+ outl((u32)value, 0xCFC);
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+#undef PCI_CONF1_ADDRESS
+
+static int pci_conf1_read_config_byte(struct pci_dev *dev, int where, u8 *value)
+{
+ int result;
+ u32 data;
+
+ result = pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, &data);
+
+ *value = (u8)data;
+
+ return result;
+}
+
+static int pci_conf1_read_config_word(struct pci_dev *dev, int where, u16 *value)
+{
+ int result;
+ u32 data;
+
+ result = pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, &data);
+
+ *value = (u16)data;
+
+ return result;
+}
+
+static int pci_conf1_read_config_dword(struct pci_dev *dev, int where, u32 *value)
+{
+ return pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static int pci_conf1_write_config_byte(struct pci_dev *dev, int where, u8 value)
+{
+ return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, value);
+}
+
+static int pci_conf1_write_config_word(struct pci_dev *dev, int where, u16 value)
+{
+ return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, value);
+}
+
+static int pci_conf1_write_config_dword(struct pci_dev *dev, int where, u32 value)
+{
+ return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static struct pci_ops pci_direct_conf1 = {
+ pci_conf1_read_config_byte,
+ pci_conf1_read_config_word,
+ pci_conf1_read_config_dword,
+ pci_conf1_write_config_byte,
+ pci_conf1_write_config_word,
+ pci_conf1_write_config_dword
+};
+
+
+/*
+ * Functions for accessing PCI configuration space with type 2 accesses
+ */
+
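+/*
+ * Type 2 accesses select the function via port 0xCF8 and the bus via
+ * port 0xCFA; the device number is encoded in the I/O address
+ * 0xC000-0xCFFF, so only 16 devices per bus are reachable (hence the
+ * "dev & 0x10" checks below).
+ */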
+#define PCI_CONF2_ADDRESS(dev, reg) (u16)(0xC000 | (dev << 8) | reg)
+
+static int pci_conf2_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value)
+{
+ unsigned long flags;
+
+ if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ if (dev & 0x10)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outb((u8)(0xF0 | (fn << 1)), 0xCF8);
+ outb((u8)bus, 0xCFA);
+
+ switch (len) {
+ case 1:
+ *value = inb(PCI_CONF2_ADDRESS(dev, reg));
+ break;
+ case 2:
+ *value = inw(PCI_CONF2_ADDRESS(dev, reg));
+ break;
+ case 4:
+ *value = inl(PCI_CONF2_ADDRESS(dev, reg));
+ break;
+ }
+
+ outb (0, 0xCF8);
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+static int pci_conf2_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value)
+{
+ unsigned long flags;
+
+ if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ if (dev & 0x10)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outb((u8)(0xF0 | (fn << 1)), 0xCF8);
+ outb((u8)bus, 0xCFA);
+
+ switch (len) {
+ case 1:
+ outb ((u8)value, PCI_CONF2_ADDRESS(dev, reg));
+ break;
+ case 2:
+ outw ((u16)value, PCI_CONF2_ADDRESS(dev, reg));
+ break;
+ case 4:
+ outl ((u32)value, PCI_CONF2_ADDRESS(dev, reg));
+ break;
+ }
+
+ outb (0, 0xCF8);
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+#undef PCI_CONF2_ADDRESS
+
+static int pci_conf2_read_config_byte(struct pci_dev *dev, int where, u8 *value)
+{
+ int result;
+ u32 data;
+ result = pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, &data);
+ *value = (u8)data;
+ return result;
+}
+
+static int pci_conf2_read_config_word(struct pci_dev *dev, int where, u16 *value)
+{
+ int result;
+ u32 data;
+ result = pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, &data);
+ *value = (u16)data;
+ return result;
+}
+
+static int pci_conf2_read_config_dword(struct pci_dev *dev, int where, u32 *value)
+{
+ return pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static int pci_conf2_write_config_byte(struct pci_dev *dev, int where, u8 value)
+{
+ return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, value);
+}
+
+static int pci_conf2_write_config_word(struct pci_dev *dev, int where, u16 value)
+{
+ return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, value);
+}
+
+static int pci_conf2_write_config_dword(struct pci_dev *dev, int where, u32 value)
+{
+ return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static struct pci_ops pci_direct_conf2 = {
+ pci_conf2_read_config_byte,
+ pci_conf2_read_config_word,
+ pci_conf2_read_config_dword,
+ pci_conf2_write_config_byte,
+ pci_conf2_write_config_word,
+ pci_conf2_write_config_dword
+};
+
+
+/*
+ * Before we decide to use direct hardware access mechanisms, we try to do some
+ * trivial checks to ensure it at least _seems_ to be working -- we just test
+ * whether bus 00 contains a host bridge (this is similar to checking
+ * techniques used in XFree86, but ours should be more reliable since we
+ * attempt to make use of direct access hints provided by the PCI BIOS).
+ *
+ * This should be close to trivial, but it isn't, because there are buggy
+ * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID.
+ */
+static int __devinit pci_sanity_check(struct pci_ops *o)
+{
+ u16 x;
+ struct pci_bus bus; /* Fake bus and device */
+ struct pci_dev dev;
+
+ if (pci_probe & PCI_NO_CHECKS)
+ return 1;
+ bus.number = 0;
+ dev.bus = &bus;
+ for(dev.devfn=0; dev.devfn < 0x100; dev.devfn++)
+ if ((!o->read_word(&dev, PCI_CLASS_DEVICE, &x) &&
+ (x == PCI_CLASS_BRIDGE_HOST || x == PCI_CLASS_DISPLAY_VGA)) ||
+ (!o->read_word(&dev, PCI_VENDOR_ID, &x) &&
+ (x == PCI_VENDOR_ID_INTEL || x == PCI_VENDOR_ID_COMPAQ)))
+ return 1;
+ DBG("PCI: Sanity check failed\n");
+ return 0;
+}
+
+static struct pci_ops * __devinit pci_check_direct(void)
+{
+ unsigned int tmp;
+ unsigned long flags;
+
+ __save_flags(flags); __cli();
+
+ /*
+ * Check if configuration type 1 works.
+ */
+ if (pci_probe & PCI_PROBE_CONF1) {
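+ /*
+ * If 0x80000000 written to address port 0xCF8 reads back intact,
+ * a type 1 configuration address register is present.
+ */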
+ outb (0x01, 0xCFB);
+ tmp = inl (0xCF8);
+ outl (0x80000000, 0xCF8);
+ if (inl (0xCF8) == 0x80000000 &&
+ pci_sanity_check(&pci_direct_conf1)) {
+ outl (tmp, 0xCF8);
+ __restore_flags(flags);
+ printk(KERN_INFO "PCI: Using configuration type 1\n");
+ request_region(0xCF8, 8, "PCI conf1");
+
+#ifdef CONFIG_MULTIQUAD
+ /* Multi-Quad has an extended PCI Conf1 */
+ if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
+ return &pci_direct_mq_conf1;
+#endif
+ return &pci_direct_conf1;
+ }
+ outl (tmp, 0xCF8);
+ }
+
+ /*
+ * Check if configuration type 2 works.
+ */
+ if (pci_probe & PCI_PROBE_CONF2) {
+ outb (0x00, 0xCFB);
+ outb (0x00, 0xCF8);
+ outb (0x00, 0xCFA);
+ if (inb (0xCF8) == 0x00 && inb (0xCFA) == 0x00 &&
+ pci_sanity_check(&pci_direct_conf2)) {
+ __restore_flags(flags);
+ printk(KERN_INFO "PCI: Using configuration type 2\n");
+ request_region(0xCF8, 4, "PCI conf2");
+ return &pci_direct_conf2;
+ }
+ }
+
+ __restore_flags(flags);
+ return NULL;
+}
+
+#endif
+
+/*
+ * BIOS32 and PCI BIOS handling.
+ */
+
+#ifdef CONFIG_PCI_BIOS
+
+#define PCIBIOS_PCI_FUNCTION_ID 0xb1XX
+#define PCIBIOS_PCI_BIOS_PRESENT 0xb101
+#define PCIBIOS_FIND_PCI_DEVICE 0xb102
+#define PCIBIOS_FIND_PCI_CLASS_CODE 0xb103
+#define PCIBIOS_GENERATE_SPECIAL_CYCLE 0xb106
+#define PCIBIOS_READ_CONFIG_BYTE 0xb108
+#define PCIBIOS_READ_CONFIG_WORD 0xb109
+#define PCIBIOS_READ_CONFIG_DWORD 0xb10a
+#define PCIBIOS_WRITE_CONFIG_BYTE 0xb10b
+#define PCIBIOS_WRITE_CONFIG_WORD 0xb10c
+#define PCIBIOS_WRITE_CONFIG_DWORD 0xb10d
+#define PCIBIOS_GET_ROUTING_OPTIONS 0xb10e
+#define PCIBIOS_SET_PCI_HW_INT 0xb10f
+
+/* BIOS32 signature: "_32_" */
+#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
+
+/* PCI signature: "PCI " */
+#define PCI_SIGNATURE (('P' << 0) + ('C' << 8) + ('I' << 16) + (' ' << 24))
+
+/* PCI service signature: "$PCI" */
+#define PCI_SERVICE (('$' << 0) + ('P' << 8) + ('C' << 16) + ('I' << 24))
+
+/* PCI BIOS hardware mechanism flags */
+#define PCIBIOS_HW_TYPE1 0x01
+#define PCIBIOS_HW_TYPE2 0x02
+#define PCIBIOS_HW_TYPE1_SPEC 0x10
+#define PCIBIOS_HW_TYPE2_SPEC 0x20
+
+/*
+ * This is the standard structure used to identify the entry point
+ * to the BIOS32 Service Directory, as documented in
+ * Standard BIOS 32-bit Service Directory Proposal
+ * Revision 0.4 May 24, 1993
+ * Phoenix Technologies Ltd.
+ * Norwood, MA
+ * and the PCI BIOS specification.
+ */
+
+union bios32 {
+ struct {
+ unsigned long signature; /* _32_ */
+ unsigned long entry; /* 32 bit physical address */
+ unsigned char revision; /* Revision level, 0 */
+ unsigned char length; /* Length in paragraphs should be 01 */
+ unsigned char checksum; /* All bytes must add up to zero */
+ unsigned char reserved[5]; /* Must be zero */
+ } fields;
+ char chars[16];
+};
+
+/*
+ * Physical address of the service directory. I don't know if we're
+ * allowed to have more than one of these or not, so just in case
+ * we'll make pcibios_present() take a memory start parameter and store
+ * the array there.
+ */
+
+static struct {
+ unsigned long address;
+ unsigned short segment;
+} bios32_indirect = { 0, __KERNEL_CS };
+
+/*
+ * Returns the entry point for the given service, NULL on error
+ */
+
+static unsigned long bios32_service(unsigned long service)
+{
+ unsigned char return_code; /* %al */
+ unsigned long address; /* %ebx */
+ unsigned long length; /* %ecx */
+ unsigned long entry; /* %edx */
+ unsigned long flags;
+
+ __save_flags(flags); __cli();
+ __asm__("lcall *(%%edi); cld"
+ : "=a" (return_code),
+ "=b" (address),
+ "=c" (length),
+ "=d" (entry)
+ : "0" (service),
+ "1" (0),
+ "D" (&bios32_indirect));
+ __restore_flags(flags);
+
+ switch (return_code) {
+ case 0:
+ return address + entry;
+ case 0x80: /* Not present */
+ printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service);
+ return 0;
+ default: /* Shouldn't happen */
+ printk(KERN_WARNING "bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n",
+ service, return_code);
+ return 0;
+ }
+}
+
+static struct {
+ unsigned long address;
+ unsigned short segment;
+} pci_indirect = { 0, __KERNEL_CS };
+
+static int pci_bios_present;
+
+static int __devinit check_pcibios(void)
+{
+ u32 signature, eax, ebx, ecx;
+ u8 status, major_ver, minor_ver, hw_mech;
+ unsigned long flags, pcibios_entry;
+
+ if ((pcibios_entry = bios32_service(PCI_SERVICE))) {
+ pci_indirect.address = pcibios_entry + PAGE_OFFSET;
+
+ __save_flags(flags); __cli();
+ __asm__(
+ "lcall *(%%edi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=d" (signature),
+ "=a" (eax),
+ "=b" (ebx),
+ "=c" (ecx)
+ : "1" (PCIBIOS_PCI_BIOS_PRESENT),
+ "D" (&pci_indirect)
+ : "memory");
+ __restore_flags(flags);
+
+ status = (eax >> 8) & 0xff;
+ hw_mech = eax & 0xff;
+ major_ver = (ebx >> 8) & 0xff;
+ minor_ver = ebx & 0xff;
+ if (pcibios_last_bus < 0)
+ pcibios_last_bus = ecx & 0xff;
+ DBG("PCI: BIOS probe returned s=%02x hw=%02x ver=%02x.%02x l=%02x\n",
+ status, hw_mech, major_ver, minor_ver, pcibios_last_bus);
+ if (status || signature != PCI_SIGNATURE) {
+ printk (KERN_ERR "PCI: BIOS BUG #%x[%08x] found\n",
+ status, signature);
+ return 0;
+ }
+ printk(KERN_INFO "PCI: PCI BIOS revision %x.%02x entry at 0x%lx, last bus=%d\n",
+ major_ver, minor_ver, pcibios_entry, pcibios_last_bus);
+#ifdef CONFIG_PCI_DIRECT
+ if (!(hw_mech & PCIBIOS_HW_TYPE1))
+ pci_probe &= ~PCI_PROBE_CONF1;
+ if (!(hw_mech & PCIBIOS_HW_TYPE2))
+ pci_probe &= ~PCI_PROBE_CONF2;
+#endif
+ return 1;
+ }
+ return 0;
+}
+
+static int __devinit pci_bios_find_device (unsigned short vendor, unsigned short device_id,
+ unsigned short index, unsigned char *bus, unsigned char *device_fn)
+{
+ unsigned short bx;
+ unsigned short ret;
+
+ __asm__("lcall *(%%edi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=b" (bx),
+ "=a" (ret)
+ : "1" (PCIBIOS_FIND_PCI_DEVICE),
+ "c" (device_id),
+ "d" (vendor),
+ "S" ((int) index),
+ "D" (&pci_indirect));
+ *bus = (bx >> 8) & 0xff;
+ *device_fn = bx & 0xff;
+ return (int) (ret & 0xff00) >> 8;
+}
+
+static int pci_bios_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value)
+{
+ unsigned long result = 0;
+ unsigned long flags;
+ unsigned long bx = ((bus << 8) | (dev << 3) | fn);
+
+ if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ switch (len) {
+ case 1:
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=c" (*value),
+ "=a" (result)
+ : "1" (PCIBIOS_READ_CONFIG_BYTE),
+ "b" (bx),
+ "D" ((long)reg),
+ "S" (&pci_indirect));
+ break;
+ case 2:
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=c" (*value),
+ "=a" (result)
+ : "1" (PCIBIOS_READ_CONFIG_WORD),
+ "b" (bx),
+ "D" ((long)reg),
+ "S" (&pci_indirect));
+ break;
+ case 4:
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=c" (*value),
+ "=a" (result)
+ : "1" (PCIBIOS_READ_CONFIG_DWORD),
+ "b" (bx),
+ "D" ((long)reg),
+ "S" (&pci_indirect));
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return (int)((result & 0xff00) >> 8);
+}
+
+static int pci_bios_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value)
+{
+ unsigned long result = 0;
+ unsigned long flags;
+ unsigned long bx = ((bus << 8) | (dev << 3) | fn);
+
+ if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ switch (len) {
+ case 1:
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (result)
+ : "0" (PCIBIOS_WRITE_CONFIG_BYTE),
+ "c" (value),
+ "b" (bx),
+ "D" ((long)reg),
+ "S" (&pci_indirect));
+ break;
+ case 2:
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (result)
+ : "0" (PCIBIOS_WRITE_CONFIG_WORD),
+ "c" (value),
+ "b" (bx),
+ "D" ((long)reg),
+ "S" (&pci_indirect));
+ break;
+ case 4:
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (result)
+ : "0" (PCIBIOS_WRITE_CONFIG_DWORD),
+ "c" (value),
+ "b" (bx),
+ "D" ((long)reg),
+ "S" (&pci_indirect));
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return (int)((result & 0xff00) >> 8);
+}
+
+static int pci_bios_read_config_byte(struct pci_dev *dev, int where, u8 *value)
+{
+ int result;
+ u32 data;
+
+ if (!value)
+ BUG();
+
+ result = pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, &data);
+
+ *value = (u8)data;
+
+ return result;
+}
+
+static int pci_bios_read_config_word(struct pci_dev *dev, int where, u16 *value)
+{
+ int result;
+ u32 data;
+
+ if (!value)
+ BUG();
+
+ result = pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, &data);
+
+ *value = (u16)data;
+
+ return result;
+}
+
+static int pci_bios_read_config_dword(struct pci_dev *dev, int where, u32 *value)
+{
+ if (!value)
+ BUG();
+
+ return pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static int pci_bios_write_config_byte(struct pci_dev *dev, int where, u8 value)
+{
+ return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 1, value);
+}
+
+static int pci_bios_write_config_word(struct pci_dev *dev, int where, u16 value)
+{
+ return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 2, value);
+}
+
+static int pci_bios_write_config_dword(struct pci_dev *dev, int where, u32 value)
+{
+ return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+
+/*
+ * Function table for BIOS32 access
+ */
+
+static struct pci_ops pci_bios_access = {
+ pci_bios_read_config_byte,
+ pci_bios_read_config_word,
+ pci_bios_read_config_dword,
+ pci_bios_write_config_byte,
+ pci_bios_write_config_word,
+ pci_bios_write_config_dword
+};
+
+/*
+ * Try to find PCI BIOS.
+ */
+
+static struct pci_ops * __devinit pci_find_bios(void)
+{
+ union bios32 *check;
+ unsigned char sum;
+ int i, length;
+
+ /*
+ * Follow the standard procedure for locating the BIOS32 Service
+ * directory by scanning the permissible address range from
+ * 0xe0000 through 0xfffff for a valid BIOS32 structure.
+ */
+
+ for (check = (union bios32 *) __va(0xe0000);
+ check <= (union bios32 *) __va(0xffff0);
+ ++check) {
+ if (check->fields.signature != BIOS32_SIGNATURE)
+ continue;
+ length = check->fields.length * 16;
+ if (!length)
+ continue;
+ sum = 0;
+ for (i = 0; i < length ; ++i)
+ sum += check->chars[i];
+ if (sum != 0)
+ continue;
+ if (check->fields.revision != 0) {
+ printk("PCI: unsupported BIOS32 revision %d at 0x%p\n",
+ check->fields.revision, check);
+ continue;
+ }
+ DBG("PCI: BIOS32 Service Directory structure at 0x%p\n", check);
+ if (check->fields.entry >= 0x100000) {
+ printk("PCI: BIOS32 entry (0x%p) in high memory, cannot use.\n", check);
+ return NULL;
+ } else {
+ unsigned long bios32_entry = check->fields.entry;
+ DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", bios32_entry);
+ bios32_indirect.address = bios32_entry + PAGE_OFFSET;
+ if (check_pcibios())
+ return &pci_bios_access;
+ }
+ break; /* Hopefully more than one BIOS32 cannot happen... */
+ }
+
+ return NULL;
+}
+
+/*
+ * Sort the device list according to PCI BIOS. Nasty hack, but since some
+ * fool forgot to define the `correct' device order in the PCI BIOS specs
+ * and we want to be (possibly bug-to-bug ;-]) compatible with older kernels
+ * which used BIOS ordering, we are bound to do this...
+ */
+
+static void __devinit pcibios_sort(void)
+{
+ LIST_HEAD(sorted_devices);
+ struct list_head *ln;
+ struct pci_dev *dev, *d;
+ int idx, found;
+ unsigned char bus, devfn;
+
+ DBG("PCI: Sorting device list...\n");
+ while (!list_empty(&pci_devices)) {
+ ln = pci_devices.next;
+ dev = pci_dev_g(ln);
+ idx = found = 0;
+ while (pci_bios_find_device(dev->vendor, dev->device, idx, &bus, &devfn) == PCIBIOS_SUCCESSFUL) {
+ idx++;
+ for (ln=pci_devices.next; ln != &pci_devices; ln=ln->next) {
+ d = pci_dev_g(ln);
+ if (d->bus->number == bus && d->devfn == devfn) {
+ list_del(&d->global_list);
+ list_add_tail(&d->global_list, &sorted_devices);
+ if (d == dev)
+ found = 1;
+ break;
+ }
+ }
+ if (ln == &pci_devices) {
+ printk(KERN_WARNING "PCI: BIOS reporting unknown device %02x:%02x\n", bus, devfn);
+ /*
+ * We must not continue scanning as several buggy BIOSes
+ * return garbage after the last device. Grr.
+ */
+ break;
+ }
+ }
+ if (!found) {
+ printk(KERN_WARNING "PCI: Device %02x:%02x not found by BIOS\n",
+ dev->bus->number, dev->devfn);
+ list_del(&dev->global_list);
+ list_add_tail(&dev->global_list, &sorted_devices);
+ }
+ }
+ list_splice(&sorted_devices, &pci_devices);
+}
+
+/*
+ * BIOS Functions for IRQ Routing
+ */
+
+struct irq_routing_options {
+ u16 size;
+ struct irq_info *table;
+ u16 segment;
+} __attribute__((packed));
+
+struct irq_routing_table * __devinit pcibios_get_irq_routing_table(void)
+{
+ struct irq_routing_options opt;
+ struct irq_routing_table *rt = NULL;
+ int ret, map;
+ unsigned long page;
+
+ if (!pci_bios_present)
+ return NULL;
+ page = __get_free_page(GFP_KERNEL);
+ if (!page)
+ return NULL;
+ opt.table = (struct irq_info *) page;
+ opt.size = PAGE_SIZE;
+ opt.segment = __KERNEL_DS;
+
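+ /*
+ * The BIOS fills opt.table with irq_info entries and returns the
+ * bitmap of IRQs dedicated exclusively to PCI in %ebx.
+ */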
+ DBG("PCI: Fetching IRQ routing table... ");
+ __asm__("push %%es\n\t"
+ "push %%ds\n\t"
+ "pop %%es\n\t"
+ "lcall *(%%esi); cld\n\t"
+ "pop %%es\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (ret),
+ "=b" (map),
+ "+m" (opt)
+ : "0" (PCIBIOS_GET_ROUTING_OPTIONS),
+ "1" (0),
+ "D" ((long) &opt),
+ "S" (&pci_indirect));
+ DBG("OK ret=%d, size=%d, map=%x\n", ret, opt.size, map);
+ if (ret & 0xff00)
+ printk(KERN_ERR "PCI: Error %02x when fetching IRQ routing table.\n", (ret >> 8) & 0xff);
+ else if (opt.size) {
+ rt = kmalloc(sizeof(struct irq_routing_table) + opt.size, GFP_KERNEL);
+ if (rt) {
+ memset(rt, 0, sizeof(struct irq_routing_table));
+ rt->size = opt.size + sizeof(struct irq_routing_table);
+ rt->exclusive_irqs = map;
+ memcpy(rt->slots, (void *) page, opt.size);
+ printk(KERN_INFO "PCI: Using BIOS Interrupt Routing Table\n");
+ }
+ }
+ free_page(page);
+ return rt;
+}
+
+
+int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq)
+{
+ int ret;
+
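+ /*
+ * PCI BIOS "set hardware interrupt": %cl selects the interrupt
+ * pin (0x0a-0x0d for INTA#-INTD#), %ch the IRQ number.
+ */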
+ __asm__("lcall *(%%esi); cld\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (ret)
+ : "0" (PCIBIOS_SET_PCI_HW_INT),
+ "b" ((dev->bus->number << 8) | dev->devfn),
+ "c" ((irq << 8) | (pin + 10)),
+ "S" (&pci_indirect));
+ return !(ret & 0xff00);
+}
+
+#endif
+
+/*
+ * Several buggy motherboards address only 16 devices and mirror
+ * them to next 16 IDs. We try to detect this `feature' on all
+ * primary buses (those containing host bridges as they are
+ * expected to be unique) and remove the ghost devices.
+ */
+
+static void __devinit pcibios_fixup_ghosts(struct pci_bus *b)
+{
+ struct list_head *ln, *mn;
+ struct pci_dev *d, *e;
+ int mirror = PCI_DEVFN(16,0);
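+ /* devfn offset at which devices 00-0f reappear as ghost copies */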
+ int seen_host_bridge = 0;
+ int i;
+
+ DBG("PCI: Scanning for ghost devices on bus %d\n", b->number);
+ for (ln=b->devices.next; ln != &b->devices; ln=ln->next) {
+ d = pci_dev_b(ln);
+ if ((d->class >> 8) == PCI_CLASS_BRIDGE_HOST)
+ seen_host_bridge++;
+ for (mn=ln->next; mn != &b->devices; mn=mn->next) {
+ e = pci_dev_b(mn);
+ if (e->devfn != d->devfn + mirror ||
+ e->vendor != d->vendor ||
+ e->device != d->device ||
+ e->class != d->class)
+ continue;
+ for(i=0; i<PCI_NUM_RESOURCES; i++)
+ if (e->resource[i].start != d->resource[i].start ||
+ e->resource[i].end != d->resource[i].end ||
+ e->resource[i].flags != d->resource[i].flags)
+ continue;
+ break;
+ }
+ if (mn == &b->devices)
+ return;
+ }
+ if (!seen_host_bridge)
+ return;
+ printk(KERN_WARNING "PCI: Ignoring ghost devices on bus %02x\n", b->number);
+
+ ln = &b->devices;
+ while (ln->next != &b->devices) {
+ d = pci_dev_b(ln->next);
+ if (d->devfn >= mirror) {
+ list_del(&d->global_list);
+ list_del(&d->bus_list);
+ kfree(d);
+ } else
+ ln = ln->next;
+ }
+}
+
+/*
+ * Discover remaining PCI buses in case there are peer host bridges.
+ * We use the number of last PCI bus provided by the PCI BIOS.
+ */
+static void __devinit pcibios_fixup_peer_bridges(void)
+{
+ int n;
+ struct pci_bus bus;
+ struct pci_dev dev;
+ u16 l;
+
+ if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff)
+ return;
+ DBG("PCI: Peer bridge fixup\n");
+ for (n=0; n <= pcibios_last_bus; n++) {
+ if (pci_bus_exists(&pci_root_buses, n))
+ continue;
+ bus.number = n;
+ bus.ops = pci_root_ops;
+ dev.bus = &bus;
+ for(dev.devfn=0; dev.devfn<256; dev.devfn += 8)
+ if (!pci_read_config_word(&dev, PCI_VENDOR_ID, &l) &&
+ l != 0x0000 && l != 0xffff) {
+ DBG("Found device at %02x:%02x [%04x]\n", n, dev.devfn, l);
+ printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n);
+ pci_scan_bus(n, pci_root_ops, NULL);
+ break;
+ }
+ }
+}
+
+/*
+ * Exceptions for specific devices. Usually work-arounds for fatal design flaws.
+ */
+
+static void __devinit pci_fixup_i450nx(struct pci_dev *d)
+{
+ /*
+ * i450NX -- Find and scan all secondary buses on all PXB's.
+ */
+ int pxb, reg;
+ u8 busno, suba, subb;
+#ifdef CONFIG_MULTIQUAD
+ int quad = BUS2QUAD(d->bus->number);
+#endif
+ printk("PCI: Searching for i450NX host bridges on %s\n", d->slot_name);
+ reg = 0xd0;
+ for(pxb=0; pxb<2; pxb++) {
+ pci_read_config_byte(d, reg++, &busno);
+ pci_read_config_byte(d, reg++, &suba);
+ pci_read_config_byte(d, reg++, &subb);
+ DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb);
+ if (busno)
+ pci_scan_bus(QUADLOCAL2BUS(quad,busno), pci_root_ops, NULL); /* Bus A */
+ if (suba < subb)
+ pci_scan_bus(QUADLOCAL2BUS(quad,suba+1), pci_root_ops, NULL); /* Bus B */
+ }
+ pcibios_last_bus = -1;
+}
+
+static void __devinit pci_fixup_i450gx(struct pci_dev *d)
+{
+ /*
+ * i450GX and i450KX -- Find and scan all secondary buses.
+ * (called separately for each PCI bridge found)
+ */
+ u8 busno;
+ pci_read_config_byte(d, 0x4a, &busno);
+ printk(KERN_INFO "PCI: i440KX/GX host bridge %s: secondary bus %02x\n", d->slot_name, busno);
+ pci_scan_bus(busno, pci_root_ops, NULL);
+ pcibios_last_bus = -1;
+}
+
+static void __devinit pci_fixup_umc_ide(struct pci_dev *d)
+{
+ /*
+ * UM8886BF IDE controller sets region type bits incorrectly,
+ * therefore the regions look like memory even though they are I/O.
+ */
+ int i;
+
+ printk(KERN_WARNING "PCI: Fixing base address flags for device %s\n", d->slot_name);
+ for(i=0; i<4; i++)
+ d->resource[i].flags |= PCI_BASE_ADDRESS_SPACE_IO;
+}
+
+static void __devinit pci_fixup_ncr53c810(struct pci_dev *d)
+{
+ /*
+ * NCR 53C810 returns class code 0 (at least on some systems).
+ * Fix class to be PCI_CLASS_STORAGE_SCSI
+ */
+ if (!d->class) {
+ printk("PCI: fixing NCR 53C810 class code for %s\n", d->slot_name);
+ d->class = PCI_CLASS_STORAGE_SCSI << 8;
+ }
+}
+
+static void __devinit pci_fixup_ide_bases(struct pci_dev *d)
+{
+ int i;
+
+ /*
+ * PCI IDE controllers use non-standard I/O port decoding, respect it.
+ */
+ if ((d->class >> 8) != PCI_CLASS_STORAGE_IDE)
+ return;
+ DBG("PCI: IDE base address fixup for %s\n", d->slot_name);
+ for(i=0; i<4; i++) {
+ struct resource *r = &d->resource[i];
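+ /* Legacy IDE control block: the usable port is the single byte at base+2 (0x376/0x3f6). */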
+ if ((r->start & ~0x80) == 0x374) {
+ r->start |= 2;
+ r->end = r->start;
+ }
+ }
+}
+
+static void __devinit pci_fixup_ide_trash(struct pci_dev *d)
+{
+ int i;
+
+ /*
+ * There exist PCI IDE controllers which have utter garbage
+ * in first four base registers. Ignore that.
+ */
+ DBG("PCI: IDE base address trash cleared for %s\n", d->slot_name);
+ for(i=0; i<4; i++)
+ d->resource[i].start = d->resource[i].end = d->resource[i].flags = 0;
+}
+
+static void __devinit pci_fixup_latency(struct pci_dev *d)
+{
+ /*
+ * SiS 5597 and 5598 chipsets require latency timer set to
+ * at most 32 to avoid lockups.
+ */
+ DBG("PCI: Setting max latency to 32\n");
+ pcibios_max_latency = 32;
+}
+
+static void __devinit pci_fixup_piix4_acpi(struct pci_dev *d)
+{
+ /*
+ * PIIX4 ACPI device: hardwired IRQ9
+ */
+ d->irq = 9;
+}
+
+/*
+ * Addresses issues with problems in the memory write queue timer in
+ * certain VIA Northbridges. This bugfix is per VIA's specifications,
+ * except for the KL133/KM133: clearing bit 5 on those Northbridges seems
+ * to trigger a bug in its integrated ProSavage video card, which
+ * causes screen corruption. We only clear bits 6 and 7 for that chipset,
+ * until VIA can provide us with definitive information on why screen
+ * corruption occurs, and what exactly those bits do.
+ *
+ * VIA 8363,8622,8361 Northbridges:
+ * - bits 5, 6, 7 at offset 0x55 need to be turned off
+ * VIA 8367 (KT266x) Northbridges:
+ * - bits 5, 6, 7 at offset 0x95 need to be turned off
+ * VIA 8363 rev 0x81/0x84 (KL133/KM133) Northbridges:
+ * - bits 6, 7 at offset 0x55 need to be turned off
+ */
+
+#define VIA_8363_KL133_REVISION_ID 0x81
+#define VIA_8363_KM133_REVISION_ID 0x84
+
+static void __init pci_fixup_via_northbridge_bug(struct pci_dev *d)
+{
+ u8 v;
+ u8 revision;
+ int where = 0x55;
+ int mask = 0x1f; /* clear bits 5, 6, 7 by default */
+
+ pci_read_config_byte(d, PCI_REVISION_ID, &revision);
+
+ if (d->device == PCI_DEVICE_ID_VIA_8367_0) {
+ /* fix PCI bus latency issues caused by a NB BIOS error:
+ it appears the BIOS on bug free^Wreduced KT266x boards
+ forces NB latency to zero */
+ pci_write_config_byte(d, PCI_LATENCY_TIMER, 0);
+
+ where = 0x95; /* the memory write queue timer register is
+ different for the KT266x's: 0x95 not 0x55 */
+ } else if (d->device == PCI_DEVICE_ID_VIA_8363_0 &&
+ (revision == VIA_8363_KL133_REVISION_ID ||
+ revision == VIA_8363_KM133_REVISION_ID)) {
+ mask = 0x3f; /* clear only bits 6 and 7; clearing bit 5
+ causes screen corruption on the KL133/KM133 */
+ }
+
+ pci_read_config_byte(d, where, &v);
+ if (v & ~mask) {
+ printk("Disabling VIA memory write queue (PCI ID %04x, rev %02x): [%02x] %02x & %02x -> %02x\n", \
+ d->device, revision, where, v, mask, v & mask);
+ v &= mask;
+ pci_write_config_byte(d, where, v);
+ }
+}
+
+/*
+ * For some reason Intel decided that certain parts of their
+ * 815, 845 and some other chipsets must look like PCI-to-PCI bridges
+ * while they are obviously not. The 82801 family (AA, AB, BAM/CAM,
+ * BA/CA/DB and E) PCI bridges are actually HUB-to-PCI ones, according
+ * to Intel terminology. These devices forward all addresses from the
+ * system to the PCI bus no matter what their window settings are, so
+ * they are "transparent" (subtractive decoding) from the programmer's
+ * point of view.
+ */
+static void __init pci_fixup_transparent_bridge(struct pci_dev *dev)
+{
+ if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI &&
+ (dev->device & 0xff00) == 0x2400)
+ dev->transparent = 1;
+}
+
+struct pci_fixup pcibios_fixups[] = {
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5513, pci_fixup_ide_trash },
+ { PCI_FIXUP_HEADER, PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, pci_fixup_latency },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5598, pci_fixup_latency },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, pci_fixup_piix4_acpi },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, pci_fixup_via_northbridge_bug },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8622, pci_fixup_via_northbridge_bug },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8361, pci_fixup_via_northbridge_bug },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_via_northbridge_bug },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810 },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixup_transparent_bridge },
+ { 0 }
+};
+
+/*
+ * Called after each bus is probed, but before its children
+ * are examined.
+ */
+
+void __devinit pcibios_fixup_bus(struct pci_bus *b)
+{
+ pcibios_fixup_ghosts(b);
+ pci_read_bridge_bases(b);
+}
+
+struct pci_bus * __devinit pcibios_scan_root(int busnum)
+{
+ struct list_head *list;
+ struct pci_bus *bus;
+
+ list_for_each(list, &pci_root_buses) {
+ bus = pci_bus_b(list);
+ if (bus->number == busnum) {
+ /* Already scanned */
+ return bus;
+ }
+ }
+
+ printk("PCI: Probing PCI hardware (bus %02x)\n", busnum);
+
+ return pci_scan_bus(busnum, pci_root_ops, NULL);
+}
+
+void __devinit pcibios_config_init(void)
+{
+ /*
+ * Try all known PCI access methods. Note that we support using
+ * both PCI BIOS and direct access, with a preference for direct.
+ */
+
+#ifdef CONFIG_PCI_DIRECT
+ struct pci_ops *tmp = NULL;
+#endif
+
+
+#ifdef CONFIG_PCI_BIOS
+ if ((pci_probe & PCI_PROBE_BIOS)
+ && ((pci_root_ops = pci_find_bios()))) {
+ pci_probe |= PCI_BIOS_SORT;
+ pci_bios_present = 1;
+ pci_config_read = pci_bios_read;
+ pci_config_write = pci_bios_write;
+ }
+#endif
+
+#ifdef CONFIG_PCI_DIRECT
+ if ((pci_probe & (PCI_PROBE_CONF1 | PCI_PROBE_CONF2))
+ && (tmp = pci_check_direct())) {
+ pci_root_ops = tmp;
+ if (pci_root_ops == &pci_direct_conf1) {
+ pci_config_read = pci_conf1_read;
+ pci_config_write = pci_conf1_write;
+ }
+ else {
+ pci_config_read = pci_conf2_read;
+ pci_config_write = pci_conf2_write;
+ }
+ }
+#endif
+
+ return;
+}
+
+void __init pcibios_init(void)
+{
+ int quad;
+
+ if (!pci_root_ops)
+ pcibios_config_init();
+ if (!pci_root_ops) {
+ printk(KERN_WARNING "PCI: System does not support PCI\n");
+ return;
+ }
+
+ pcibios_set_cacheline_size();
+
+ printk(KERN_INFO "PCI: Probing PCI hardware\n");
+#ifdef CONFIG_ACPI_PCI
+ if (!acpi_noirq && !acpi_pci_irq_init()) {
+ pci_using_acpi_prt = 1;
+ printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n");
+ printk(KERN_INFO "PCI: if you experience problems, try using option 'pci=noacpi' or even 'acpi=off'\n");
+ }
+#endif
+ if (!pci_using_acpi_prt) {
+ pci_root_bus = pcibios_scan_root(0);
+ pcibios_irq_init();
+ pcibios_fixup_peer_bridges();
+ pcibios_fixup_irqs();
+ }
+ if (clustered_apic_mode && (numnodes > 1)) {
+ for (quad = 1; quad < numnodes; ++quad) {
+ printk("Scanning PCI bus %d for quad %d\n",
+ QUADLOCAL2BUS(quad,0), quad);
+ pci_scan_bus(QUADLOCAL2BUS(quad,0),
+ pci_root_ops, NULL);
+ }
+ }
+
+ pcibios_resource_survey();
+
+#ifdef CONFIG_PCI_BIOS
+ if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT))
+ pcibios_sort();
+#endif
+}
+
+char * __devinit pcibios_setup(char *str)
+{
+ if (!strcmp(str, "off")) {
+ pci_probe = 0;
+ return NULL;
+ }
+#ifdef CONFIG_PCI_BIOS
+ else if (!strcmp(str, "bios")) {
+ pci_probe = PCI_PROBE_BIOS;
+ return NULL;
+ } else if (!strcmp(str, "nobios")) {
+ pci_probe &= ~PCI_PROBE_BIOS;
+ return NULL;
+ } else if (!strcmp(str, "nosort")) {
+ pci_probe |= PCI_NO_SORT;
+ return NULL;
+ } else if (!strcmp(str, "biosirq")) {
+ pci_probe |= PCI_BIOS_IRQ_SCAN;
+ return NULL;
+ }
+#endif
+#ifdef CONFIG_PCI_DIRECT
+ else if (!strcmp(str, "conf1")) {
+ pci_probe = PCI_PROBE_CONF1 | PCI_NO_CHECKS;
+ return NULL;
+ }
+ else if (!strcmp(str, "conf2")) {
+ pci_probe = PCI_PROBE_CONF2 | PCI_NO_CHECKS;
+ return NULL;
+ }
+#endif
+ else if (!strcmp(str, "rom")) {
+ pci_probe |= PCI_ASSIGN_ROMS;
+ return NULL;
+ } else if (!strcmp(str, "assign-busses")) {
+ pci_probe |= PCI_ASSIGN_ALL_BUSSES;
+ return NULL;
+ } else if (!strncmp(str, "irqmask=", 8)) {
+ pcibios_irq_mask = simple_strtol(str+8, NULL, 0);
+ return NULL;
+ } else if (!strncmp(str, "lastbus=", 8)) {
+ pcibios_last_bus = simple_strtol(str+8, NULL, 0);
+ return NULL;
+ } else if (!strncmp(str, "noacpi", 6)) {
+ acpi_noirq_set();
+ return NULL;
+ }
+ return str;
+}
+
+unsigned int pcibios_assign_all_busses(void)
+{
+ return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0;
+}
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+ int err;
+
+ if ((err = pcibios_enable_resources(dev, mask)) < 0)
+ return err;
+
+#ifdef CONFIG_ACPI_PCI
+ if (pci_using_acpi_prt) {
+ acpi_pci_irq_enable(dev);
+ return 0;
+ }
+#endif
+
+ pcibios_enable_irq(dev);
+
+ return 0;
+}
--- /dev/null
+/*
+ * Low-Level PCI Access for i386 machines
+ *
+ * Copyright 1993, 1994 Drew Eckhardt
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * Drew@Colorado.EDU
+ * +1 (303) 786-7975
+ *
+ * Drew's work was sponsored by:
+ * iX Multiuser Multitasking Magazine
+ * Hannover, Germany
+ * hm@ix.de
+ *
+ * Copyright 1997--2000 Martin Mares <mj@ucw.cz>
+ *
+ * For more information, please consult the following manuals (look at
+ * http://www.pcisig.com/ for how to get them):
+ *
+ * PCI BIOS Specification
+ * PCI Local Bus Specification
+ * PCI to PCI Bridge Specification
+ * PCI System Design Guide
+ *
+ *
+ * CHANGELOG :
+ * Jun 17, 1994 : Modified to accommodate the broken pre-PCI BIOS SPECIFICATION
+ * Revision 2.0 present on <thys@dennis.ee.up.ac.za>'s ASUS mainboard.
+ *
+ * Jan 5, 1995 : Modified to probe PCI hardware at boot time by Frederic
+ * Potter, potter@cao-vlsi.ibp.fr
+ *
+ * Jan 10, 1995 : Modified to store the information about configured pci
+ * devices into a list, which can be accessed via /proc/pci by
+ * Curtis Varner, cvarner@cs.ucr.edu
+ *
+ * Jan 12, 1995 : CPU-PCI bridge optimization support by Frederic Potter.
+ * Alpha version. Intel & UMC chipset support only.
+ *
+ * Apr 16, 1995 : Source merge with the DEC Alpha PCI support. Most of the code
+ * moved to drivers/pci/pci.c.
+ *
+ * Dec 7, 1996 : Added support for direct configuration access of boards
+ * with Intel compatible access schemes (tsbogend@alpha.franken.de)
+ *
+ * Feb 3, 1997 : Set internal functions to static, save/restore flags
+ * avoid dead locks reading broken PCI BIOS, werner@suse.de
+ *
+ * Apr 26, 1997 : Fixed case when there is BIOS32, but not PCI BIOS
+ * (mj@atrey.karlin.mff.cuni.cz)
+ *
+ * May 7, 1997 : Added some missing cli()'s. [mj]
+ *
+ * Jun 20, 1997 : Corrected problems in "conf1" type accesses.
+ * (paubert@iram.es)
+ *
+ * Aug 2, 1997 : Split to PCI BIOS handling and direct PCI access parts
+ * and cleaned it up... Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *
+ * Feb 6, 1998 : No longer using BIOS to find devices and device classes. [mj]
+ *
+ * May 1, 1998 : Support for peer host bridges. [mj]
+ *
+ * Jun 19, 1998 : Changed to use spinlocks, so that PCI configuration space
+ * can be accessed from interrupts even on SMP systems. [mj]
+ *
+ * August 1998 : Better support for peer host bridges and more paranoid
+ * checks for direct hardware access. Ugh, this file starts to look like
+ * a large gallery of common hardware bug workarounds (watch the comments)
+ * -- the PCI specs themselves are sane, but most implementors should be
+ * hit hard with \hammer scaled \magstep5. [mj]
+ *
+ * Jan 23, 1999 : More improvements to peer host bridge logic. i450NX fixup. [mj]
+ *
+ * Feb 8, 1999 : Added UM8886BF I/O address fixup. [mj]
+ *
+ * August 1999 : New resource management and configuration access stuff. [mj]
+ *
+ * Sep 19, 1999 : Use PCI IRQ routing tables for detection of peer host bridges.
+ * Based on ideas by Chris Frantz and David Hinds. [mj]
+ *
+ * Sep 28, 1999 : Handle unreported/unassigned IRQs. Thanks to Shuu Yamaguchi
+ * for a lot of patience during testing. [mj]
+ *
+ * Oct 8, 1999 : Split to pci-i386.c, pci-pc.c and pci-visws.c. [mj]
+ */
+
+#include <xen/types.h>
+#include <xen/lib.h>
+#include <xen/pci.h>
+#include <xen/init.h>
+#include <xen/ioport.h>
+#include <xen/errno.h>
+
+#include "pci-x86.h"
+
+void
+pcibios_update_resource(struct pci_dev *dev, struct resource *root,
+ struct resource *res, int resource)
+{
+ u32 new, check;
+ int reg;
+
+ new = res->start | (res->flags & PCI_REGION_FLAG_MASK);
+ if (resource < 6) {
+ reg = PCI_BASE_ADDRESS_0 + 4*resource;
+ } else if (resource == PCI_ROM_RESOURCE) {
+ res->flags |= PCI_ROM_ADDRESS_ENABLE;
+ new |= PCI_ROM_ADDRESS_ENABLE;
+ reg = dev->rom_base_reg;
+ } else {
+ /* Somebody might have requested allocation of a non-standard resource */
+ return;
+ }
+
+ pci_write_config_dword(dev, reg, new);
+ pci_read_config_dword(dev, reg, &check);
+ if ((new ^ check) & ((new & PCI_BASE_ADDRESS_SPACE_IO) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK)) {
+ printk(KERN_ERR "PCI: Error while updating region "
+ "%s/%d (%08x != %08x)\n", dev->slot_name, resource,
+ new, check);
+ }
+}
+
+/*
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ *
+ * Why? Because some silly external IO cards only decode
+ * the low 10 bits of the IO address. The 0x00-0xff region
+ * is reserved for motherboard devices that decode all 16
+ * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
+ * but we want to try to avoid allocating at 0x2900-0x2bff
+ * which might be mirrored at 0x0100-0x03ff.
+ */
+void
+pcibios_align_resource(void *data, struct resource *res,
+ unsigned long size, unsigned long align)
+{
+ if (res->flags & IORESOURCE_IO) {
+ unsigned long start = res->start;
+
+ if (start & 0x300) {
+ start = (start + 0x3ff) & ~0x3ff;
+ res->start = start;
+ }
+ }
+}
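+
+/*
+ * Worked example of the rounding above (hypothetical request): a start
+ * of 0x2900 has (0x2900 & 0x300) != 0, so it lies in a 10-bit-decode
+ * mirror window and is rounded up: (0x2900 + 0x3ff) & ~0x3ff == 0x2c00,
+ * whose low 10 bits are 0x000 and thus safe from ISA mirrors.
+ */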
+
+
+/*
+ * Handle resources of PCI devices. If the world were perfect, we could
+ * just allocate all the resource regions and do nothing more. It isn't.
+ * On the other hand, we cannot just re-allocate all devices, as it would
+ * require us to know lots of host bridge internals. So we attempt to
+ * keep as much of the original configuration as possible, but tweak it
+ * when it's found to be wrong.
+ *
+ * Known BIOS problems we have to work around:
+ * - I/O or memory regions not configured
+ * - regions configured, but not enabled in the command register
+ * - bogus I/O addresses above 64K used
+ * - expansion ROMs left enabled (this may sound harmless, but given
+ * the fact the PCI specs explicitly allow address decoders to be
+ * shared between expansion ROMs and other resource regions, it's
+ * at least dangerous)
+ *
+ * Our solution:
+ * (1) Allocate resources for all buses behind PCI-to-PCI bridges.
+ * This gives us fixed barriers on where we can allocate.
+ * (2) Allocate resources for all enabled devices. If there is
+ * a collision, just mark the resource as unallocated. Also
+ * disable expansion ROMs during this step.
+ * (3) Try to allocate resources for disabled devices. If the
+ * resources were assigned correctly, everything goes well,
+ * if they weren't, they won't disturb allocation of other
+ * resources.
+ * (4) Assign new addresses to resources which were either
+ * not configured at all or misconfigured. If explicitly
+ * requested by the user, configure expansion ROM address
+ * as well.
+ */
+
+static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
+{
+ struct list_head *ln;
+ struct pci_bus *bus;
+ struct pci_dev *dev;
+ int idx;
+ struct resource *r, *pr;
+
+ /* Depth-First Search on bus tree */
+ for (ln=bus_list->next; ln != bus_list; ln=ln->next) {
+ bus = pci_bus_b(ln);
+ if ((dev = bus->self)) {
+ for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) {
+ r = &dev->resource[idx];
+ if (!r->start)
+ continue;
+ pr = pci_find_parent_resource(dev, r);
+ if (!pr || request_resource(pr, r) < 0)
+ printk(KERN_ERR "PCI: Cannot allocate resource region %d of bridge %s\n", idx, dev->slot_name);
+ }
+ }
+ pcibios_allocate_bus_resources(&bus->children);
+ }
+}
+
+static void __init pcibios_allocate_resources(int pass)
+{
+ struct pci_dev *dev;
+ int idx, disabled;
+ u16 command;
+ struct resource *r, *pr;
+
+ pci_for_each_dev(dev) {
+ pci_read_config_word(dev, PCI_COMMAND, &command);
+ for(idx = 0; idx < 6; idx++) {
+ r = &dev->resource[idx];
+ if (r->parent) /* Already allocated */
+ continue;
+ if (!r->start) /* Address not assigned at all */
+ continue;
+ if (r->flags & IORESOURCE_IO)
+ disabled = !(command & PCI_COMMAND_IO);
+ else
+ disabled = !(command & PCI_COMMAND_MEMORY);
+ if (pass == disabled) {
+ DBG("PCI: Resource %08lx-%08lx (f=%lx, d=%d, p=%d)\n",
+ r->start, r->end, r->flags, disabled, pass);
+ pr = pci_find_parent_resource(dev, r);
+ if (!pr || request_resource(pr, r) < 0) {
+ printk(KERN_ERR "PCI: Cannot allocate resource region %d of device %s\n", idx, dev->slot_name);
+ /* We'll assign a new address later */
+ r->end -= r->start;
+ r->start = 0;
+ }
+ }
+ }
+ if (!pass) {
+ r = &dev->resource[PCI_ROM_RESOURCE];
+ if (r->flags & PCI_ROM_ADDRESS_ENABLE) {
+ /* Turn the ROM off, leave the resource region, but keep it unregistered. */
+ u32 reg;
+ DBG("PCI: Switching off ROM of %s\n", dev->slot_name);
+ r->flags &= ~PCI_ROM_ADDRESS_ENABLE;
+ pci_read_config_dword(dev, dev->rom_base_reg, &reg);
+ pci_write_config_dword(dev, dev->rom_base_reg, reg & ~PCI_ROM_ADDRESS_ENABLE);
+ }
+ }
+ }
+}
+
+static void __init pcibios_assign_resources(void)
+{
+ struct pci_dev *dev;
+ int idx;
+ struct resource *r;
+
+ pci_for_each_dev(dev) {
+ int class = dev->class >> 8;
+
+ /* Don't touch classless devices and host bridges */
+ if (!class || class == PCI_CLASS_BRIDGE_HOST)
+ continue;
+
+ for(idx=0; idx<6; idx++) {
+ r = &dev->resource[idx];
+
+ /*
+ * Don't touch IDE controllers and I/O ports of video cards!
+ */
+ if ((class == PCI_CLASS_STORAGE_IDE && idx < 4) ||
+ (class == PCI_CLASS_DISPLAY_VGA && (r->flags & IORESOURCE_IO)))
+ continue;
+
+ /*
+ * We shall assign a new address to this resource, either because
+ * the BIOS forgot to do so or because we have decided the old
+ * address was unusable for some reason.
+ */
+ if (!r->start && r->end)
+ pci_assign_resource(dev, idx);
+ }
+
+ if (pci_probe & PCI_ASSIGN_ROMS) {
+ r = &dev->resource[PCI_ROM_RESOURCE];
+ r->end -= r->start;
+ r->start = 0;
+ if (r->end)
+ pci_assign_resource(dev, PCI_ROM_RESOURCE);
+ }
+ }
+}
+
+void __init pcibios_set_cacheline_size(void)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ pci_cache_line_size = 32 >> 2;
+ if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD)
+ pci_cache_line_size = 64 >> 2; /* K7 & K8 */
+ else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL)
+ pci_cache_line_size = 128 >> 2; /* P4 */
+}
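+
+/*
+ * Note on the ">> 2" above: the PCI cache line size register is
+ * programmed in units of 32-bit words, not bytes, so a 64-byte line
+ * becomes 16 (64 >> 2) and a 128-byte line becomes 32.
+ */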
+
+void __init pcibios_resource_survey(void)
+{
+ DBG("PCI: Allocating resources\n");
+ pcibios_allocate_bus_resources(&pci_root_buses);
+ pcibios_allocate_resources(0);
+ pcibios_allocate_resources(1);
+ pcibios_assign_resources();
+}
+
+int pcibios_enable_resources(struct pci_dev *dev, int mask)
+{
+ u16 cmd, old_cmd;
+ int idx;
+ struct resource *r;
+
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ old_cmd = cmd;
+ for(idx=0; idx<6; idx++) {
+ /* Only set up the requested stuff */
+ if (!(mask & (1<<idx)))
+ continue;
+
+ r = &dev->resource[idx];
+ if (!r->start && r->end) {
+ printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", dev->slot_name);
+ return -EINVAL;
+ }
+ if (r->flags & IORESOURCE_IO)
+ cmd |= PCI_COMMAND_IO;
+ if (r->flags & IORESOURCE_MEM)
+ cmd |= PCI_COMMAND_MEMORY;
+ }
+ if (dev->resource[PCI_ROM_RESOURCE].start)
+ cmd |= PCI_COMMAND_MEMORY;
+ if (cmd != old_cmd) {
+ printk("PCI: Enabling device %s (%04x -> %04x)\n", dev->slot_name, old_cmd, cmd);
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ }
+ return 0;
+}
+
+/*
+ * If we set up a device for bus mastering, we need to check the latency
+ * timer as certain crappy BIOSes forget to set it properly.
+ */
+unsigned int pcibios_max_latency = 255;
+
+void pcibios_set_master(struct pci_dev *dev)
+{
+ u8 lat;
+ pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
+ if (lat < 16)
+ lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
+ else if (lat > pcibios_max_latency)
+ lat = pcibios_max_latency;
+ else
+ return;
+ printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n", dev->slot_name, lat);
+ pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
+}
+
+#if 0
+int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state, int write_combine)
+{
+ unsigned long prot;
+
+ /* I/O space cannot be accessed via normal processor loads and
+ * stores on this platform.
+ */
+ if (mmap_state == pci_mmap_io)
+ return -EINVAL;
+
+ /* Leave vm_pgoff as-is, the PCI space address is the physical
+ * address on this platform.
+ */
+ vma->vm_flags |= (VM_SHM | VM_LOCKED | VM_IO);
+
+ prot = pgprot_val(vma->vm_page_prot);
+ if (boot_cpu_data.x86 > 3)
+ prot |= _PAGE_PCD | _PAGE_PWT;
+ vma->vm_page_prot = __pgprot(prot);
+
+ /* Write-combine setting is ignored, it is changed via the mtrr
+ * interfaces on this platform.
+ */
+ if (remap_page_range(vma->vm_start, vma->vm_pgoff << PAGE_SHIFT,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot))
+ return -EAGAIN;
+
+ return 0;
+}
+#endif
--- /dev/null
+/*
+ * Low-Level PCI Access for i386 machines.
+ *
+ * (c) 1999 Martin Mares <mj@ucw.cz>
+ */
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+#define PCI_PROBE_BIOS 0x0001
+#define PCI_PROBE_CONF1 0x0002
+#define PCI_PROBE_CONF2 0x0004
+#define PCI_NO_SORT 0x0100
+#define PCI_BIOS_SORT 0x0200
+#define PCI_NO_CHECKS 0x0400
+#define PCI_ASSIGN_ROMS 0x1000
+#define PCI_BIOS_IRQ_SCAN 0x2000
+#define PCI_ASSIGN_ALL_BUSSES 0x4000
+
+extern unsigned int pci_probe;
+
+/* pci-x86.c */
+
+extern unsigned int pcibios_max_latency;
+extern u8 pci_cache_line_size;
+
+void pcibios_resource_survey(void);
+void pcibios_set_cacheline_size(void);
+int pcibios_enable_resources(struct pci_dev *, int);
+
+/* pci-pc.c */
+
+extern int pcibios_last_bus;
+extern struct pci_bus *pci_root_bus;
+extern struct pci_ops *pci_root_ops;
+
+/* pci-irq.c */
+
+struct irq_info {
+ u8 bus, devfn; /* Bus, device and function */
+ struct {
+ u8 link; /* IRQ line ID, chipset dependent, 0=not routed */
+ u16 bitmap; /* Available IRQs */
+ } __attribute__((packed)) irq[4];
+ u8 slot; /* Slot number, 0=onboard */
+ u8 rfu;
+} __attribute__((packed));
+
+struct irq_routing_table {
+ u32 signature; /* PIRQ_SIGNATURE should be here */
+ u16 version; /* PIRQ_VERSION */
+ u16 size; /* Table size in bytes */
+ u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */
+ u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */
+ u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */
+ u32 miniport_data; /* Crap */
+ u8 rfu[11];
+ u8 checksum; /* Modulo 256 checksum must give zero */
+ struct irq_info slots[0];
+} __attribute__((packed));
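+
+/*
+ * Sketch of validating a candidate $PIR table against the fields
+ * documented above (illustration only; assumes the PIRQ_SIGNATURE and
+ * PIRQ_VERSION constants referenced in the comments).
+ */
+#if 0
+static int pirq_table_ok(struct irq_routing_table *rt)
+{
+ u8 sum = 0;
+ int i;
+
+ if (rt->signature != PIRQ_SIGNATURE || rt->version != PIRQ_VERSION)
+ return 0;
+ /* Modulo-256 sum of every byte, checksum included, must be zero. */
+ for (i = 0; i < rt->size; i++)
+ sum += ((u8 *)rt)[i];
+ return sum == 0;
+}
+#endif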
+
+extern unsigned int pcibios_irq_mask;
+
+void pcibios_irq_init(void);
+void pcibios_fixup_irqs(void);
+void pcibios_enable_irq(struct pci_dev *dev);
--- /dev/null
+
+/*
+ * pervasive debugger
+ * www.cl.cam.ac.uk/netos/pdb
+ *
+ * alex ho
+ * 2004
+ * university of cambridge computer laboratory
+ *
+ * linux & i386 dependent code. bleech.
+ */
+
+#include <asm/pdb.h>
+
+/* offset to the first instruction in the linux system call code
+ where we can safely set a breakpoint */
+unsigned int pdb_linux_syscall_enter_bkpt_offset = 20;
+
+/* offset to eflags saved on the stack after an int 80 */
+unsigned int pdb_linux_syscall_eflags_offset = 48;
+
+/* offset to the instruction pointer saved on the stack after an int 80 */
+unsigned int pdb_linux_syscall_eip_offset = 40;
+
+unsigned char
+pdb_linux_set_bkpt (unsigned long addr)
+{
+ unsigned char old_instruction = *(unsigned char *)addr;
+ *(unsigned char *)addr = 0xcc;
+ return old_instruction;
+}
+
+void
+pdb_linux_clr_bkpt (unsigned long addr, unsigned char value)
+{
+ *(unsigned char *)addr = value;
+}
+
+void
+pdb_linux_syscall_enter_bkpt (struct pt_regs *regs, long error_code,
+ trap_info_t *ti)
+{
+ /* set a breakpoint at the beginning of the
+ system call in the target domain */
+
+ pdb_system_call_enter_instr = pdb_linux_set_bkpt(ti->address +
+ pdb_linux_syscall_enter_bkpt_offset);
+ pdb_system_call = 1;
+}
+
+void
+pdb_linux_syscall_exit_bkpt (struct pt_regs *regs, struct pdb_context *pdb_ctx)
+{
+ /*
+ we've hit an int 0x80 in a user's program, jumped into xen
+ (traps.c::do_general_protection()) which re-wrote the next
+ instruction in the os kernel to 0xcc, and then hit that
+ exception.
+
+ we need to re-write the return instruction in the user's
+ program so that we know when we have finished the system call
+ and are back in the user's program.
+
+ at this point our stack should look something like this:
+
+ esp = 0x80a59f0
+ esp + 4 = 0x0
+ esp + 8 = 0x80485a0
+ esp + 12 = 0x2d
+ esp + 16 = 0x80485f4
+ esp + 20 = 0xbffffa48
+ esp + 24 = 0xd
+ esp + 28 = 0xc00a0833
+ esp + 32 = 0x833
+ esp + 36 = 0xd
+ esp + 40 = 0x804dcdd saved eip
+ esp + 44 = 0x82b saved cs
+ esp + 48 = 0x213392 saved eflags
+ esp + 52 = 0xbffffa2c saved esp
+ esp + 56 = 0x833 saved ss
+ esp + 60 = 0x1000000
+ */
+
+ /* restore the entry instruction for the system call */
+ pdb_linux_clr_bkpt(regs->eip - 1, pdb_system_call_enter_instr);
+
+ /* save the address of eflags that was saved on the stack */
+ pdb_system_call_eflags_addr = (regs->esp +
+ pdb_linux_syscall_eflags_offset);
+
+ /* muck with the return instruction so that we trap back into the
+ debugger when re-entering user space */
+ pdb_system_call_next_addr = *(unsigned long *)(regs->esp +
+ pdb_linux_syscall_eip_offset);
+ pdb_linux_get_values (&pdb_system_call_leave_instr, 1,
+ pdb_system_call_next_addr,
+ pdb_ctx->process, pdb_ctx->ptbr);
+ pdb_linux_set_values ("cc", 1, pdb_system_call_next_addr,
+ pdb_ctx->process, pdb_ctx->ptbr);
+}
--- /dev/null
+
+/*
+ * pervasive debugger
+ * www.cl.cam.ac.uk/netos/pdb
+ *
+ * alex ho
+ * 2004
+ * university of cambridge computer laboratory
+ *
+ * code adapted originally from kgdb, nemesis, & gdbserver
+ */
+
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <asm/ptrace.h>
+#include <xen/keyhandler.h>
+#include <asm/apic.h>
+#include <asm/domain_page.h> /* [un]map_domain_mem */
+#include <asm/processor.h>
+#include <asm/pdb.h>
+#include <xen/list.h>
+#include <xen/serial.h>
+
+#undef PDB_DEBUG_TRACE
+#ifdef PDB_DEBUG_TRACE
+#define TRC(_x) _x
+#else
+#define TRC(_x)
+#endif
+
+#define DEBUG_EXCEPTION 0x01
+#define BREAKPT_EXCEPTION 0x03
+#define PDB_LIVE_EXCEPTION 0x58
+#define KEYPRESS_EXCEPTION 0x88
+
+#define BUFMAX 400
+
+static const char hexchars[] = "0123456789abcdef";
+
+static int remote_debug;
+
+#define PDB_BUFMAX 1024
+static char pdb_in_buffer[PDB_BUFMAX];
+static char pdb_out_buffer[PDB_BUFMAX];
+static char pdb_buffer[PDB_BUFMAX];
+static int pdb_in_buffer_ptr;
+static unsigned char pdb_in_checksum;
+static unsigned char pdb_xmit_checksum;
+
+struct pdb_context pdb_ctx;
+int pdb_continue_thread = 0;
+int pdb_general_thread = 0;
+
+void pdb_put_packet (unsigned char *buffer, int ack);
+void pdb_bkpt_check (u_char *buffer, int length,
+ unsigned long cr3, unsigned long addr);
+
+int pdb_initialized = 0;
+int pdb_page_fault_possible = 0;
+int pdb_page_fault_scratch = 0; /* just a handy variable */
+int pdb_page_fault = 0;
+static int pdb_serhnd = -1;
+static int pdb_stepping = 0;
+
+int pdb_system_call = 0;
+unsigned char pdb_system_call_enter_instr = 0; /* original enter instr */
+unsigned char pdb_system_call_leave_instr = 0; /* original next instr */
+unsigned long pdb_system_call_next_addr = 0; /* instr after int 0x80 */
+unsigned long pdb_system_call_eflags_addr = 0; /* saved eflags on stack */
+
+static inline void pdb_put_char(unsigned char c)
+{
+ serial_putc(pdb_serhnd, c);
+}
+
+static inline unsigned char pdb_get_char(void)
+{
+ return serial_getc(pdb_serhnd);
+}
+
+int
+get_char (char *addr)
+{
+ return *addr;
+}
+
+void
+set_char (char *addr, int val)
+{
+ *addr = val;
+}
+
+void
+pdb_process_query (char *ptr)
+{
+ if (strcmp(ptr, "C") == 0)
+ {
+ /* empty string */
+ }
+ else if (strcmp(ptr, "fThreadInfo") == 0)
+ {
+#ifdef PDB_PAST
+ struct task_struct *p;
+ u_long flags;
+#endif /* PDB_PAST */
+
+ int buf_idx = 0;
+
+ pdb_out_buffer[buf_idx++] = 'l';
+ pdb_out_buffer[buf_idx++] = 0;
+
+#ifdef PDB_PAST
+ switch (pdb_level)
+ {
+ case PDB_LVL_XEN: /* return a list of domains */
+ {
+ int count = 0;
+
+ read_lock_irqsave (&tasklist_lock, flags);
+
+ pdb_out_buffer[buf_idx++] = 'm';
+ for_each_domain ( p )
+ {
+ domid_t domain = p->domain + PDB_ID_OFFSET;
+
+ if (count > 0)
+ {
+ pdb_out_buffer[buf_idx++] = ',';
+ }
+ if (domain > 15)
+ {
+ pdb_out_buffer[buf_idx++] = hexchars[domain >> 4];
+ }
+ pdb_out_buffer[buf_idx++] = hexchars[domain % 16];
+ count++;
+ }
+ pdb_out_buffer[buf_idx++] = 0;
+
+ read_unlock_irqrestore(&tasklist_lock, flags);
+ break;
+ }
+ case PDB_LVL_GUESTOS: /* return a list of processes */
+ {
+ int foobar[20];
+ int loop, total;
+
+ /* this cr3 is wrong! */
+ total = pdb_linux_process_list(pdb_ctx[pdb_level].info_cr3,
+ foobar, 20);
+
+ pdb_out_buffer[buf_idx++] = 'm';
+ pdb_out_buffer[buf_idx++] = '1'; /* 1 is to go back */
+ for (loop = 0; loop < total; loop++)
+ {
+ int pid = foobar[loop] + PDB_ID_OFFSET;
+
+ pdb_out_buffer[buf_idx++] = ',';
+ if (pid > 15)
+ {
+ pdb_out_buffer[buf_idx++] = hexchars[pid >> 4];
+ }
+ pdb_out_buffer[buf_idx++] = hexchars[pid % 16];
+ }
+ pdb_out_buffer[buf_idx++] = 0;
+ break;
+ }
+ case PDB_LVL_PROCESS: /* hmmm... */
+ {
+ pdb_out_buffer[buf_idx++] = 'm';
+ pdb_out_buffer[buf_idx++] = '1'; /* 1 is to go back */
+ break;
+ }
+ default:
+ break;
+ }
+#endif /* PDB_PAST */
+
+ }
+ else if (strcmp(ptr, "sThreadInfo") == 0)
+ {
+ int buf_idx = 0;
+
+ pdb_out_buffer[buf_idx++] = 'l';
+ pdb_out_buffer[buf_idx++] = 0;
+ }
+ else if (strncmp(ptr, "ThreadExtraInfo,", 16) == 0)
+ {
+ int thread = 0;
+ char *message = "foobar ?";
+
+ ptr += 16;
+ if (hexToInt (&ptr, &thread))
+ {
+ mem2hex (message, pdb_out_buffer, strlen(message) + 1);
+ }
+
+#ifdef PDB_PAST
+ int thread = 0;
+ char message[16];
+ struct task_struct *p;
+
+ p = find_domain_by_id(pdb_ctx[pdb_level].info);
+ strncpy (message, p->name, 16);
+ put_task_struct(p);
+
+ ptr += 16;
+ if (hexToInt (&ptr, &thread))
+ {
+ mem2hex ((char *)message, pdb_out_buffer, strlen(message) + 1);
+ }
+#endif /* PDB_PAST */
+
+#ifdef PDB_FUTURE
+ {
+ char string[task_struct_comm_length];
+
+ string[0] = 0;
+ pdb_linux_process_details (cr3, pid, string);
+ printk (" (%s)", string);
+ }
+#endif /* PDB_FUTURE*/
+
+ }
+ else if (strcmp(ptr, "Offsets") == 0)
+ {
+ /* empty string */
+ }
+ else if (strncmp(ptr, "Symbol", 6) == 0)
+ {
+ strcpy (pdb_out_buffer, "OK");
+ }
+ else
+ {
+ printk("pdb: error, unknown query [%s]\n", ptr);
+ }
+}
+
+void
+pdb_x86_to_gdb_regs (char *buffer, struct pt_regs *regs)
+{
+ int idx = 0;
+
+ mem2hex ((char *)&regs->eax, &buffer[idx], sizeof(regs->eax));
+ idx += sizeof(regs->eax) * 2;
+ mem2hex ((char *)&regs->ecx, &buffer[idx], sizeof(regs->ecx));
+ idx += sizeof(regs->ecx) * 2;
+ mem2hex ((char *)&regs->edx, &buffer[idx], sizeof(regs->edx));
+ idx += sizeof(regs->edx) * 2;
+ mem2hex ((char *)&regs->ebx, &buffer[idx], sizeof(regs->ebx));
+ idx += sizeof(regs->ebx) * 2;
+ mem2hex ((char *)&regs->esp, &buffer[idx], sizeof(regs->esp));
+ idx += sizeof(regs->esp) * 2;
+ mem2hex ((char *)&regs->ebp, &buffer[idx], sizeof(regs->ebp));
+ idx += sizeof(regs->ebp) * 2;
+ mem2hex ((char *)&regs->esi, &buffer[idx], sizeof(regs->esi));
+ idx += sizeof(regs->esi) * 2;
+ mem2hex ((char *)&regs->edi, &buffer[idx], sizeof(regs->edi));
+ idx += sizeof(regs->edi) * 2;
+ mem2hex ((char *)&regs->eip, &buffer[idx], sizeof(regs->eip));
+ idx += sizeof(regs->eip) * 2;
+ mem2hex ((char *)&regs->eflags, &buffer[idx], sizeof(regs->eflags));
+ idx += sizeof(regs->eflags) * 2;
+ mem2hex ((char *)&regs->xcs, &buffer[idx], sizeof(regs->xcs));
+ idx += sizeof(regs->xcs) * 2;
+ mem2hex ((char *)&regs->xss, &buffer[idx], sizeof(regs->xss));
+ idx += sizeof(regs->xss) * 2;
+ mem2hex ((char *)&regs->xds, &buffer[idx], sizeof(regs->xds));
+ idx += sizeof(regs->xds) * 2;
+ mem2hex ((char *)&regs->xes, &buffer[idx], sizeof(regs->xes));
+ idx += sizeof(regs->xes) * 2;
+ mem2hex ((char *)&regs->xfs, &buffer[idx], sizeof(regs->xfs));
+ idx += sizeof(regs->xfs) * 2;
+ mem2hex ((char *)&regs->xgs, &buffer[idx], sizeof(regs->xgs));
+}
+
+/* at this point we allow any register to be changed, caveat emptor */
+void
+pdb_gdb_to_x86_regs (struct pt_regs *regs, char *buffer)
+{
+ hex2mem(buffer, (char *)&regs->eax, sizeof(regs->eax));
+ buffer += sizeof(regs->eax) * 2;
+ hex2mem(buffer, (char *)&regs->ecx, sizeof(regs->ecx));
+ buffer += sizeof(regs->ecx) * 2;
+ hex2mem(buffer, (char *)&regs->edx, sizeof(regs->edx));
+ buffer += sizeof(regs->edx) * 2;
+ hex2mem(buffer, (char *)&regs->ebx, sizeof(regs->ebx));
+ buffer += sizeof(regs->ebx) * 2;
+ hex2mem(buffer, (char *)&regs->esp, sizeof(regs->esp));
+ buffer += sizeof(regs->esp) * 2;
+ hex2mem(buffer, (char *)&regs->ebp, sizeof(regs->ebp));
+ buffer += sizeof(regs->ebp) * 2;
+ hex2mem(buffer, (char *)&regs->esi, sizeof(regs->esi));
+ buffer += sizeof(regs->esi) * 2;
+ hex2mem(buffer, (char *)&regs->edi, sizeof(regs->edi));
+ buffer += sizeof(regs->edi) * 2;
+ hex2mem(buffer, (char *)&regs->eip, sizeof(regs->eip));
+ buffer += sizeof(regs->eip) * 2;
+ hex2mem(buffer, (char *)&regs->eflags, sizeof(regs->eflags));
+ buffer += sizeof(regs->eflags) * 2;
+ hex2mem(buffer, (char *)&regs->xcs, sizeof(regs->xcs));
+ buffer += sizeof(regs->xcs) * 2;
+ hex2mem(buffer, (char *)&regs->xss, sizeof(regs->xss));
+ buffer += sizeof(regs->xss) * 2;
+ hex2mem(buffer, (char *)&regs->xds, sizeof(regs->xds));
+ buffer += sizeof(regs->xds) * 2;
+ hex2mem(buffer, (char *)&regs->xes, sizeof(regs->xes));
+ buffer += sizeof(regs->xes) * 2;
+ hex2mem(buffer, (char *)&regs->xfs, sizeof(regs->xfs));
+ buffer += sizeof(regs->xfs) * 2;
+ hex2mem(buffer, (char *)&regs->xgs, sizeof(regs->xgs));
+}
+
+int
+pdb_process_command (char *ptr, struct pt_regs *regs, unsigned long cr3,
+ int sigval)
+{
+ int length;
+ unsigned long addr;
+ int ack = 1; /* wait for ack in pdb_put_packet */
+ int go = 0;
+
+ TRC(printf("pdb: [%s]\n", ptr));
+
+ pdb_out_buffer[0] = 0;
+
+ if (pdb_ctx.valid == 1)
+ {
+ if (pdb_ctx.domain == -1) /* pdb context: xen */
+ {
+ struct task_struct *p;
+
+ p = &idle0_task;
+ if (p->mm.shadow_mode)
+ pdb_ctx.ptbr = pagetable_val(p->mm.shadow_table);
+ else
+ pdb_ctx.ptbr = pagetable_val(p->mm.pagetable);
+ }
+ else if (pdb_ctx.process == -1) /* pdb context: guest os */
+ {
+ struct task_struct *p;
+
+ if (pdb_ctx.domain == -2)
+ {
+ p = find_last_domain();
+ }
+ else
+ {
+ p = find_domain_by_id(pdb_ctx.domain);
+ }
+ if (p == NULL)
+ {
+ printk ("pdb error: unknown domain [0x%x]\n", pdb_ctx.domain);
+ strcpy (pdb_out_buffer, "E01");
+ pdb_ctx.domain = -1;
+ goto exit;
+ }
+ if (p->mm.shadow_mode)
+ pdb_ctx.ptbr = pagetable_val(p->mm.shadow_table);
+ else
+ pdb_ctx.ptbr = pagetable_val(p->mm.pagetable);
+ put_task_struct(p);
+ }
+ else /* pdb context: process */
+ {
+ struct task_struct *p;
+ unsigned long domain_ptbr;
+
+ p = find_domain_by_id(pdb_ctx.domain);
+ if (p == NULL)
+ {
+ printk ("pdb error: unknown domain [0x%x][0x%x]\n",
+ pdb_ctx.domain, pdb_ctx.process);
+ strcpy (pdb_out_buffer, "E01");
+ pdb_ctx.domain = -1;
+ goto exit;
+ }
+ if (p->mm.shadow_mode)
+ domain_ptbr = pagetable_val(p->mm.shadow_table);
+ else
+ domain_ptbr = pagetable_val(p->mm.pagetable);
+ put_task_struct(p);
+
+ pdb_ctx.ptbr = domain_ptbr;
+ /*pdb_ctx.ptbr=pdb_linux_pid_ptbr(domain_ptbr, pdb_ctx.process);*/
+ }
+
+ pdb_ctx.valid = 0;
+ TRC(printk ("pdb change context (dom:%d, proc:%d) now 0x%lx\n",
+ pdb_ctx.domain, pdb_ctx.process, pdb_ctx.ptbr));
+ }
+
+ switch (*ptr++)
+ {
+ case '?':
+ pdb_out_buffer[0] = 'S';
+ pdb_out_buffer[1] = hexchars[sigval >> 4];
+ pdb_out_buffer[2] = hexchars[sigval % 16];
+ pdb_out_buffer[3] = 0;
+ break;
+ case 'S': /* step with signal */
+ case 's': /* step */
+ {
+ if ( pdb_system_call_eflags_addr != 0 )
+ {
+ unsigned long eflags;
+ char eflags_buf[sizeof(eflags)*2 + 1]; /* +1 for mem2hex's NUL; STUPID STUPID STUPID */
+
+ pdb_linux_get_values((u_char*)&eflags, sizeof(eflags),
+ pdb_system_call_eflags_addr,
+ pdb_ctx.process, pdb_ctx.ptbr);
+ eflags |= X86_EFLAGS_TF;
+ mem2hex ((u_char *)&eflags, eflags_buf, sizeof(eflags));
+ pdb_linux_set_values(eflags_buf, sizeof(eflags),
+ pdb_system_call_eflags_addr,
+ pdb_ctx.process, pdb_ctx.ptbr);
+ }
+
+ regs->eflags |= X86_EFLAGS_TF;
+ pdb_stepping = 1;
+ return 1;
+ /* not reached */
+ }
+ case 'C': /* continue with signal */
+ case 'c': /* continue */
+ {
+ if ( pdb_system_call_eflags_addr != 0 )
+ {
+ unsigned long eflags;
+ char eflags_buf[sizeof(eflags)*2 + 1]; /* +1 for mem2hex's NUL; STUPID STUPID STUPID */
+
+ pdb_linux_get_values((u_char*)&eflags, sizeof(eflags),
+ pdb_system_call_eflags_addr,
+ pdb_ctx.process, pdb_ctx.ptbr);
+ eflags &= ~X86_EFLAGS_TF;
+ mem2hex ((u_char *)&eflags, eflags_buf, sizeof(eflags));
+ pdb_linux_set_values(eflags_buf, sizeof(eflags),
+ pdb_system_call_eflags_addr,
+ pdb_ctx.process, pdb_ctx.ptbr);
+ }
+
+ regs->eflags &= ~X86_EFLAGS_TF;
+ return 1; /* jump out before replying to gdb */
+ /* not reached */
+ }
+ case 'd':
+ remote_debug = !(remote_debug); /* toggle debug flag */
+ break;
+ case 'D': /* detach */
+ return go;
+ /* not reached */
+ case 'g': /* return the value of the CPU registers */
+ {
+ pdb_x86_to_gdb_regs (pdb_out_buffer, regs);
+ break;
+ }
+ case 'G': /* set the value of the CPU registers - return OK */
+ {
+ pdb_gdb_to_x86_regs (regs, ptr);
+ break;
+ }
+ case 'H':
+ {
+ int thread;
+ char *next = &ptr[1];
+
+ if (hexToInt (&next, &thread))
+ {
+ if (*ptr == 'c')
+ {
+ pdb_continue_thread = thread;
+ }
+ else if (*ptr == 'g')
+ {
+ pdb_general_thread = thread;
+ }
+ else
+ {
+ printk ("pdb error: unknown set thread command %c (%d)\n",
+ *ptr, thread);
+ strcpy (pdb_out_buffer, "E00");
+ break;
+ }
+ }
+ strcpy (pdb_out_buffer, "OK");
+ break;
+ }
+ case 'k': /* kill request */
+ {
+ strcpy (pdb_out_buffer, "OK"); /* ack for fun */
+ printk ("don't kill bill...\n");
+ ack = 0;
+ break;
+ }
+
+ case 'q':
+ {
+ pdb_process_query(ptr);
+ break;
+ }
+
+ /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */
+ case 'm':
+ {
+ /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */
+ if (hexToInt (&ptr, (int *)&addr))
+ if (*(ptr++) == ',')
+ if (hexToInt (&ptr, &length))
+ {
+ ptr = 0;
+
+ pdb_page_fault_possible = 1;
+ pdb_page_fault = 0;
+ if (addr >= PAGE_OFFSET)
+ {
+ mem2hex ((char *) addr, pdb_out_buffer, length);
+ }
+ else if (pdb_ctx.process != -1)
+ {
+ pdb_linux_get_values(pdb_buffer, length, addr,
+ pdb_ctx.process, pdb_ctx.ptbr);
+ mem2hex (pdb_buffer, pdb_out_buffer, length);
+ }
+ else
+ {
+ pdb_get_values (pdb_buffer, length,
+ pdb_ctx.ptbr, addr);
+ mem2hex (pdb_buffer, pdb_out_buffer, length);
+ }
+
+ pdb_page_fault_possible = 0;
+ if (pdb_page_fault)
+ {
+ strcpy (pdb_out_buffer, "E03");
+ }
+ }
+
+ if (ptr)
+ {
+ strcpy (pdb_out_buffer, "E01");
+ }
+ break;
+ }
+
+ /* MAA..AA,LLLL: Write LLLL bytes at address AA..AA, return OK */
+ case 'M':
+ {
+ /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */
+ if (hexToInt (&ptr, (int *)&addr))
+ if (*(ptr++) == ',')
+ if (hexToInt (&ptr, &length))
+ if (*(ptr++) == ':')
+ {
+
+ pdb_page_fault_possible = 1;
+ pdb_page_fault = 0;
+ if (addr >= PAGE_OFFSET)
+ {
+ hex2mem (ptr, (char *)addr, length);
+ pdb_bkpt_check(ptr, length, pdb_ctx.ptbr, addr);
+ }
+ else if (pdb_ctx.process != -1)
+ {
+ pdb_linux_set_values(ptr, length, addr,
+ pdb_ctx.process,
+ pdb_ctx.ptbr);
+ pdb_bkpt_check(ptr, length, pdb_ctx.ptbr, addr);
+ }
+ else
+ {
+ pdb_set_values (ptr, length,
+ pdb_ctx.ptbr, addr);
+ pdb_bkpt_check(ptr, length, pdb_ctx.ptbr, addr);
+ }
+ pdb_page_fault_possible = 0;
+ if (pdb_page_fault)
+ {
+ strcpy (pdb_out_buffer, "E03");
+ }
+ else
+ {
+ strcpy (pdb_out_buffer, "OK");
+ }
+
+ ptr = 0;
+ }
+ if (ptr)
+ {
+ strcpy (pdb_out_buffer, "E02");
+ }
+ break;
+ }
+ case 'T':
+ {
+ int id;
+
+ if (hexToInt (&ptr, &id))
+ {
+ strcpy (pdb_out_buffer, "E00");
+
+#ifdef PDB_PAST
+
+ switch (pdb_level) /* previous level */
+ {
+ case PDB_LVL_XEN:
+ {
+ struct task_struct *p;
+ id -= PDB_ID_OFFSET;
+ if ( (p = find_domain_by_id(id)) == NULL)
+ strcpy (pdb_out_buffer, "E00");
+ else
+ strcpy (pdb_out_buffer, "OK");
+ put_task_struct(p);
+
+ pdb_level = PDB_LVL_GUESTOS;
+ pdb_ctx[pdb_level].ctrl = id;
+ pdb_ctx[pdb_level].info = id;
+ break;
+ }
+ case PDB_LVL_GUESTOS:
+ {
+ if (pdb_level == -1)
+ {
+ pdb_level = PDB_LVL_XEN;
+ }
+ else
+ {
+ pdb_level = PDB_LVL_PROCESS;
+ pdb_ctx[pdb_level].ctrl = id;
+ pdb_ctx[pdb_level].info = id;
+ }
+ break;
+ }
+ case PDB_LVL_PROCESS:
+ {
+ if (pdb_level == -1)
+ {
+ pdb_level = PDB_LVL_GUESTOS;
+ }
+ break;
+ }
+ default:
+ {
+ printk ("pdb internal error: invalid level [%d]\n",
+ pdb_level);
+ }
+ }
+
+#endif /* PDB_PAST */
+ }
+ break;
+ }
+ }
+
+exit:
+ /* reply to the request */
+ pdb_put_packet (pdb_out_buffer, ack);
+
+ return go;
+}
+
+/*
+ * process an input character from the serial line.
+ *
+ * return "1" if the character is a gdb debug string
+ * (and hence shouldn't be further processed).
+ */
+
+int pdb_debug_state = 0; /* small parser state machine */
+
+int pdb_serial_input(u_char c, struct pt_regs *regs)
+{
+ int out = 1;
+ int loop, count;
+ unsigned long cr3;
+
+ __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : );
+
+ switch (pdb_debug_state)
+ {
+ case 0: /* not currently processing debug string */
+ if ( c == '$' ) /* start token */
+ {
+ pdb_debug_state = 1;
+ pdb_in_buffer_ptr = 0;
+ pdb_in_checksum = 0;
+ pdb_xmit_checksum = 0;
+ }
+ else
+ {
+ out = 0;
+ }
+ break;
+ case 1: /* saw '$' */
+ if ( c == '#' ) /* checksum token */
+ {
+ pdb_debug_state = 2;
+ pdb_in_buffer[pdb_in_buffer_ptr] = 0;
+ }
+ else
+ {
+ pdb_in_checksum += c;
+ pdb_in_buffer[pdb_in_buffer_ptr++] = c;
+ }
+ break;
+ case 2: /* 1st checksum digit */
+ pdb_xmit_checksum = hex(c) << 4;
+ pdb_debug_state = 3;
+ break;
+ case 3: /* 2nd checksum digit */
+ pdb_xmit_checksum += hex(c);
+ if (pdb_in_checksum != pdb_xmit_checksum)
+ {
+ pdb_put_char('-'); /* checksum failure */
+ printk ("checksum failure [%s.%02x.%02x]\n", pdb_in_buffer,
+ pdb_in_checksum, pdb_xmit_checksum);
+ }
+ else
+ {
+ pdb_put_char('+'); /* checksum okay */
+ if ( pdb_in_buffer_ptr > 1 && pdb_in_buffer[2] == ':' )
+ {
+ pdb_put_char(pdb_in_buffer[0]);
+ pdb_put_char(pdb_in_buffer[1]);
+ /* remove sequence chars from buffer */
+ count = strlen(pdb_in_buffer);
+ for (loop = 3; loop < count; loop++)
+ pdb_in_buffer[loop - 3] = pdb_in_buffer[loop];
+ }
+
+ pdb_process_command (pdb_in_buffer, regs, cr3,
+ PDB_LIVE_EXCEPTION);
+ }
+ pdb_debug_state = 0;
+ break;
+ }
+
+ return out;
+}
+
+int hex(char ch)
+{
+ if ((ch >= 'a') && (ch <= 'f')) return (ch-'a'+10);
+ if ((ch >= '0') && (ch <= '9')) return (ch-'0');
+ if ((ch >= 'A') && (ch <= 'F')) return (ch-'A'+10);
+ return (-1);
+}
+
+/* convert the memory pointed to by mem into hex, placing result in buf */
+/* return a pointer to the last char put in buf (null) */
+char *
+mem2hex (char *mem, char *buf, int count)
+{
+ int i;
+ unsigned char ch;
+
+ for (i = 0; i < count; i++)
+ {
+ ch = get_char (mem++);
+ *buf++ = hexchars[ch >> 4];
+ *buf++ = hexchars[ch % 16];
+ }
+ *buf = 0;
+ return (buf);
+}
+
+/* convert the hex array pointed to by buf into binary to be placed in mem */
+/* return a pointer to the character AFTER the last byte written */
+char *
+hex2mem (char *buf, char *mem, int count)
+{
+ int i;
+ unsigned char ch;
+
+ for (i = 0; i < count; i++)
+ {
+ ch = hex (*buf++) << 4;
+ ch = ch + hex (*buf++);
+ set_char (mem++, ch);
+ }
+ return (mem);
+}
+
+int
+hexToInt (char **ptr, int *intValue)
+{
+ int numChars = 0;
+ int hexValue;
+ int negative = 0;
+
+ *intValue = 0;
+
+ if (**ptr == '-')
+ {
+ negative = 1;
+ numChars++;
+ (*ptr)++;
+ }
+
+ while (**ptr)
+ {
+ hexValue = hex (**ptr);
+ if (hexValue >= 0)
+ {
+ *intValue = (*intValue << 4) | hexValue;
+ numChars++;
+ }
+ else
+ break;
+
+ (*ptr)++;
+ }
+
+ if ( negative )
+ *intValue *= -1;
+
+ return (numChars);
+}
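+
+/*
+ * Example: given "1f,20" this consumes "1f", leaves *ptr pointing at
+ * the ',', stores 31 (0x1f) in *intValue and returns 2 (digits read).
+ */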
+
+/***********************************************************************/
+/***********************************************************************/
+
+
+/*
+ * Add a breakpoint to the list of known breakpoints.
+ * For now there should only be two or three breakpoints so
+ * we use a simple linked list. In the future, maybe a red-black tree?
+ */
+struct pdb_breakpoint breakpoints;
+
+void pdb_bkpt_add (unsigned long cr3, unsigned long address)
+{
+ struct pdb_breakpoint *bkpt = kmalloc(sizeof(*bkpt), GFP_KERNEL);
+ bkpt->cr3 = cr3;
+ bkpt->address = address;
+ list_add(&bkpt->list, &breakpoints.list);
+}
+
+/*
+ * Check to see if the breakpoint is in the list of known breakpoints.
+ * Return a pointer to the breakpoint if found, NULL otherwise.
+ */
+struct pdb_breakpoint* pdb_bkpt_search (unsigned long cr3,
+ unsigned long address)
+{
+ struct list_head *list_entry;
+ struct pdb_breakpoint *bkpt;
+
+ list_for_each(list_entry, &breakpoints.list)
+ {
+ bkpt = list_entry(list_entry, struct pdb_breakpoint, list);
+ if ( bkpt->cr3 == cr3 && bkpt->address == address )
+ return bkpt;
+ }
+
+ return NULL;
+}
+
+/*
+ * Remove a breakpoint from the list of known breakpoints.
+ * Return 1 if the element was not found, otherwise 0.
+ */
+int pdb_bkpt_remove (unsigned long cr3, unsigned long address)
+{
+ struct list_head *list_entry;
+ struct pdb_breakpoint *bkpt;
+
+ list_for_each(list_entry, &breakpoints.list)
+ {
+ bkpt = list_entry(list_entry, struct pdb_breakpoint, list);
+ if ( bkpt->cr3 == cr3 && bkpt->address == address )
+ {
+ list_del(&bkpt->list);
+ kfree(bkpt);
+ return 0;
+ }
+ }
+
+ return 1;
+}
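+
+/*
+ * Round-trip sketch of the breakpoint list API above (hypothetical
+ * cr3/address values; not called from anywhere).
+ */
+#if 0
+static void pdb_bkpt_selftest(void)
+{
+ unsigned long cr3 = 0x1000, addr = 0xc0100000;
+
+ pdb_bkpt_add(cr3, addr);
+ ASSERT(pdb_bkpt_search(cr3, addr) != NULL);
+ ASSERT(pdb_bkpt_remove(cr3, addr) == 0); /* found and removed */
+ ASSERT(pdb_bkpt_remove(cr3, addr) == 1); /* no longer present */
+}
+#endif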
+
+/*
+ * Check to see if a memory write is really gdb setting a breakpoint
+ */
+void pdb_bkpt_check (u_char *buffer, int length,
+ unsigned long cr3, unsigned long addr)
+{
+ if (length == 1 && buffer[0] == 'c' && buffer[1] == 'c')
+ {
+ /* inserting a new breakpoint */
+ pdb_bkpt_add(cr3, addr);
+ TRC(printk("pdb breakpoint detected at 0x%lx:0x%lx\n", cr3, addr));
+ }
+ else if ( pdb_bkpt_remove(cr3, addr) == 0 )
+ {
+ /* removing a breakpoint */
+ TRC(printk("pdb breakpoint cleared at 0x%lx:0x%lx\n", cr3, addr));
+ }
+}
+
+/***********************************************************************/
+
+int pdb_change_values(u_char *buffer, int length,
+ unsigned long cr3, unsigned long addr, int rw);
+int pdb_change_values_one_page(u_char *buffer, int length,
+ unsigned long cr3, unsigned long addr, int rw);
+
+#define __PDB_GET_VAL 1
+#define __PDB_SET_VAL 2
+
+/*
+ * Set memory in a domain's address space
+ * Set "length" bytes at "address" from "domain" to the values in "buffer".
+ * Return the number of bytes set, 0 if there was a problem.
+ */
+
+int pdb_set_values(u_char *buffer, int length,
+ unsigned long cr3, unsigned long addr)
+{
+ int count = pdb_change_values(buffer, length, cr3, addr, __PDB_SET_VAL);
+ return count;
+}
+
+/*
+ * Read memory from a domain's address space.
+ * Fetch "length" bytes at "address" from "domain" into "buffer".
+ * Return the number of bytes read, 0 if there was a problem.
+ */
+
+int pdb_get_values(u_char *buffer, int length,
+ unsigned long cr3, unsigned long addr)
+{
+ return pdb_change_values(buffer, length, cr3, addr, __PDB_GET_VAL);
+}
+
+/*
+ * Read or write memory in an address space
+ */
+int pdb_change_values(u_char *buffer, int length,
+ unsigned long cr3, unsigned long addr, int rw)
+{
+ int remaining; /* number of bytes to touch past this page */
+ int bytes = 0;
+
+ while ( (remaining = (addr + length - 1) - (addr | (PAGE_SIZE - 1))) > 0)
+ {
+ bytes += pdb_change_values_one_page(buffer, length - remaining,
+ cr3, addr, rw);
+ buffer = buffer + (2 * (length - remaining));
+ length = remaining;
+ addr = (addr | (PAGE_SIZE - 1)) + 1;
+ }
+
+ bytes += pdb_change_values_one_page(buffer, length, cr3, addr, rw);
+ return bytes;
+}
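+
+/*
+ * Example of the split above (hypothetical request): with 4K pages, a
+ * 16-byte access at 0xc0100ff8 crosses a page boundary. The first pass
+ * handles the 8 bytes up to 0xc0100fff, addr advances to 0xc0101000,
+ * and the final call outside the loop handles the remaining 8 bytes.
+ * The buffer pointer advances by two characters per byte handled,
+ * matching the hex-text encoding used on the write path.
+ */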
+
+/*
+ * Change memory in a process' address space in one page
+ * Read or write "length" bytes at "address" into/from "buffer"
+ * from the virtual address space referenced by "cr3".
+ * Return the number of bytes read, 0 if there was a problem.
+ */
+
+int pdb_change_values_one_page(u_char *buffer, int length,
+ unsigned long cr3, unsigned long addr, int rw)
+{
+ l2_pgentry_t* l2_table = NULL;
+ l1_pgentry_t* l1_table = NULL;
+ u_char *page;
+ int bytes = 0;
+
+ l2_table = map_domain_mem(cr3);
+ l2_table += l2_table_offset(addr);
+ if (!(l2_pgentry_val(*l2_table) & _PAGE_PRESENT))
+ {
+ if (pdb_page_fault_possible == 1)
+ {
+ pdb_page_fault = 1;
+ TRC(printk("pdb: L2 error (0x%lx)\n", addr));
+ }
+ else
+ {
+ struct task_struct *p = find_domain_by_id(0);
+ printk ("pdb error: cr3: 0x%lx dom0cr3: 0x%lx\n", cr3,
+ p->mm.shadow_mode ? pagetable_val(p->mm.shadow_table)
+ : pagetable_val(p->mm.pagetable));
+ put_task_struct(p);
+ printk ("pdb error: L2:0x%p (0x%lx)\n",
+ l2_table, l2_pgentry_val(*l2_table));
+ }
+ goto exit2;
+ }
+
+ if (l2_pgentry_val(*l2_table) & _PAGE_PSE)
+ {
+#define PSE_PAGE_SHIFT L2_PAGETABLE_SHIFT
+#define PSE_PAGE_SIZE (1UL << PSE_PAGE_SHIFT)
+#define PSE_PAGE_MASK (~(PSE_PAGE_SIZE-1))
+
+#define L1_PAGE_BITS ( (ENTRIES_PER_L1_PAGETABLE - 1) << L1_PAGETABLE_SHIFT )
+
+#define pse_pgentry_to_phys(_x) (l2_pgentry_val(_x) & PSE_PAGE_MASK)
+
+ page = map_domain_mem(pse_pgentry_to_phys(*l2_table) + /* 10 bits */
+ (addr & L1_PAGE_BITS)); /* 10 bits */
+ page += addr & (PAGE_SIZE - 1); /* 12 bits */
+ }
+ else
+ {
+ l1_table = map_domain_mem(l2_pgentry_to_phys(*l2_table));
+ l1_table += l1_table_offset(addr);
+ if (!(l1_pgentry_val(*l1_table) & _PAGE_PRESENT))
+ {
+ if (pdb_page_fault_possible == 1)
+ {
+ pdb_page_fault = 1;
+ TRC(printk ("pdb: L1 error (0x%lx)\n", addr));
+ }
+ else
+ {
+ printk ("L2:0x%p (0x%lx) L1:0x%p (0x%lx)\n",
+ l2_table, l2_pgentry_val(*l2_table),
+ l1_table, l1_pgentry_val(*l1_table));
+ }
+ goto exit1;
+ }
+
+ page = map_domain_mem(l1_pgentry_to_phys(*l1_table));
+ page += addr & (PAGE_SIZE - 1);
+ }
+
+ switch (rw)
+ {
+ case __PDB_GET_VAL: /* read */
+ memcpy (buffer, page, length);
+ bytes = length;
+ break;
+ case __PDB_SET_VAL: /* write */
+ hex2mem (buffer, page, length);
+ bytes = length;
+ break;
+ default: /* unknown */
+ printk ("error: unknown RW flag: %d\n", rw);
+ return 0;
+ }
+
+ unmap_domain_mem((void *)page);
+exit1:
+ if (l1_table != NULL)
+ unmap_domain_mem((void *)l1_table);
+exit2:
+ unmap_domain_mem((void *)l2_table);
+
+ return bytes;
+}
+
+/***********************************************************************/
+
+void breakpoint(void);
+
+/* send the packet in buffer. */
+void pdb_put_packet (unsigned char *buffer, int ack)
+{
+ unsigned char checksum;
+ int count;
+ char ch;
+
+ /* $<packet info>#<checksum> */
+ /* do */
+ {
+ pdb_put_char ('$');
+ checksum = 0;
+ count = 0;
+
+ while ((ch = buffer[count]))
+ {
+ pdb_put_char (ch);
+ checksum += ch;
+ count += 1;
+ }
+
+ pdb_put_char('#');
+ pdb_put_char(hexchars[checksum >> 4]);
+ pdb_put_char(hexchars[checksum % 16]);
+ }
+
+ if (ack)
+ {
+ if ((ch = pdb_get_char()) != '+')
+ {
+ printk(" pdb return error: %c 0x%x [%s]\n", ch, ch, buffer);
+ }
+ }
+}
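+
+/*
+ * Framing example (standard gdb remote protocol): the reply "OK" goes
+ * out as "$OK#9a", since 'O' + 'K' == 0x4f + 0x4b == 0x9a modulo 256;
+ * the peer answers '+' for a good checksum or '-' to force a resend.
+ */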
+
+void pdb_get_packet(char *buffer)
+{
+ int count;
+ char ch;
+ unsigned char checksum = 0;
+ unsigned char xmitcsum = 0;
+
+ do
+ {
+ while ((ch = pdb_get_char()) != '$');
+
+ count = 0;
+ checksum = 0;
+
+ while (count < BUFMAX)
+ {
+ ch = pdb_get_char();
+ if (ch == '#') break;
+ checksum += ch;
+ buffer[count] = ch;
+ count++;
+ }
+ buffer[count] = 0;
+
+ if (ch == '#')
+ {
+ xmitcsum = hex(pdb_get_char()) << 4;
+ xmitcsum += hex(pdb_get_char());
+
+ if (xmitcsum == checksum)
+ {
+ pdb_put_char('+');
+ if (buffer[2] == ':')
+ {
+ printk ("pdb: obsolete gdb packet (sequence ID)\n");
+ }
+ }
+ else
+ {
+ pdb_put_char('-');
+ }
+ }
+ } while (checksum != xmitcsum);
+
+ return;
+}
+
+/*
+ * process a machine interrupt or exception
+ * Return 1 if pdb is not interested in the exception; it should
+ * be propagated to the guest os.
+ */
+
+int pdb_handle_exception(int exceptionVector,
+ struct pt_regs *xen_regs)
+{
+ int signal = 0;
+ struct pdb_breakpoint* bkpt;
+ int watchdog_save;
+ unsigned long cr3;
+
+ __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : );
+
+ /* If the exception is an int3 from user space then pdb is only
+ interested if it re-wrote an instruction to set the breakpoint.
+ This occurs when leaving a system call from a domain.
+ */
+ if ( exceptionVector == 3 &&
+ (xen_regs->xcs & 3) == 3 &&
+ xen_regs->eip != pdb_system_call_next_addr + 1)
+ {
+ TRC(printf("pdb: user bkpt (0x%x) at 0x%x:0x%lx:0x%lx\n",
+ exceptionVector, xen_regs->xcs & 3, cr3, xen_regs->eip));
+ return 1;
+ }
+
+ /*
+ * If PDB didn't set the breakpoint, is not single stepping,
+ * is not entering a system call in a domain, and the user
+ * didn't press the magic debug key, then we don't handle
+ * the exception.
+ */
+ bkpt = pdb_bkpt_search(cr3, xen_regs->eip - 1);
+ if ( (bkpt == NULL) &&
+ !pdb_stepping &&
+ !pdb_system_call &&
+ xen_regs->eip != pdb_system_call_next_addr + 1 &&
+ (exceptionVector != KEYPRESS_EXCEPTION) &&
+ xen_regs->eip < 0xc0000000) /* Linux-specific for now! */
+ {
+ TRC(printf("pdb: user bkpt (0x%x) at 0x%lx:0x%lx\n",
+ exceptionVector, cr3, xen_regs->eip));
+ return 1;
+ }
+
+ printk("pdb_handle_exception [0x%x][0x%lx:0x%lx]\n",
+ exceptionVector, cr3, xen_regs->eip);
+
+ if ( pdb_stepping )
+ {
+ /* Stepped one instruction; now return to normal execution. */
+ xen_regs->eflags &= ~X86_EFLAGS_TF;
+ pdb_stepping = 0;
+ }
+
+ if ( pdb_system_call )
+ {
+ pdb_system_call = 0;
+
+ pdb_linux_syscall_exit_bkpt (xen_regs, &pdb_ctx);
+
+ /* we don't have a saved breakpoint so we need to rewind eip */
+ xen_regs->eip--;
+
+ /* if the user doesn't care about breaking when entering a
+ system call then we'll just ignore the exception */
+ if ( (pdb_ctx.system_call & 0x01) == 0 )
+ {
+ return 0;
+ }
+ }
+
+ if ( exceptionVector == BREAKPT_EXCEPTION && bkpt != NULL)
+ {
+ /* Executed Int3: replace breakpoint byte with real program byte. */
+ xen_regs->eip--;
+ }
+
+ /* returning to user space after a system call */
+ if ( xen_regs->eip == pdb_system_call_next_addr + 1)
+ {
+ u_char instr[4]; /* one byte as hex text plus NUL; REALLY REALLY REALLY STUPID */
+
+ mem2hex (&pdb_system_call_leave_instr, instr, 1);
+
+ pdb_linux_set_values (instr, 1, pdb_system_call_next_addr,
+ pdb_ctx.process, pdb_ctx.ptbr);
+
+ pdb_system_call_next_addr = 0;
+ pdb_system_call_leave_instr = 0;
+
+ /* manually rewind eip */
+ xen_regs->eip--;
+
+ /* if the user doesn't care about breaking when returning
+ to user space after a system call then we'll just ignore
+ the exception */
+ if ( (pdb_ctx.system_call & 0x02) == 0 )
+ {
+ return 0;
+ }
+ }
+
+ /* Generate a signal for GDB. */
+ switch ( exceptionVector )
+ {
+ case KEYPRESS_EXCEPTION:
+ signal = 2; break; /* SIGINT */
+ case DEBUG_EXCEPTION:
+ signal = 5; break; /* SIGTRAP */
+ case BREAKPT_EXCEPTION:
+ signal = 5; break; /* SIGTRAP */
+ default:
+ printk("pdb: can't generate signal for unknown exception vector %d\n",
+ exceptionVector);
+ break;
+ }
+
+ pdb_out_buffer[0] = 'S';
+ pdb_out_buffer[1] = hexchars[signal >> 4];
+ pdb_out_buffer[2] = hexchars[signal % 16];
+ pdb_out_buffer[3] = 0;
+ pdb_put_packet(pdb_out_buffer, 1);
+
+ watchdog_save = watchdog_on;
+ watchdog_on = 0;
+
+ do {
+ pdb_out_buffer[0] = 0;
+ pdb_get_packet(pdb_in_buffer);
+ }
+ while ( pdb_process_command(pdb_in_buffer, xen_regs, cr3, signal) == 0 );
+
+ watchdog_on = watchdog_save;
+
+ return 0;
+}
+
+void pdb_key_pressed(u_char key, void *dev_id, struct pt_regs *regs)
+{
+ pdb_handle_exception(KEYPRESS_EXCEPTION, regs);
+ return;
+}
+
+void initialize_pdb()
+{
+ extern char opt_pdb[];
+
+ /* Certain state must be initialised even when PDB will not be used. */
+ memset((void *) &breakpoints, 0, sizeof(breakpoints));
+ INIT_LIST_HEAD(&breakpoints.list);
+ pdb_stepping = 0;
+
+ if ( strcmp(opt_pdb, "none") == 0 )
+ return;
+
+ if ( (pdb_serhnd = parse_serial_handle(opt_pdb)) == -1 )
+ {
+ printk("error: failed to initialize PDB on port %s\n", opt_pdb);
+ return;
+ }
+
+ pdb_ctx.valid = 1;
+ pdb_ctx.domain = -1;
+ pdb_ctx.process = -1;
+ pdb_ctx.system_call = 0;
+ pdb_ctx.ptbr = 0;
+
+ printk("pdb: pervasive debugger (%s) www.cl.cam.ac.uk/netos/pdb\n",
+ opt_pdb);
+
+ /* Acknowledge any spurious GDB packets. */
+ pdb_put_char('+');
+
+ add_key_handler('D', pdb_key_pressed, "enter pervasive debugger");
+
+ pdb_initialized = 1;
+}
+
+void breakpoint(void)
+{
+ if ( pdb_initialized )
+ asm("int $3");
+}
--- /dev/null
+/*
+ * linux/arch/i386/kernel/process.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+/*
+ * This file handles the architecture-dependent parts of process handling..
+ */
+
+#define __KERNEL_SYSCALLS__
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/smp.h>
+#include <asm/ptrace.h>
+#include <xen/delay.h>
+#include <xen/interrupt.h>
+#include <asm/mc146818rtc.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+#include <asm/i387.h>
+#include <asm/mpspec.h>
+#include <asm/ldt.h>
+#include <xen/irq.h>
+#include <xen/event.h>
+#include <xen/shadow.h>
+
+int hlt_counter;
+
+void disable_hlt(void)
+{
+ hlt_counter++;
+}
+
+void enable_hlt(void)
+{
+ hlt_counter--;
+}
+
+/*
+ * We use this if we don't have any better
+ * idle routine..
+ */
+static void default_idle(void)
+{
+ if (!hlt_counter) {
+ __cli();
+ if (!current->hyp_events && !softirq_pending(smp_processor_id()))
+ safe_halt();
+ else
+ __sti();
+ }
+}
+
+void continue_cpu_idle_loop(void)
+{
+ int cpu = smp_processor_id();
+ for ( ; ; )
+ {
+ irq_stat[cpu].idle_timestamp = jiffies;
+ while (!current->hyp_events && !softirq_pending(cpu))
+ default_idle();
+ do_hyp_events();
+ do_softirq();
+ }
+}
+
+void startup_cpu_idle_loop(void)
+{
+ /* Just some sanity to ensure that the scheduler is set up okay. */
+ ASSERT(current->domain == IDLE_DOMAIN_ID);
+ (void)wake_up(current);
+ __enter_scheduler();
+
+ /*
+ * Declare CPU setup done to the boot processor.
+ * A memory barrier ensures the state is visible to it.
+ */
+ smp_mb();
+ init_idle();
+
+ continue_cpu_idle_loop();
+}
+
+static long no_idt[2];
+static int reboot_mode;
+int reboot_thru_bios = 0;
+
+#ifdef CONFIG_SMP
+int reboot_smp = 0;
+static int reboot_cpu = -1;
+/* shamelessly grabbed from lib/vsprintf.c for readability */
+#define is_digit(c) ((c) >= '0' && (c) <= '9')
+#endif
+
+
+static inline void kb_wait(void)
+{
+ int i;
+
+ for (i=0; i<0x10000; i++)
+ if ((inb_p(0x64) & 0x02) == 0)
+ break;
+}
+
+
+void machine_restart(char * __unused)
+{
+ extern int opt_noreboot;
+#ifdef CONFIG_SMP
+ int cpuid;
+#endif
+
+ if ( opt_noreboot )
+ {
+ printk("Reboot disabled on cmdline: require manual reset\n");
+ for ( ; ; ) __asm__ __volatile__ ("hlt");
+ }
+
+#ifdef CONFIG_SMP
+ cpuid = GET_APIC_ID(apic_read(APIC_ID));
+
+ /* KAF: Need interrupts enabled for safe IPI. */
+ __sti();
+
+ if (reboot_smp) {
+
+ /* check to see if reboot_cpu is valid
+ if it's not, default to the BSP */
+ if ((reboot_cpu == -1) ||
+ (reboot_cpu > (NR_CPUS -1)) ||
+ !(phys_cpu_present_map & (1<<cpuid)))
+ reboot_cpu = boot_cpu_physical_apicid;
+
+ reboot_smp = 0; /* use this as a flag to only go through this once */
+ /* re-run this function on the other CPUs; they will fall
+ through this section since we have cleared reboot_smp, and
+ do the reboot if on the correct CPU, otherwise halt. */
+ if (reboot_cpu != cpuid)
+ smp_call_function((void *)machine_restart , NULL, 1, 0);
+ }
+
+ /* if reboot_cpu is still -1, then we want a traditional reboot,
+ and if we are not running on the reboot_cpu, halt */
+ if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) {
+ for (;;)
+ __asm__ __volatile__ ("hlt");
+ }
+ /*
+ * Stop all CPUs and turn off local APICs and the IO-APIC, so
+ * other OSs see a clean IRQ state.
+ */
+ smp_send_stop();
+ disable_IO_APIC();
+#endif
+
+ if(!reboot_thru_bios) {
+ /* rebooting needs to touch the page at absolute addr 0 */
+ *((unsigned short *)__va(0x472)) = reboot_mode;
+ for (;;) {
+ int i;
+ for (i=0; i<100; i++) {
+ kb_wait();
+ udelay(50);
+ outb(0xfe,0x64); /* pulse reset low */
+ udelay(50);
+ }
+ /* That didn't work - force a triple fault.. */
+ __asm__ __volatile__("lidt %0": "=m" (no_idt));
+ __asm__ __volatile__("int3");
+ }
+ }
+
+ panic("Need to reinclude BIOS reboot code\n");
+}
+
+void machine_halt(void)
+{
+ machine_restart(0);
+}
+
+void machine_power_off(void)
+{
+ machine_restart(0);
+}
+
+void new_thread(struct task_struct *p,
+ unsigned long start_pc,
+ unsigned long start_stack,
+ unsigned long start_info)
+{
+ execution_context_t *ec = &p->shared_info->execution_context;
+
+ /*
+ * Initial register values:
+ * DS,ES,FS,GS = FLAT_RING1_DS
+ * CS:EIP = FLAT_RING1_CS:start_pc
+ * SS:ESP = FLAT_RING1_DS:start_stack
+ * ESI = start_info
+ * [EAX,EBX,ECX,EDX,EDI,EBP are zero]
+ */
+ ec->ds = ec->es = ec->fs = ec->gs = ec->ss = FLAT_RING1_DS;
+ ec->cs = FLAT_RING1_CS;
+ ec->eip = start_pc;
+ ec->esp = start_stack;
+ ec->esi = start_info;
+
+ __save_flags(ec->eflags);
+ ec->eflags |= X86_EFLAGS_IF;
+
+ /* No fast trap at start of day. */
+ SET_DEFAULT_FAST_TRAP(&p->thread);
+}
+
+
+/*
+ * This special macro can be used to load a debugging register
+ */
+#define loaddebug(thread,register) \
+ __asm__("movl %0,%%db" #register \
+ : /* no output */ \
+ :"r" (thread->debugreg[register]))
+
+
+void switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+{
+ struct thread_struct *next = &next_p->thread;
+ struct tss_struct *tss = init_tss + smp_processor_id();
+ execution_context_t *stack_ec = get_execution_context();
+ int i;
+
+ __cli();
+
+ /* Switch guest general-register state. */
+ if ( !is_idle_task(prev_p) )
+ {
+ memcpy(&prev_p->shared_info->execution_context,
+ stack_ec,
+ sizeof(*stack_ec));
+ unlazy_fpu(prev_p);
+ CLEAR_FAST_TRAP(&prev_p->thread);
+ }
+
+ if ( !is_idle_task(next_p) )
+ {
+ memcpy(stack_ec,
+ &next_p->shared_info->execution_context,
+ sizeof(*stack_ec));
+
+ /*
+ * This is sufficient! If the descriptor DPL differs from CS RPL then
+ * we'll #GP. If DS, ES, FS, GS are DPL 0 then they'll be cleared
+ * automatically. If SS RPL or DPL differs from CS RPL then we'll #GP.
+ */
+ if ( (stack_ec->cs & 3) == 0 )
+ stack_ec->cs = FLAT_RING1_CS;
+ if ( (stack_ec->ss & 3) == 0 )
+ stack_ec->ss = FLAT_RING1_DS;
+
+ SET_FAST_TRAP(&next_p->thread);
+
+ /* Switch the guest OS ring-1 stack. */
+ tss->esp1 = next->guestos_sp;
+ tss->ss1 = next->guestos_ss;
+
+ /* Maybe switch the debug registers. */
+ if ( unlikely(next->debugreg[7]) )
+ {
+ loaddebug(next, 0);
+ loaddebug(next, 1);
+ loaddebug(next, 2);
+ loaddebug(next, 3);
+ /* no 4 and 5 */
+ loaddebug(next, 6);
+ loaddebug(next, 7);
+ }
+
+ /* Switch page tables. */
+ write_ptbase(&next_p->mm);
+ tlb_clocktick();
+ }
+
+ if ( unlikely(prev_p->io_bitmap != NULL) ||
+ unlikely(next_p->io_bitmap != NULL) )
+ {
+ if ( next_p->io_bitmap != NULL )
+ {
+ /* Copy in the appropriate parts of the IO bitmap. We use the
+ * selector to copy only the interesting parts of the bitmap. */
+
+ u64 old_sel = ~0ULL; /* IO bitmap selector for previous task. */
+
+ if ( prev_p->io_bitmap != NULL)
+ {
+ old_sel = prev_p->io_bitmap_sel;
+
+ /* Replace any areas of the IO bitmap that had bits cleared. */
+ for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ )
+ if ( !test_bit(i, &prev_p->io_bitmap_sel) )
+ memcpy(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS],
+ &next_p->io_bitmap[i * IOBMP_SELBIT_LWORDS],
+ IOBMP_SELBIT_LWORDS * sizeof(unsigned long));
+ }
+
+ /* Copy in any regions of the new task's bitmap that have bits
+ * clear and we haven't already dealt with. */
+ for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ )
+ {
+ if ( test_bit(i, &old_sel)
+ && !test_bit(i, &next_p->io_bitmap_sel) )
+ memcpy(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS],
+ &next_p->io_bitmap[i * IOBMP_SELBIT_LWORDS],
+ IOBMP_SELBIT_LWORDS * sizeof(unsigned long));
+ }
+
+ tss->bitmap = IO_BITMAP_OFFSET;
+
+ }
+ else
+ {
+ /* In this case, we're switching FROM a task with IO port access,
+ * to a task that doesn't use the IO bitmap. We set any TSS bits
+ * that might have been cleared, ready for future use. */
+ for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ )
+ if ( !test_bit(i, &prev_p->io_bitmap_sel) )
+ memset(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS],
+ 0xFF, IOBMP_SELBIT_LWORDS * sizeof(unsigned long));
+
+ /*
+ * a bitmap offset pointing outside of the TSS limit
+ * causes a nicely controllable SIGSEGV if a process
+ * tries to use a port IO instruction. The first
+ * sys_ioperm() call sets up the bitmap properly.
+ */
+ tss->bitmap = INVALID_IO_BITMAP_OFFSET;
+ }
+ }
+
+ set_current(next_p);
+
+ /* Switch GDT and LDT. */
+ __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->mm.gdt));
+ load_LDT(next_p);
+
+ __sti();
+}
+
+
+/* XXX Currently the 'domain' field is ignored! XXX */
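+/*
+ * EFLAGS bits 12-13 hold the IOPL field: the mask 0xffffcfff clears
+ * exactly those two bits before the requested level is shifted into
+ * place, e.g. new_io_pl == 3 ORs in 0x3000.
+ */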
+long do_iopl(domid_t domain, unsigned int new_io_pl)
+{
+ execution_context_t *ec = get_execution_context();
+ ec->eflags = (ec->eflags & 0xffffcfff) | ((new_io_pl&3) << 12);
+ return 0;
+}
--- /dev/null
+#include <asm/atomic.h>
+#include <asm/rwlock.h>
+
+#if defined(CONFIG_SMP)
+asm(
+".align 4\n"
+".globl __write_lock_failed\n"
+"__write_lock_failed:\n"
+" " LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n"
+"1: rep; nop\n"
+" cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n"
+" jne 1b\n"
+" " LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n"
+" jnz __write_lock_failed\n"
+" ret\n"
+
+".align 4\n"
+".globl __read_lock_failed\n"
+"__read_lock_failed:\n"
+" lock ; incl (%eax)\n"
+"1: rep; nop\n"
+" cmpl $1,(%eax)\n"
+" js 1b\n"
+" lock ; decl (%eax)\n"
+" js __read_lock_failed\n"
+" ret\n"
+);
+#endif
--- /dev/null
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/interrupt.h>
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <xen/pci.h>
+#include <xen/serial.h>
+#include <xen/acpi.h>
+#include <xen/module.h>
+#include <asm/bitops.h>
+#include <asm/smp.h>
+#include <asm/processor.h>
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#include <asm/desc.h>
+#include <asm/domain_page.h>
+#include <asm/pdb.h>
+
+char ignore_irq13; /* set if exception 16 works */
+struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
+
+/* Lots of nice things, since we only target PPro+. */
+unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE;
+EXPORT_SYMBOL(mmu_cr4_features);
+
+unsigned long wait_init_idle;
+
+struct task_struct *idle_task[NR_CPUS] = { &idle0_task };
+
+#ifdef CONFIG_ACPI_INTERPRETER
+int acpi_disabled = 0;
+#else
+int acpi_disabled = 1;
+#endif
+EXPORT_SYMBOL(acpi_disabled);
+
+#ifdef CONFIG_ACPI_BOOT
+extern int __initdata acpi_ht;
+int acpi_force __initdata = 0;
+#endif
+
+int phys_proc_id[NR_CPUS];
+int logical_proc_id[NR_CPUS];
+
+/* Standard macro to see if a specific flag is changeable */
+static inline int flag_is_changeable_p(u32 flag)
+{
+ u32 f1, f2;
+
+ asm("pushfl\n\t"
+ "pushfl\n\t"
+ "popl %0\n\t"
+ "movl %0,%1\n\t"
+ "xorl %2,%0\n\t"
+ "pushl %0\n\t"
+ "popfl\n\t"
+ "pushfl\n\t"
+ "popl %0\n\t"
+ "popfl\n\t"
+ : "=&r" (f1), "=&r" (f2)
+ : "ir" (flag));
+
+ return ((f1^f2) & flag) != 0;
+}
+
+/* Probe for the CPUID instruction */
+static int __init have_cpuid_p(void)
+{
+ return flag_is_changeable_p(X86_EFLAGS_ID);
+}
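+
+ /* The ID flag is EFLAGS bit 21; the ability to toggle it is the
+ * architected test for CPUID support. */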
+
+void __init get_cpu_vendor(struct cpuinfo_x86 *c)
+{
+ char *v = c->x86_vendor_id;
+
+ if (!strcmp(v, "GenuineIntel"))
+ c->x86_vendor = X86_VENDOR_INTEL;
+ else if (!strcmp(v, "AuthenticAMD"))
+ c->x86_vendor = X86_VENDOR_AMD;
+ else if (!strcmp(v, "CyrixInstead"))
+ c->x86_vendor = X86_VENDOR_CYRIX;
+ else if (!strcmp(v, "UMC UMC UMC "))
+ c->x86_vendor = X86_VENDOR_UMC;
+ else if (!strcmp(v, "CentaurHauls"))
+ c->x86_vendor = X86_VENDOR_CENTAUR;
+ else if (!strcmp(v, "NexGenDriven"))
+ c->x86_vendor = X86_VENDOR_NEXGEN;
+ else if (!strcmp(v, "RiseRiseRise"))
+ c->x86_vendor = X86_VENDOR_RISE;
+ else if (!strcmp(v, "GenuineTMx86") ||
+ !strcmp(v, "TransmetaCPU"))
+ c->x86_vendor = X86_VENDOR_TRANSMETA;
+ else
+ c->x86_vendor = X86_VENDOR_UNKNOWN;
+}
+
+static void __init init_intel(struct cpuinfo_x86 *c)
+{
+ extern int opt_noht, opt_noacpi;
+
+ /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */
+ if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 )
+ clear_bit(X86_FEATURE_SEP, &c->x86_capability);
+
+ if ( opt_noht )
+ {
+ opt_noacpi = 1; /* Virtual CPUs only appear in ACPI tables. */
+ clear_bit(X86_FEATURE_HT, &c->x86_capability[0]);
+ }
+
+#ifdef CONFIG_SMP
+ if ( test_bit(X86_FEATURE_HT, &c->x86_capability) )
+ {
+ u32 eax, ebx, ecx, edx;
+ int initial_apic_id, siblings, cpu = smp_processor_id();
+
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+ siblings = (ebx & 0xff0000) >> 16;
+
+ if ( siblings <= 1 )
+ {
+ printk(KERN_INFO "CPU#%d: Hyper-Threading is disabled\n", cpu);
+ }
+ else if ( siblings > 2 )
+ {
+ panic("We don't support more than two logical CPUs per package!");
+ }
+ else
+ {
+ initial_apic_id = ebx >> 24 & 0xff;
+ phys_proc_id[cpu] = initial_apic_id >> 1;
+ logical_proc_id[cpu] = initial_apic_id & 1;
+ printk(KERN_INFO "CPU#%d: Physical ID: %d, Logical ID: %d\n",
+ cpu, phys_proc_id[cpu], logical_proc_id[cpu]);
+ }
+ }
+#endif
+}
+
+static void __init init_amd(struct cpuinfo_x86 *c)
+{
+ /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+ 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
+ clear_bit(0*32+31, &c->x86_capability);
+
+ switch(c->x86)
+ {
+ case 5:
+ panic("AMD K6 is not supported.\n");
+ case 6: /* An Athlon/Duron. We can trust the BIOS probably */
+ break;
+ }
+}
+
+/*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+void __init identify_cpu(struct cpuinfo_x86 *c)
+{
+ int junk, i, cpu = smp_processor_id();
+ u32 xlvl, tfms;
+
+ phys_proc_id[cpu] = cpu;
+ logical_proc_id[cpu] = 0;
+
+ c->x86_vendor = X86_VENDOR_UNKNOWN;
+ c->cpuid_level = -1; /* CPUID not detected */
+ c->x86_model = c->x86_mask = 0; /* So far unknown... */
+ c->x86_vendor_id[0] = '\0'; /* Unset */
+ memset(&c->x86_capability, 0, sizeof c->x86_capability);
+
+ if ( !have_cpuid_p() )
+ panic("Ancient processors not supported\n");
+
+ /* Get vendor name */
+ cpuid(0x00000000, &c->cpuid_level,
+ (int *)&c->x86_vendor_id[0],
+ (int *)&c->x86_vendor_id[8],
+ (int *)&c->x86_vendor_id[4]);
+
+ get_cpu_vendor(c);
+
+ if ( c->cpuid_level == 0 )
+ panic("Decrepit CPUID not supported\n");
+
+ cpuid(0x00000001, &tfms, &junk, &junk,
+ &c->x86_capability[0]);
+ c->x86 = (tfms >> 8) & 15;
+ c->x86_model = (tfms >> 4) & 15;
+ c->x86_mask = tfms & 15;
+
+ /* AMD-defined flags: level 0x80000001 */
+ xlvl = cpuid_eax(0x80000000);
+ if ( (xlvl & 0xffff0000) == 0x80000000 ) {
+ if ( xlvl >= 0x80000001 )
+ c->x86_capability[1] = cpuid_edx(0x80000001);
+ }
+
+ /* Transmeta-defined flags: level 0x80860001 */
+ xlvl = cpuid_eax(0x80860000);
+ if ( (xlvl & 0xffff0000) == 0x80860000 ) {
+ if ( xlvl >= 0x80860001 )
+ c->x86_capability[2] = cpuid_edx(0x80860001);
+ }
+
+ printk("CPU%d: Before vendor init, caps: %08x %08x %08x, vendor = %d\n",
+ smp_processor_id(),
+ c->x86_capability[0],
+ c->x86_capability[1],
+ c->x86_capability[2],
+ c->x86_vendor);
+
+ switch ( c->x86_vendor ) {
+ case X86_VENDOR_INTEL:
+ init_intel(c);
+ break;
+ case X86_VENDOR_AMD:
+ init_amd(c);
+ break;
+ case X86_VENDOR_UNKNOWN: /* Connectix Virtual PC reports this */
+ break;
+ case X86_VENDOR_CENTAUR:
+ break;
+ default:
+ printk("Unknown CPU identifier (%d): continuing anyway, "
+ "but might fail.\n", c->x86_vendor);
+ }
+
+ printk("CPU caps: %08x %08x %08x %08x\n",
+ c->x86_capability[0],
+ c->x86_capability[1],
+ c->x86_capability[2],
+ c->x86_capability[3]);
+
+ /*
+ * On SMP, boot_cpu_data holds the common feature set between
+ * all CPUs; so make sure that we indicate which features are
+ * common between the CPUs. The first time this routine gets
+ * executed, c == &boot_cpu_data.
+ */
+ if ( c != &boot_cpu_data ) {
+ /* AND the already accumulated flags with these */
+ for ( i = 0 ; i < NCAPINTS ; i++ )
+ boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
+ }
+}
+
+
+unsigned long cpu_initialized;
+void __init cpu_init(void)
+{
+ int nr = smp_processor_id();
+ struct tss_struct * t = &init_tss[nr];
+
+ if ( test_and_set_bit(nr, &cpu_initialized) )
+ panic("CPU#%d already initialized!!!\n", nr);
+ printk("Initializing CPU#%d\n", nr);
+
+ /* Set up GDT and IDT. */
+ SET_GDT_ENTRIES(current, DEFAULT_GDT_ENTRIES);
+ SET_GDT_ADDRESS(current, DEFAULT_GDT_ADDRESS);
+ __asm__ __volatile__("lgdt %0": "=m" (*current->mm.gdt));
+ __asm__ __volatile__("lidt %0": "=m" (idt_descr));
+
+ /* No nested task. */
+ __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
+
+ /* Ensure FPU gets initialised for each domain. */
+ stts();
+
+ /* Set up and load the per-CPU TSS and LDT. */
+ t->ss0 = __HYPERVISOR_DS;
+ t->esp0 = get_stack_top();
+ set_tss_desc(nr,t);
+ load_TR(nr);
+ __asm__ __volatile__("lldt %%ax"::"a" (0));
+
+ /* Clear all 6 debug registers. */
+#define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) );
+ CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
+#undef CD
+
+ /* Install correct page table. */
+ write_ptbase(¤t->mm);
+
+ init_idle_task();
+}
+
+static void __init do_initcalls(void)
+{
+ initcall_t *call;
+ for ( call = &__initcall_start; call < &__initcall_end; call++ )
+ (*call)();
+}
+
+/*
+ * IBM-compatible BIOSes place drive info tables at initial interrupt
+ * vectors 0x41 and 0x46. These are in the form of 16-bit-mode far ptrs.
+ */
+struct drive_info_struct { unsigned char dummy[32]; } drive_info;
+void get_bios_driveinfo(void)
+{
+ unsigned long seg, off, tab1, tab2;
+
+ off = (unsigned long)*(unsigned short *)(4*0x41+0);
+ seg = (unsigned long)*(unsigned short *)(4*0x41+2);
+ tab1 = (seg<<4) + off;
+
+ off = (unsigned long)*(unsigned short *)(4*0x46+0);
+ seg = (unsigned long)*(unsigned short *)(4*0x46+2);
+ tab2 = (seg<<4) + off;
+
+ printk("Reading BIOS drive-info tables at 0x%05lx and 0x%05lx\n",
+ tab1, tab2);
+
+ memcpy(drive_info.dummy+ 0, (char *)tab1, 16);
+ memcpy(drive_info.dummy+16, (char *)tab2, 16);
+}
+
+
+unsigned long pci_mem_start = 0x10000000;
+
+void __init start_of_day(void)
+{
+ extern void trap_init(void);
+ extern void init_IRQ(void);
+ extern void time_init(void);
+ extern void timer_bh(void);
+ extern void init_timervecs(void);
+ extern void ac_timer_init(void);
+ extern void initialize_keytable(void);
+ extern void initialize_keyboard(void);
+ extern int opt_nosmp, opt_watchdog, opt_noacpi, opt_ignorebiostables;
+ extern int do_timer_lists_from_pit;
+ unsigned long low_mem_size;
+
+#ifdef MEMORY_GUARD
+ /* Unmap the first page of CPU0's stack. */
+ extern unsigned long cpu0_stack[];
+ memguard_guard_range(cpu0_stack, PAGE_SIZE);
+#endif
+
+ open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ,
+ (void *)new_tlbflush_clock_period,
+ NULL);
+
+ if ( opt_watchdog )
+ nmi_watchdog = NMI_LOCAL_APIC;
+
+ /*
+ * We do this early, but tables are in the lowest 1MB (usually
+ * 0xfe000-0xfffff). Therefore they're unlikely to ever get clobbered.
+ */
+ get_bios_driveinfo();
+
+ /* Tell the PCI layer not to allocate too close to the RAM area.. */
+ low_mem_size = ((max_page << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
+ if ( low_mem_size > pci_mem_start ) pci_mem_start = low_mem_size;
+
+ identify_cpu(&boot_cpu_data); /* get CPU type info */
+ if ( cpu_has_fxsr ) set_in_cr4(X86_CR4_OSFXSR);
+ if ( cpu_has_xmm ) set_in_cr4(X86_CR4_OSXMMEXCPT);
+#ifdef CONFIG_SMP
+ if ( opt_ignorebiostables )
+ {
+ opt_nosmp = 1; /* No SMP without configuration */
+ opt_noacpi = 1; /* ACPI will just confuse matters also */
+ }
+ else
+ {
+ find_smp_config();
+ smp_alloc_memory(); /* trampoline which other CPUs jump at */
+ }
+#endif
+ paging_init(); /* not much here now, but sets up fixmap */
+ if ( !opt_noacpi )
+ acpi_boot_init();
+#ifdef CONFIG_SMP
+ if ( smp_found_config )
+ get_smp_config();
+#endif
+ domain_init();
+ scheduler_init();
+ trap_init();
+ init_IRQ(); /* installs simple interrupt wrappers. Starts HZ clock. */
+ time_init(); /* installs software handler for HZ clock. */
+ softirq_init();
+ init_timervecs();
+ init_bh(TIMER_BH, timer_bh);
+ init_apic_mappings(); /* make APICs addressable in our pagetables. */
+
+#ifndef CONFIG_SMP
+ APIC_init_uniprocessor();
+#else
+ if ( opt_nosmp )
+ APIC_init_uniprocessor();
+ else
+ smp_boot_cpus();
+ /*
+ * Does loads of stuff, including kicking the local
+ * APIC, and the IO APIC after other CPUs are booted.
+ * Each IRQ is preferably handled by IO-APIC, but
+ * fall thru to 8259A if we have to (but slower).
+ */
+#endif
+
+ __sti();
+
+ initialize_keytable(); /* call back handling for key codes */
+
+ serial_init_stage2();
+ initialize_keyboard(); /* setup keyboard (also for debugging) */
+
+#ifdef XEN_DEBUGGER
+ initialize_pdb(); /* pervasive debugger */
+#endif
+
+ if ( !cpu_has_apic )
+ {
+ do_timer_lists_from_pit = 1;
+ if ( smp_num_cpus != 1 )
+ panic("We need local APICs on SMP machines!");
+ }
+
+ ac_timer_init(); /* init accurate timers */
+ init_xen_time(); /* initialise the time */
+ schedulers_start(); /* start scheduler for each CPU */
+
+ check_nmi_watchdog();
+
+#ifdef CONFIG_PCI
+ pci_init();
+#endif
+ do_initcalls();
+
+#ifdef CONFIG_SMP
+ wait_init_idle = cpu_online_map;
+ clear_bit(smp_processor_id(), &wait_init_idle);
+ smp_threads_ready = 1;
+ smp_commence(); /* Tell other CPUs that state of the world is stable. */
+ while (wait_init_idle)
+ {
+ cpu_relax();
+ barrier();
+ }
+#endif
+
+ watchdog_on = 1;
+}
--- /dev/null
+/*
+ * Intel SMP support routines.
+ *
+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ * This code is released under the GNU General Public License version 2 or
+ * later.
+ */
+
+#include <xen/irq.h>
+#include <xen/sched.h>
+#include <xen/delay.h>
+#include <xen/spinlock.h>
+#include <asm/smp.h>
+#include <asm/mc146818rtc.h>
+#include <asm/flushtlb.h>
+#include <asm/smpboot.h>
+#include <asm/hardirq.h>
+
+#ifdef CONFIG_SMP
+
+/*
+ * Some notes on x86 processor bugs affecting SMP operation:
+ *
+ * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
+ * The Linux implications for SMP are handled as follows:
+ *
+ * Pentium III / [Xeon]
+ * None of the E1AP-E3AP errata are visible to the user.
+ *
+ * E1AP. see PII A1AP
+ * E2AP. see PII A2AP
+ * E3AP. see PII A3AP
+ *
+ * Pentium II / [Xeon]
+ * None of the A1AP-A3AP errata are visible to the user.
+ *
+ * A1AP. see PPro 1AP
+ * A2AP. see PPro 2AP
+ * A3AP. see PPro 7AP
+ *
+ * Pentium Pro
+ * None of 1AP-9AP errata are visible to the normal user,
+ * except occasional delivery of 'spurious interrupt' as trap #15.
+ * This is very rare and a non-problem.
+ *
+ * 1AP. Linux maps APIC as non-cacheable
+ * 2AP. worked around in hardware
+ * 3AP. fixed in C0 and above steppings microcode update.
+ * Linux does not use excessive STARTUP_IPIs.
+ * 4AP. worked around in hardware
+ * 5AP. symmetric IO mode (normal Linux operation) not affected.
+ * 'noapic' mode has vector 0xf filled out properly.
+ * 6AP. 'noapic' mode might be affected - fixed in later steppings
+ * 7AP. We do not assume writes to the LVT deasserting IRQs
+ * 8AP. We do not enable low power mode (deep sleep) during MP bootup
+ * 9AP. We do not use mixed mode
+ */
+
+/*
+ * The following functions deal with sending IPIs between CPUs.
+ *
+ * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
+ */
+
+static inline int __prepare_ICR (unsigned int shortcut, int vector)
+{
+ return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
+}
+
+static inline int __prepare_ICR2 (unsigned int mask)
+{
+ return SET_APIC_DEST_FIELD(mask);
+}
+
+static inline void __send_IPI_shortcut(unsigned int shortcut, int vector)
+{
+ /*
+ * Subtle. In the case of the 'never do double writes' workaround
+ * we have to lock out interrupts to be safe. As we don't care
+ * of the value read we use an atomic rmw access to avoid costly
+ * cli/sti. Otherwise we use an even cheaper single atomic write
+ * to the APIC.
+ */
+ unsigned int cfg;
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+
+ /*
+ * No need to touch the target chip field
+ */
+ cfg = __prepare_ICR(shortcut, vector);
+
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ apic_write_around(APIC_ICR, cfg);
+}
+
+void send_IPI_self(int vector)
+{
+ __send_IPI_shortcut(APIC_DEST_SELF, vector);
+}
+
+static inline void send_IPI_mask(int mask, int vector)
+{
+ unsigned long cfg;
+ unsigned long flags;
+
+ __save_flags(flags);
+ __cli();
+
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+
+ /*
+ * prepare target chip field
+ */
+ cfg = __prepare_ICR2(mask);
+ apic_write_around(APIC_ICR2, cfg);
+
+ /*
+ * program the ICR
+ */
+ cfg = __prepare_ICR(0, vector);
+
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ apic_write_around(APIC_ICR, cfg);
+
+ __restore_flags(flags);
+}
+
+static inline void send_IPI_allbutself(int vector)
+{
+ /*
+ * If there are no other CPUs in the system then
+ * we get an APIC send error if we try to broadcast,
+ * so we have to avoid sending IPIs in this case.
+ */
+ if (!(smp_num_cpus > 1))
+ return;
+
+ __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
+}
+
+/*
+ * ********* XEN NOTICE **********
+ * I've left the following comments lying around as they look like they might
+ * be useful to get multiprocessor guest OSes going. However, I suspect the
+ * issues we face will be quite different so I've ripped out all the
+ * TLBSTATE logic (I didn't understand it anyway :-). These comments do
+ * not apply to Xen, therefore! -- Keir (8th Oct 2003).
+ */
+/*
+ * Smarter SMP flushing macros.
+ * c/o Linus Torvalds.
+ *
+ * These mean you can really definitely utterly forget about
+ * writing to user space from interrupts. (It's not allowed anyway.)
+ *
+ * Optimizations Manfred Spraul <manfred@colorfullife.com>
+ *
+ * The flush IPI assumes that a thread switch happens in this order:
+ * [cpu0: the cpu that switches]
+ * 1) switch_mm() either 1a) or 1b)
+ * 1a) thread switch to a different mm
+ * 1a1) clear_bit(cpu, &old_mm.cpu_vm_mask);
+ * Stop ipi delivery for the old mm. This is not synchronized with
+ * the other cpus, but smp_invalidate_interrupt ignores flush ipis
+ * for the wrong mm, and in the worst case we perform a superfluous
+ * tlb flush.
+ * 1a2) set cpu_tlbstate to TLBSTATE_OK
+ * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
+ * was in lazy tlb mode.
+ * 1a3) update cpu_tlbstate[].active_mm
+ * Now cpu0 accepts tlb flushes for the new mm.
+ * 1a4) set_bit(cpu, &new_mm.cpu_vm_mask);
+ * Now the other cpus will send tlb flush ipis.
+ * 1a4) change cr3.
+ * 1b) thread switch without mm change
+ * cpu_tlbstate[].active_mm is correct, cpu0 already handles
+ * flush ipis.
+ * 1b1) set cpu_tlbstate to TLBSTATE_OK
+ * 1b2) test_and_set the cpu bit in cpu_vm_mask.
+ * Atomically set the bit [other cpus will start sending flush ipis],
+ * and test the bit.
+ * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
+ * 2) switch %%esp, ie current
+ *
+ * The interrupt must handle 2 special cases:
+ * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
+ * - the cpu performs speculative tlb reads, i.e. even if the cpu only
+ * runs in kernel space, the cpu could load tlb entries for user space
+ * pages.
+ *
+ * The good news is that cpu_tlbstate is local to each cpu, no
+ * write/read ordering problems.
+ *
+ * TLB flush IPI:
+ *
+ * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
+ * 2) Leave the mm if we are in the lazy tlb mode.
+ */
+
+static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED;
+volatile unsigned long flush_cpumask;
+
+asmlinkage void smp_invalidate_interrupt(void)
+{
+ ack_APIC_irq();
+ perfc_incrc(ipis);
+ if ( likely(test_and_clear_bit(smp_processor_id(), &flush_cpumask)) )
+ local_flush_tlb();
+}
+
+void flush_tlb_mask(unsigned long mask)
+{
+ ASSERT(!in_irq());
+
+ if ( mask & (1 << smp_processor_id()) )
+ {
+ local_flush_tlb();
+ mask &= ~(1 << smp_processor_id());
+ }
+
+ if ( mask != 0 )
+ {
+ /*
+ * We are certainly not reentering a flush_lock region on this CPU
+ * because we are not in an IRQ context. We can therefore wait for the
+ * other guy to release the lock. This is harder than it sounds because
+ * local interrupts might be disabled, and he may be waiting for us to
+ * execute smp_invalidate_interrupt(). We deal with this possibility by
+ * inlining the meat of that function here.
+ */
+ while ( unlikely(!spin_trylock(&flush_lock)) )
+ {
+ if ( test_and_clear_bit(smp_processor_id(), &flush_cpumask) )
+ local_flush_tlb();
+ rep_nop();
+ }
+
+ flush_cpumask = mask;
+ send_IPI_mask(mask, INVALIDATE_TLB_VECTOR);
+ while ( flush_cpumask != 0 )
+ {
+ rep_nop();
+ barrier();
+ }
+
+ spin_unlock(&flush_lock);
+ }
+}
+
+/*
+ * NB. Must be called with no locks held and interrupts enabled.
+ * (e.g., softirq context).
+ */
+void new_tlbflush_clock_period(void)
+{
+ spin_lock(&flush_lock);
+
+ /* Someone may acquire the lock and execute the flush before us. */
+ if ( ((tlbflush_clock+1) & TLBCLOCK_EPOCH_MASK) != 0 )
+ goto out;
+
+ if ( smp_num_cpus > 1 )
+ {
+ /* Flush everyone else. We definitely flushed just before entry. */
+ flush_cpumask = ((1 << smp_num_cpus) - 1) & ~(1 << smp_processor_id());
+ send_IPI_allbutself(INVALIDATE_TLB_VECTOR);
+ while ( flush_cpumask != 0 )
+ {
+ rep_nop();
+ barrier();
+ }
+ }
+
+ /* No need for atomicity: we are the only possible updater. */
+ tlbflush_clock++;
+
+ out:
+ spin_unlock(&flush_lock);
+}
+
+static void flush_tlb_all_pge_ipi(void* info)
+{
+ __flush_tlb_pge();
+}
+
+void flush_tlb_all_pge(void)
+{
+ smp_call_function(flush_tlb_all_pge_ipi, 0, 1, 1);
+ __flush_tlb_pge();
+}
+
+void smp_send_event_check_mask(unsigned long cpu_mask)
+{
+ send_IPI_mask(cpu_mask, EVENT_CHECK_VECTOR);
+}
+
+/*
+ * Structure and data for smp_call_function(). This is designed to minimise
+ * static memory requirements. It also looks cleaner.
+ */
+static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
+
+struct call_data_struct {
+ void (*func) (void *info);
+ void *info;
+ atomic_t started;
+ atomic_t finished;
+ int wait;
+};
+
+static struct call_data_struct * call_data;
+
+/*
+ * this function sends a 'generic call function' IPI to all other CPUs
+ * in the system.
+ */
+
+int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
+ int wait)
+/*
+ * [SUMMARY] Run a function on all other CPUs.
+ * <func> The function to run. This must be fast and non-blocking.
+ * <info> An arbitrary pointer to pass to the function.
+ * <nonatomic> currently unused.
+ * <wait> If true, wait (atomically) until function has completed on other CPUs.
+ * [RETURNS] 0 on success, else a negative status code. Does not return until
+ * remote CPUs are nearly ready to execute <<func>> or have already executed it.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler, or from bottom halves.
+ */
+{
+ struct call_data_struct data;
+ int cpus = smp_num_cpus-1;
+
+ if (!cpus)
+ return 0;
+
+ data.func = func;
+ data.info = info;
+ atomic_set(&data.started, 0);
+ data.wait = wait;
+ if (wait)
+ atomic_set(&data.finished, 0);
+
+ ASSERT(local_irq_is_enabled());
+
+ spin_lock(&call_lock);
+
+ call_data = &data;
+ wmb();
+ /* Send a message to all other CPUs and wait for them to respond */
+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+
+ /* Wait for response */
+ while (atomic_read(&data.started) != cpus)
+ barrier();
+
+ if (wait)
+ while (atomic_read(&data.finished) != cpus)
+ barrier();
+
+ spin_unlock(&call_lock);
+
+ return 0;
+}
+
+static void stop_this_cpu (void * dummy)
+{
+ /*
+ * Remove this CPU:
+ */
+ clear_bit(smp_processor_id(), &cpu_online_map);
+ __cli();
+ disable_local_APIC();
+ for(;;) __asm__("hlt");
+}
+
+/*
+ * this function calls the 'stop' function on all other CPUs in the system.
+ */
+
+void smp_send_stop(void)
+{
+ smp_call_function(stop_this_cpu, NULL, 1, 0);
+ smp_num_cpus = 1;
+
+ __cli();
+ disable_local_APIC();
+ __sti();
+}
+
+/*
+ * Nothing to do, as all the work is done automatically when
+ * we return from the interrupt.
+ */
+asmlinkage void smp_event_check_interrupt(void)
+{
+ ack_APIC_irq();
+ perfc_incrc(ipis);
+}
+
+asmlinkage void smp_call_function_interrupt(void)
+{
+ void (*func) (void *info) = call_data->func;
+ void *info = call_data->info;
+ int wait = call_data->wait;
+
+ ack_APIC_irq();
+ perfc_incrc(ipis);
+
+ /*
+ * Notify initiating CPU that I've grabbed the data and am
+ * about to execute the function
+ */
+ mb();
+ atomic_inc(&call_data->started);
+ /*
+ * At this point the info structure may be out of scope unless wait==1
+ */
+ (*func)(info);
+ if (wait) {
+ mb();
+ atomic_inc(&call_data->finished);
+ }
+}
+
+#endif /* CONFIG_SMP */
--- /dev/null
+/*
+ * x86 SMP booting functions
+ *
+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ * Much of the core SMP work is based on previous work by Thomas Radke, to
+ * whom a great many thanks are extended.
+ *
+ * Thanks to Intel for making available several different Pentium,
+ * Pentium Pro and Pentium-II/Xeon MP machines.
+ * Original development of Linux SMP code supported by Caldera.
+ *
+ * This code is released under the GNU General Public License version 2 or
+ * later.
+ *
+ * Fixes
+ * Felix Koop : NR_CPUS used properly
+ * Jose Renau : Handle single CPU case.
+ * Alan Cox : By repeated request 8) - Total BogoMIP report.
+ * Greg Wright : Fix for kernel stacks panic.
+ * Erich Boleyn : MP v1.4 and additional changes.
+ * Matthias Sattler : Changes for 2.1 kernel map.
+ * Michel Lespinasse : Changes for 2.1 kernel map.
+ * Michael Chastain : Change trampoline.S to gnu as.
+ * Alan Cox : Dumb bug: 'B' step PPro's are fine
+ * Ingo Molnar : Added APIC timers, based on code
+ * from Jose Renau
+ * Ingo Molnar : various cleanups and rewrites
+ * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs
+ * Martin J. Bligh : Added support for multi-quad systems
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/interrupt.h>
+#include <xen/irq.h>
+#include <xen/mm.h>
+#include <xen/slab.h>
+#include <asm/flushtlb.h>
+#include <asm/mc146818rtc.h>
+#include <asm/smpboot.h>
+#include <xen/smp.h>
+#include <asm/msr.h>
+#include <asm/system.h>
+#include <asm/mpspec.h>
+#include <asm/io_apic.h>
+#include <xen/sched.h>
+#include <xen/delay.h>
+#include <xen/lib.h>
+
+#ifdef CONFIG_SMP
+
+/* Set if we find a B stepping CPU */
+static int smp_b_stepping;
+
+/* Setup configured maximum number of CPUs to activate */
+static int max_cpus = -1;
+
+/* Total count of live CPUs */
+int smp_num_cpus = 1;
+
+/* Bitmask of currently online CPUs */
+unsigned long cpu_online_map;
+
+static volatile unsigned long cpu_callin_map;
+static volatile unsigned long cpu_callout_map;
+
+/* Per CPU bogomips and other parameters */
+struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
+
+/* Set when the idlers are all forked */
+int smp_threads_ready;
+
+/*
+ * Trampoline 80x86 program as an array.
+ */
+
+extern unsigned char trampoline_data [];
+extern unsigned char trampoline_end [];
+static unsigned char *trampoline_base;
+
+/*
+ * Currently trivial. Write the real->protected mode
+ * bootstrap into the page concerned. The caller
+ * has made sure it's suitably aligned.
+ */
+
+static unsigned long __init setup_trampoline(void)
+{
+ memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
+ return virt_to_phys(trampoline_base);
+}
+
+/*
+ * We are called very early to get the low memory for the
+ * SMP bootup trampoline page.
+ */
+void __init smp_alloc_memory(void)
+{
+ /*
+ * Has to be in very low memory so we can execute
+ * real-mode AP code.
+ */
+ trampoline_base = __va(0x90000);
+}
+
+/*
+ * The bootstrap kernel entry code has set these up. Save them for
+ * a given CPU
+ */
+
+void __init smp_store_cpu_info(int id)
+{
+ struct cpuinfo_x86 *c = cpu_data + id;
+
+ *c = boot_cpu_data;
+ c->pte_quick = 0;
+ c->pmd_quick = 0;
+ c->pgd_quick = 0;
+ c->pgtable_cache_sz = 0;
+ identify_cpu(c);
+ /*
+ * Mask B, Pentium, but not Pentium MMX
+ */
+ if (c->x86_vendor == X86_VENDOR_INTEL &&
+ c->x86 == 5 &&
+ c->x86_mask >= 1 && c->x86_mask <= 4 &&
+ c->x86_model <= 3)
+ /*
+ * Remember we have B step Pentia with bugs
+ */
+ smp_b_stepping = 1;
+}
+
+/*
+ * Architecture specific routine called by the kernel just before init is
+ * fired off. This allows the BP to have everything in order [we hope].
+ * At the end of this all the APs will hit the system scheduling and off
+ * we go. Each AP will load the system gdt's and jump through the kernel
+ * init into idle(). At this point the scheduler will one day take over
+ * and give them jobs to do. smp_callin is a standard routine
+ * we use to track CPUs as they power up.
+ */
+
+static atomic_t smp_commenced = ATOMIC_INIT(0);
+
+void __init smp_commence(void)
+{
+ /*
+ * Lets the callins below out of their loop.
+ */
+ Dprintk("Setting commenced=1, go go go\n");
+
+ wmb();
+ atomic_set(&smp_commenced,1);
+}
+
+/*
+ * TSC synchronization.
+ *
+ * We first check whether all CPUs have their TSCs synchronized,
+ * then we print a warning if not, and always resync.
+ */
+
+static atomic_t tsc_start_flag = ATOMIC_INIT(0);
+static atomic_t tsc_count_start = ATOMIC_INIT(0);
+static atomic_t tsc_count_stop = ATOMIC_INIT(0);
+static unsigned long long tsc_values[NR_CPUS];
+
+#define NR_LOOPS 5
+
+/*
+ * accurate 64-bit/32-bit division, expanded to 32-bit divisions and 64-bit
+ * multiplication. Not terribly optimized but we need it at boot time only
+ * anyway.
+ *
+ * result == a / b
+ * == (a1 + a2*(2^32)) / b
+ * == a1/b + a2*(2^32/b)
+ * == a1/b + a2*((2^32-1)/b) + a2/b + (a2*((2^32-1) % b))/b
+ * ^---- (this multiplication can overflow)
+ */
+
+static unsigned long long div64 (unsigned long long a, unsigned long b0)
+{
+ unsigned int a1, a2;
+ unsigned long long res;
+
+ a1 = ((unsigned int*)&a)[0];
+ a2 = ((unsigned int*)&a)[1];
+
+ res = a1/b0 +
+ (unsigned long long)a2 * (unsigned long long)(0xffffffff/b0) +
+ a2 / b0 +
+ (a2 * (0xffffffff % b0)) / b0;
+
+ return res;
+}
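+
+ /*
+ * Worked example (illustrative): a = 10*2^32 + 7, b0 = 1000. Then
+ * 0xffffffff/1000 = 4294967 and 0xffffffff%1000 = 295, giving
+ * res = 0 + 10*4294967 + 0 + (10*295)/1000 = 42949670 + 2 = 42949672,
+ * which matches floor((10*2^32 + 7) / 1000) = 42949672.
+ */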
+
+static void __init synchronize_tsc_bp (void)
+{
+ int i;
+ unsigned long long t0;
+ unsigned long long sum, avg;
+ long long delta;
+ int buggy = 0;
+
+ printk("checking TSC synchronization across CPUs: ");
+
+ atomic_set(&tsc_start_flag, 1);
+ wmb();
+
+ /*
+ * We loop a few times to get a primed instruction cache,
+ * then the last pass is more or less synchronized and
+ * the BP and APs set their cycle counters to zero all at
+ * once. This reduces the chance of having random offsets
+ * between the processors, and guarantees that the maximum
+ * delay between the cycle counters is never bigger than
+ * the latency of information-passing (cachelines) between
+ * two CPUs.
+ */
+ for (i = 0; i < NR_LOOPS; i++) {
+ /*
+ * all APs synchronize but they loop on '== num_cpus'
+ */
+ while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb();
+ atomic_set(&tsc_count_stop, 0);
+ wmb();
+ /*
+ * this lets the APs save their current TSC:
+ */
+ atomic_inc(&tsc_count_start);
+
+ rdtscll(tsc_values[smp_processor_id()]);
+ /*
+ * We clear the TSC in the last loop:
+ */
+ if (i == NR_LOOPS-1)
+ write_tsc(0, 0);
+
+ /*
+ * Wait for all APs to leave the synchronization point:
+ */
+ while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb();
+ atomic_set(&tsc_count_start, 0);
+ wmb();
+ atomic_inc(&tsc_count_stop);
+ }
+
+ sum = 0;
+ for (i = 0; i < smp_num_cpus; i++) {
+ t0 = tsc_values[i];
+ sum += t0;
+ }
+ avg = div64(sum, smp_num_cpus);
+
+ sum = 0;
+ for (i = 0; i < smp_num_cpus; i++) {
+ delta = tsc_values[i] - avg;
+ if (delta < 0)
+ delta = -delta;
+ /*
+ * We report clock differences bigger than 2 microseconds.
+ */
+ if (delta > 2*ticks_per_usec) {
+ long realdelta;
+ if (!buggy) {
+ buggy = 1;
+ printk("\n");
+ }
+ realdelta = div64(delta, ticks_per_usec);
+ if (tsc_values[i] < avg)
+ realdelta = -realdelta;
+
+ printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
+ i, realdelta);
+ }
+
+ sum += delta;
+ }
+ if (!buggy)
+ printk("passed.\n");
+}
+
+static void __init synchronize_tsc_ap (void)
+{
+ int i;
+
+ /*
+ * smp_num_cpus is not necessarily known at the time
+ * this gets called, so we first wait for the BP to
+ * finish SMP initialization:
+ */
+ while (!atomic_read(&tsc_start_flag)) mb();
+
+ for (i = 0; i < NR_LOOPS; i++) {
+ atomic_inc(&tsc_count_start);
+ while (atomic_read(&tsc_count_start) != smp_num_cpus) mb();
+
+ rdtscll(tsc_values[smp_processor_id()]);
+ if (i == NR_LOOPS-1)
+ write_tsc(0, 0);
+
+ atomic_inc(&tsc_count_stop);
+ while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb();
+ }
+}
+#undef NR_LOOPS
+
+static atomic_t init_deasserted;
+
+void __init smp_callin(void)
+{
+ int cpuid, phys_id, i;
+
+ /*
+ * If woken up by an INIT in an 82489DX configuration
+ * we may get here before an INIT-deassert IPI reaches
+ * our local APIC. We have to wait for the IPI or we'll
+ * lock up on an APIC access.
+ */
+ while (!atomic_read(&init_deasserted));
+
+ /*
+ * (This works even if the APIC is not enabled.)
+ */
+ phys_id = GET_APIC_ID(apic_read(APIC_ID));
+ cpuid = smp_processor_id();
+ if (test_and_set_bit(cpuid, &cpu_online_map)) {
+ printk("huh, phys CPU#%d, CPU#%d already present??\n",
+ phys_id, cpuid);
+ BUG();
+ }
+ Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
+
+ /*
+ * STARTUP IPIs are fragile beasts as they might sometimes
+ * trigger some glue motherboard logic. Complete APIC bus
+ * silence for 1 second; this overestimates the time the
+ * boot CPU spends sending the up to 2 STARTUP IPIs
+ * by a factor of two. This should be enough.
+ */
+
+ for ( i = 0; i < 200; i++ )
+ {
+ if ( test_bit(cpuid, &cpu_callout_map) ) break;
+ mdelay(10);
+ }
+
+ if (!test_bit(cpuid, &cpu_callout_map)) {
+ printk("BUG: CPU%d started up but did not get a callout!\n",
+ cpuid);
+ BUG();
+ }
+
+ /*
+ * the boot CPU has finished the init stage and is spinning
+ * on callin_map until we finish. We are free to set up this
+ * CPU, first the APIC. (this is probably redundant on most
+ * boards)
+ */
+
+ Dprintk("CALLIN, before setup_local_APIC().\n");
+
+ setup_local_APIC();
+
+ __sti();
+
+#ifdef CONFIG_MTRR
+ /*
+ * Must be done before calibration delay is computed
+ */
+ mtrr_init_secondary_cpu ();
+#endif
+
+ Dprintk("Stack at about %p\n",&cpuid);
+
+ /*
+ * Save our processor parameters
+ */
+ smp_store_cpu_info(cpuid);
+
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ setup_apic_nmi_watchdog();
+
+ /*
+ * Allow the master to continue.
+ */
+ set_bit(cpuid, &cpu_callin_map);
+
+ /*
+ * Synchronize the TSC with the BP
+ */
+ synchronize_tsc_ap();
+}
+
+static int cpucount;
+
+/*
+ * Activate a secondary processor.
+ */
+void __init start_secondary(void)
+{
+ unsigned int cpu = cpucount;
+ /* 6 bytes suitable for passing to LIDT instruction. */
+ unsigned char idt_load[6];
+
+ extern void cpu_init(void);
+
+ set_current(idle_task[cpu]);
+
+ /*
+ * Don't put anything before smp_callin(); SMP
+ * booting is so fragile that we want to limit the
+ * things done here to the bare minimum.
+ */
+ cpu_init();
+ smp_callin();
+
+ while (!atomic_read(&smp_commenced))
+ rep_nop();
+
+ /*
+ * At this point, boot CPU has fully initialised the IDT. It is
+ * now safe to make ourselves a private copy.
+ */
+ idt_tables[cpu] = kmalloc(IDT_ENTRIES*8, GFP_KERNEL);
+ memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*8);
+ *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*8)-1;
+ *(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
+ __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) );
+
+ /*
+ * low-memory mappings have been cleared, flush them from the local TLBs
+ * too.
+ */
+ local_flush_tlb();
+
+ startup_cpu_idle_loop();
+
+ BUG();
+}
+
+extern struct {
+ unsigned long esp, ss;
+} stack_start;
+
+/* which physical APIC ID maps to which logical CPU number */
+volatile int physical_apicid_2_cpu[MAX_APICID];
+/* which logical CPU number maps to which physical APIC ID */
+volatile int cpu_2_physical_apicid[NR_CPUS];
+
+/* which logical APIC ID maps to which logical CPU number */
+volatile int logical_apicid_2_cpu[MAX_APICID];
+/* which logical CPU number maps to which logical APIC ID */
+volatile int cpu_2_logical_apicid[NR_CPUS];
+
+static inline void init_cpu_to_apicid(void)
+/* Initialize all maps between cpu number and apicids */
+{
+ int apicid, cpu;
+
+ for (apicid = 0; apicid < MAX_APICID; apicid++) {
+ physical_apicid_2_cpu[apicid] = -1;
+ logical_apicid_2_cpu[apicid] = -1;
+ }
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ cpu_2_physical_apicid[cpu] = -1;
+ cpu_2_logical_apicid[cpu] = -1;
+ }
+}
+
+static inline void map_cpu_to_boot_apicid(int cpu, int apicid)
+/*
+ * set up a mapping between cpu and apicid. Uses logical apicids for multiquad,
+ * else physical apic ids
+ */
+{
+ physical_apicid_2_cpu[apicid] = cpu;
+ cpu_2_physical_apicid[cpu] = apicid;
+}
+
+static inline void unmap_cpu_to_boot_apicid(int cpu, int apicid)
+/*
+ * undo a mapping between cpu and apicid. Uses logical apicids for multiquad,
+ * else physical apic ids
+ */
+{
+ physical_apicid_2_cpu[apicid] = -1;
+ cpu_2_physical_apicid[cpu] = -1;
+}
+
+#if APIC_DEBUG
+static inline void inquire_remote_apic(int apicid)
+{
+ int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
+ char *names[] = { "ID", "VERSION", "SPIV" };
+ int timeout, status;
+
+ printk("Inquiring remote APIC #%d...\n", apicid);
+
+ for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
+ printk("... APIC #%d %s: ", apicid, names[i]);
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+ apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
+
+ timeout = 0;
+ do {
+ udelay(100);
+ status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
+ } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
+
+ switch (status) {
+ case APIC_ICR_RR_VALID:
+ status = apic_read(APIC_RRR);
+ printk("%08x\n", status);
+ break;
+ default:
+ printk("failed\n");
+ }
+ }
+}
+#endif
+
+
+static int wakeup_secondary_via_INIT(int phys_apicid, unsigned long start_eip)
+{
+ unsigned long send_status = 0, accept_status = 0;
+ int maxlvt, timeout, num_starts, j;
+
+ Dprintk("Asserting INIT.\n");
+
+ /*
+ * Turn INIT on target chip
+ */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /*
+ * Send IPI
+ */
+ apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
+ | APIC_DM_INIT);
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ mdelay(10);
+
+ Dprintk("Deasserting INIT.\n");
+
+ /* Target chip */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /* Send IPI */
+ apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ atomic_set(&init_deasserted, 1);
+
+ /*
+ * Should we send STARTUP IPIs ?
+ *
+ * Determine this based on the APIC version.
+ * If we don't have an integrated APIC, don't send the STARTUP IPIs.
+ */
+ if (APIC_INTEGRATED(apic_version[phys_apicid]))
+ num_starts = 2;
+ else
+ num_starts = 0;
+
+ /*
+ * Run STARTUP IPI loop.
+ */
+ Dprintk("#startup loops: %d.\n", num_starts);
+
+ maxlvt = get_maxlvt();
+
+ for (j = 1; j <= num_starts; j++) {
+ Dprintk("Sending STARTUP #%d.\n",j);
+
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ Dprintk("After apic_write.\n");
+
+ /*
+ * STARTUP IPI
+ */
+
+ /* Target chip */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /* Boot on the stack */
+ /* Kick the second */
+ apic_write_around(APIC_ICR, APIC_DM_STARTUP
+ | (start_eip >> 12));
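+
+ /* The STARTUP IPI vector field carries the physical page number of
+ * the entry point: the AP starts in real mode at CS = vector<<8,
+ * IP = 0, which is why start_eip must be page-aligned. */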
+
+ /*
+ * Give the other CPU some time to accept the IPI.
+ */
+ udelay(300);
+
+ Dprintk("Startup point 1.\n");
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ /*
+ * Give the other CPU some time to accept the IPI.
+ */
+ udelay(200);
+ /*
+ * Due to the Pentium erratum 3AP.
+ */
+ if (maxlvt > 3) {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ }
+ accept_status = (apic_read(APIC_ESR) & 0xEF);
+ if (send_status || accept_status)
+ break;
+ }
+ Dprintk("After Startup.\n");
+
+ if (send_status)
+ printk("APIC never delivered???\n");
+ if (accept_status)
+ printk("APIC delivery error (%lx).\n", accept_status);
+
+ return (send_status | accept_status);
+}
+
+extern unsigned long cpu_initialized;
+
+static void __init do_boot_cpu (int apicid)
+/*
+ * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
+ * (i.e. clustered apic addressing mode), this is a LOGICAL apic ID.
+ */
+{
+ struct task_struct *idle;
+ unsigned long boot_error = 0;
+ int timeout, cpu;
+ unsigned long start_eip, stack;
+
+ cpu = ++cpucount;
+
+ if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
+ panic("failed 'createdomain' for CPU %d", cpu);
+
+ set_bit(PF_IDLETASK, &idle->flags);
+
+ idle->mm.pagetable = mk_pagetable(__pa(idle_pg_table));
+
+ map_cpu_to_boot_apicid(cpu, apicid);
+
+ SET_DEFAULT_FAST_TRAP(&idle->thread);
+
+ idle_task[cpu] = idle;
+
+ /* start_eip had better be page-aligned! */
+ start_eip = setup_trampoline();
+
+ /* So we see what's up. */
+ printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+
+ stack = __pa(__get_free_pages(GFP_KERNEL, 1));
+ stack_start.esp = stack + STACK_SIZE - STACK_RESERVED;
+
+ /* Debug build: detect stack overflow by setting up a guard page. */
+ memguard_guard_range(__va(stack), PAGE_SIZE);
+
+ /*
+ * This grunge runs the startup process for
+ * the targeted processor.
+ */
+
+ atomic_set(&init_deasserted, 0);
+
+ Dprintk("Setting warm reset code and vector.\n");
+
+ CMOS_WRITE(0xa, 0xf);
+ local_flush_tlb();
+ Dprintk("1.\n");
+ *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
+ Dprintk("2.\n");
+ *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
+ Dprintk("3.\n");
+
+ /*
+ * Be paranoid about clearing APIC errors.
+ */
+ if ( APIC_INTEGRATED(apic_version[apicid]) )
+ {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ }
+
+ /*
+ * Status is now clean
+ */
+ boot_error = 0;
+
+ /*
+ * Starting actual IPI sequence...
+ */
+
+ boot_error = wakeup_secondary_via_INIT(apicid, start_eip);
+
+ if (!boot_error) {
+ /*
+ * allow APs to start initializing.
+ */
+ Dprintk("Before Callout %d.\n", cpu);
+ set_bit(cpu, &cpu_callout_map);
+ Dprintk("After Callout %d.\n", cpu);
+
+ /*
+ * Wait 5s total for a response
+ */
+ for (timeout = 0; timeout < 50000; timeout++) {
+ if (test_bit(cpu, &cpu_callin_map))
+ break; /* It has booted */
+ udelay(100);
+ }
+
+ if (test_bit(cpu, &cpu_callin_map)) {
+ /* number CPUs logically, starting from 1 (BSP is 0) */
+ printk("CPU%d has booted.\n", cpu);
+ } else {
+ boot_error= 1;
+ if (*((volatile unsigned long *)phys_to_virt(start_eip))
+ == 0xA5A5A5A5)
+ /* trampoline started but...? */
+ printk("Stuck ??\n");
+ else
+ /* trampoline code not run */
+ printk("Not responding.\n");
+#if APIC_DEBUG
+ inquire_remote_apic(apicid);
+#endif
+ }
+ }
+ if (boot_error) {
+ /* Try to put things back the way they were before ... */
+ unmap_cpu_to_boot_apicid(cpu, apicid);
+ clear_bit(cpu, &cpu_callout_map); /* was set here (do_boot_cpu()) */
+ clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
+ clear_bit(cpu, &cpu_online_map); /* was set in smp_callin() */
+ cpucount--;
+ }
+}
+
+
+/*
+ * Cycle through the processors sending APIC IPIs to boot each.
+ */
+
+static int boot_cpu_logical_apicid;
+/* Where the IO area was mapped on multiquad, always 0 otherwise */
+void *xquad_portio = NULL;
+
+void __init smp_boot_cpus(void)
+{
+ int apicid, bit;
+
+#ifdef CONFIG_MTRR
+ /* Must be done before other processors booted */
+ mtrr_init_boot_cpu ();
+#endif
+ /* Initialize the logical to physical CPU number mapping */
+ init_cpu_to_apicid();
+
+ /*
+ * Setup boot CPU information
+ */
+ smp_store_cpu_info(0); /* Final full version of the data */
+ printk("CPU%d booted\n", 0);
+
+ /*
+ * We have the boot CPU online for sure.
+ */
+ set_bit(0, &cpu_online_map);
+ boot_cpu_logical_apicid = logical_smp_processor_id();
+ map_cpu_to_boot_apicid(0, boot_cpu_apicid);
+
+ /*
+ * If we couldn't find an SMP configuration at boot time,
+ * get out of here now!
+ */
+ if (!smp_found_config) {
+ printk("SMP motherboard not detected.\n");
+ io_apic_irqs = 0;
+ cpu_online_map = phys_cpu_present_map = 1;
+ smp_num_cpus = 1;
+ if (APIC_init_uniprocessor())
+ printk("Local APIC not detected."
+ " Using dummy APIC emulation.\n");
+ goto smp_done;
+ }
+
+ /*
+ * Should not be necessary because the MP table should list the boot
+ * CPU too, but we do it for the sake of robustness anyway.
+ */
+ if (!test_bit(boot_cpu_physical_apicid, &phys_cpu_present_map)) {
+ printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
+ boot_cpu_physical_apicid);
+ phys_cpu_present_map |= (1 << hard_smp_processor_id());
+ }
+
+ /*
+ * If we couldn't find a local APIC, then get out of here now!
+ */
+ if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
+ !test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) {
+ printk("BIOS bug, local APIC #%d not detected!...\n",
+ boot_cpu_physical_apicid);
+ printk("... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
+ io_apic_irqs = 0;
+ cpu_online_map = phys_cpu_present_map = 1;
+ smp_num_cpus = 1;
+ goto smp_done;
+ }
+
+ verify_local_APIC();
+
+ /*
+ * If SMP should be disabled, then really disable it!
+ */
+ if (!max_cpus) {
+ smp_found_config = 0;
+ printk("SMP mode deactivated, forcing use of dummy APIC emulation.\n");
+ io_apic_irqs = 0;
+ cpu_online_map = phys_cpu_present_map = 1;
+ smp_num_cpus = 1;
+ goto smp_done;
+ }
+
+ connect_bsp_APIC();
+ setup_local_APIC();
+
+ if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid)
+ BUG();
+
+ /*
+ * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
+ *
+ * In clustered apic mode, phys_cpu_present_map is constructed thus:
+ * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
+ * clustered apic ID.
+ */
+ Dprintk("CPU present map: %lx\n", phys_cpu_present_map);
+
+ for (bit = 0; bit < NR_CPUS; bit++) {
+ apicid = cpu_present_to_apicid(bit);
+ /*
+ * Don't even attempt to start the boot CPU!
+ */
+ if (apicid == boot_cpu_apicid)
+ continue;
+
+ if (!(phys_cpu_present_map & (1 << bit)))
+ continue;
+ if ((max_cpus >= 0) && (max_cpus <= cpucount+1))
+ continue;
+
+ do_boot_cpu(apicid);
+
+ /*
+ * Make sure we unmap all failed CPUs
+ */
+ if ((boot_apicid_to_cpu(apicid) == -1) &&
+ (phys_cpu_present_map & (1 << bit)))
+ printk("CPU #%d not responding - cannot use it.\n",
+ apicid);
+ }
+
+ /*
+ * Cleanup possible dangling ends...
+ */
+ /*
+ * Install writable page 0 entry to set BIOS data area.
+ */
+ local_flush_tlb();
+
+ /*
+ * Paranoid: Set warm reset code and vector here back
+ * to default values.
+ */
+ CMOS_WRITE(0, 0xf);
+
+ *((volatile long *) phys_to_virt(0x467)) = 0;
+
+ if (!cpucount) {
+ printk("Error: only one processor found.\n");
+ } else {
+ printk("Total of %d processors activated.\n", cpucount+1);
+ }
+ smp_num_cpus = cpucount + 1;
+
+ if (smp_b_stepping)
+ printk("WARNING: SMP operation may"
+ " be unreliable with B stepping processors.\n");
+ Dprintk("Boot done.\n");
+
+ /*
+ * Here we can be sure that there is an IO-APIC in the system. Let's
+ * go and set it up:
+ */
+ if ( nr_ioapics ) setup_IO_APIC();
+
+ /* Set up all local APIC timers in the system. */
+ setup_APIC_clocks();
+
+ /* Synchronize the TSC with the AP(s). */
+ if ( cpucount ) synchronize_tsc_bp();
+
+ smp_done:
+ ;
+}
+
+#endif /* CONFIG_SMP */
--- /dev/null
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
+ * (C) 2002-2003 University of Cambridge
+ ****************************************************************************
+ *
+ * File: i386/time.c
+ * Author: Rolf Neugebauer & Keir Fraser
+ *
+ * Environment: Xen Hypervisor
+ * Description: modified version of Linux' time.c
+ * implements system and wall clock time.
+ * based on freebsd's implementation.
+ */
+
+/*
+ * linux/arch/i386/kernel/time.c
+ *
+ * Copyright (C) 1991, 1992, 1995 Linus Torvalds
+ */
+
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/lib.h>
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/interrupt.h>
+#include <xen/time.h>
+#include <xen/ac_timer.h>
+
+#include <asm/io.h>
+#include <xen/smp.h>
+#include <xen/irq.h>
+#include <asm/msr.h>
+#include <asm/mpspec.h>
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+#include <asm/mc146818rtc.h>
+
+extern rwlock_t xtime_lock;
+extern unsigned long wall_jiffies;
+
+/* GLOBAL */
+unsigned long cpu_khz; /* Detected as we calibrate the TSC */
+unsigned long ticks_per_usec; /* TSC ticks per microsecond. */
+spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
+int timer_ack = 0;
+int do_timer_lists_from_pit = 0;
+
+/* PRIVATE */
+static unsigned int rdtsc_bitshift; /* Which 32 bits of TSC do we use? */
+static u64 cpu_freq; /* CPU frequency (Hz) */
+static u32 st_scale_f; /* Cycles -> ns, fractional part */
+static u32 st_scale_i; /* Cycles -> ns, integer part */
+static u32 tsc_irq; /* CPU0's TSC at last 'time update' */
+static s_time_t stime_irq; /* System time at last 'time update' */
+
+static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ u64 full_tsc;
+
+ write_lock(&xtime_lock);
+
+#ifdef CONFIG_X86_IO_APIC
+ if ( timer_ack )
+ {
+ extern spinlock_t i8259A_lock;
+ spin_lock(&i8259A_lock);
+ outb(0x0c, 0x20);
+ /* Ack the IRQ; AEOI will end it automatically. */
+ inb(0x20);
+ spin_unlock(&i8259A_lock);
+ }
+#endif
+
+ /*
+ * Updates TSC timestamp (used to interpolate passage of time between
+ * interrupts).
+ */
+ rdtscll(full_tsc);
+ tsc_irq = (u32)(full_tsc >> rdtsc_bitshift);
+
+ /* Updates xtime (wallclock time). */
+ do_timer(regs);
+
+ /* Updates system time (nanoseconds since boot). */
+ stime_irq += MILLISECS(1000/HZ);
+
+ write_unlock(&xtime_lock);
+
+ /* Rough hack to allow accurate timers to sort-of-work with no APIC. */
+ if ( do_timer_lists_from_pit )
+ __cpu_raise_softirq(smp_processor_id(), AC_TIMER_SOFTIRQ);
+}
+
+static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, 0,
+ "timer", NULL, NULL};
+
+/* ------ Calibrate the TSC -------
+ * Return processor ticks per second / CALIBRATE_FRAC.
+ */
+
+#define CLOCK_TICK_RATE 1193180 /* system crystal frequency (Hz) */
+#define CALIBRATE_FRAC 20 /* calibrate over 50ms */
+#define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC)
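+
+ /* Sanity check on the arithmetic: 1193180 Hz / 20 rounds to a latch of
+ * 59659 PIT ticks, i.e. the channel-2 countdown below gates ~50ms. */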
+
+static unsigned long __init calibrate_tsc(void)
+{
+ unsigned long startlow, starthigh, endlow, endhigh, count;
+
+ /* Set the Gate high, disable speaker */
+ outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+
+ /*
+ * Now let's take care of CTC channel 2
+ *
+ * Set the Gate high, program CTC channel 2 for mode 0, (interrupt on
+ * terminal count mode), binary count, load 5 * LATCH count, (LSB and MSB)
+ * to begin countdown.
+ */
+ outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */
+ outb(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */
+ outb(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */
+
+ rdtsc(startlow, starthigh);
+ for ( count = 0; (inb(0x61) & 0x20) == 0; count++ )
+ continue;
+ rdtsc(endlow, endhigh);
+
+ /* Error if the CTC doesn't behave itself. */
+ if ( count == 0 )
+ return 0;
+
+ /* [endhigh:endlow] = [endhigh:endlow] - [starthigh:startlow] */
+ __asm__( "subl %2,%0 ; sbbl %3,%1"
+ : "=a" (endlow), "=d" (endhigh)
+ : "g" (startlow), "g" (starthigh), "0" (endlow), "1" (endhigh) );
+
+ /* If quotient doesn't fit in 32 bits then we return error (zero). */
+ return endhigh ? 0 : endlow;
+}
+
+
+/***************************************************************************
+ * CMOS Timer functions
+ ***************************************************************************/
+
+/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
+ * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
+ * => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
+ *
+ * [For the Julian calendar (which was used in Russia before 1917,
+ * Britain & colonies before 1752, anywhere else before 1582,
+ * and is still in use by some communities) leave out the
+ * -year/100+year/400 terms, and add 10.]
+ *
+ * This algorithm was first published by Gauss (I think).
+ *
+ * WARNING: this function will overflow on 2106-02-07 06:28:16 on
+ * machines where long is 32-bit! (However, as time_t is signed, we
+ * will already get problems at other places on 2038-01-19 03:14:08)
+ */
+static inline unsigned long
+mktime (unsigned int year, unsigned int mon,
+ unsigned int day, unsigned int hour,
+ unsigned int min, unsigned int sec)
+{
+ /* 1..12 -> 11,12,1..10: put Feb last since it has a leap day. */
+ if ( 0 >= (int) (mon -= 2) )
+ {
+ mon += 12;
+ year -= 1;
+ }
+
+ return ((((unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day)+
+ year*365 - 719499
+ )*24 + hour /* now have hours */
+ )*60 + min /* now have minutes */
+ )*60 + sec; /* finally seconds */
+}
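+
+ /*
+ * Quick sanity check of the formula: mktime(1970,1,1,0,0,0) shifts to
+ * mon=11, year=1969, giving (492 - 19 + 4 + 336 + 1) + 1969*365 - 719499
+ * = 814 + 718685 - 719499 = 0 seconds, i.e. exactly the Unix epoch.
+ */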
+
+static unsigned long __get_cmos_time(void)
+{
+ unsigned int year, mon, day, hour, min, sec;
+
+ sec = CMOS_READ(RTC_SECONDS);
+ min = CMOS_READ(RTC_MINUTES);
+ hour = CMOS_READ(RTC_HOURS);
+ day = CMOS_READ(RTC_DAY_OF_MONTH);
+ mon = CMOS_READ(RTC_MONTH);
+ year = CMOS_READ(RTC_YEAR);
+
+ if ( !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD )
+ {
+ BCD_TO_BIN(sec);
+ BCD_TO_BIN(min);
+ BCD_TO_BIN(hour);
+ BCD_TO_BIN(day);
+ BCD_TO_BIN(mon);
+ BCD_TO_BIN(year);
+ }
+
+ if ( (year += 1900) < 1970 )
+ year += 100;
+
+ return mktime(year, mon, day, hour, min, sec);
+}
+
+static unsigned long get_cmos_time(void)
+{
+ unsigned long res, flags;
+ int i;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+
+ /* read RTC exactly on falling edge of update flag */
+ for ( i = 0 ; i < 1000000 ; i++ ) /* may take up to 1 second... */
+ if ( (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) )
+ break;
+ for ( i = 0 ; i < 1000000 ; i++ ) /* must try at least 2.228 ms */
+ if ( !(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) )
+ break;
+
+ res = __get_cmos_time();
+
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ return res;
+}
+
+/***************************************************************************
+ * System Time
+ ***************************************************************************/
+
+static inline u64 get_time_delta(void)
+{
+ s32 delta_tsc;
+ u32 low;
+ u64 delta, tsc;
+
+ rdtscll(tsc);
+ low = (u32)(tsc >> rdtsc_bitshift);
+ delta_tsc = (s32)(low - tsc_irq);
+ if ( unlikely(delta_tsc < 0) ) delta_tsc = 0;
+ delta = ((u64)delta_tsc * st_scale_f);
+ delta >>= 32;
+ delta += ((u64)delta_tsc * st_scale_i);
+
+ return delta;
+}
+
+s_time_t get_s_time(void)
+{
+ s_time_t now;
+ unsigned long flags;
+
+ read_lock_irqsave(&xtime_lock, flags);
+
+ now = stime_irq + get_time_delta();
+
+ /* Ensure that the returned system time is monotonically increasing. */
+ {
+ static s_time_t prev_now = 0;
+ if ( unlikely(now < prev_now) )
+ now = prev_now;
+ prev_now = now;
+ }
+
+ read_unlock_irqrestore(&xtime_lock, flags);
+
+ return now;
+}
+
+
+void update_dom_time(shared_info_t *si)
+{
+ unsigned long flags;
+
+ read_lock_irqsave(&xtime_lock, flags);
+
+ si->time_version1++;
+ wmb();
+
+ si->cpu_freq = cpu_freq;
+ si->tsc_timestamp.tsc_bitshift = rdtsc_bitshift;
+ si->tsc_timestamp.tsc_bits = tsc_irq;
+ si->system_time = stime_irq;
+ si->wc_sec = xtime.tv_sec;
+ si->wc_usec = xtime.tv_usec;
+ si->wc_usec += (jiffies - wall_jiffies) * (1000000 / HZ);
+ while ( si->wc_usec >= 1000000 )
+ {
+ si->wc_usec -= 1000000;
+ si->wc_sec++;
+ }
+
+ wmb();
+ si->time_version2++;
+
+ read_unlock_irqrestore(&xtime_lock, flags);
+}
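+
+ /*
+ * NB: time_version1/time_version2 behave like a seqlock: the writer
+ * bumps version1 before the payload and version2 after it, with wmb()
+ * ordering the stores. A guest reader is presumably expected to sample
+ * both versions around its reads and retry until they match, i.e. no
+ * update was in flight.
+ */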
+
+
+/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
+void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base)
+{
+ s64 delta;
+ long _usecs = (long)usecs;
+
+ write_lock_irq(&xtime_lock);
+
+ delta = (s64)(stime_irq - system_time_base);
+
+ _usecs += (long)(delta/1000);
+ _usecs -= (jiffies - wall_jiffies) * (1000000 / HZ);
+
+ while ( _usecs < 0 )
+ {
+ _usecs += 1000000;
+ secs--;
+ }
+
+ xtime.tv_sec = secs;
+ xtime.tv_usec = _usecs;
+
+ write_unlock_irq(&xtime_lock);
+
+ update_dom_time(current->shared_info);
+}
+
+
+/* Late init function (after all CPUs are booted). */
+int __init init_xen_time()
+{
+ u64 scale;
+ u64 full_tsc;
+ unsigned int cpu_ghz;
+
+ cpu_ghz = (unsigned int)(cpu_freq / 1000000000ULL);
+ for ( rdtsc_bitshift = 0; cpu_ghz != 0; rdtsc_bitshift++, cpu_ghz >>= 1 )
+ continue;
+
+ scale = 1000000000LL << (32 + rdtsc_bitshift);
+ scale /= cpu_freq;
+ st_scale_f = scale & 0xffffffff;
+ st_scale_i = scale >> 32;
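+
+ /*
+ * Worked example (illustrative): on a 2GHz CPU, cpu_ghz = 2 yields
+ * rdtsc_bitshift = 2, so get_time_delta() counts in units of 4 TSC
+ * cycles. scale = (10^9 << 34) / (2 * 10^9) = 2^33, hence st_scale_i = 2
+ * and st_scale_f = 0: each 4-cycle unit is exactly 2ns.
+ */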
+
+ /* System time ticks from zero. */
+ rdtscll(full_tsc);
+ stime_irq = (s_time_t)0;
+ tsc_irq = (u32)(full_tsc >> rdtsc_bitshift);
+
+ /* Wallclock time starts as the initial RTC time. */
+ xtime.tv_sec = get_cmos_time();
+
+ printk("Time init:\n");
+ printk(".... System Time: %lldns\n",
+ NOW());
+ printk(".... cpu_freq: %08X:%08X\n",
+ (u32)(cpu_freq>>32), (u32)cpu_freq);
+ printk(".... scale: %08X:%08X\n",
+ (u32)(scale>>32), (u32)scale);
+ printk(".... Wall Clock: %lds %ldus\n",
+ xtime.tv_sec, xtime.tv_usec);
+
+ return 0;
+}
+
+
+/* Early init function. */
+void __init time_init(void)
+{
+ unsigned long ticks_per_frac = calibrate_tsc();
+
+ if ( !ticks_per_frac )
+ panic("Error calibrating TSC\n");
+
+ ticks_per_usec = ticks_per_frac / (1000000/CALIBRATE_FRAC);
+ cpu_khz = ticks_per_frac / (1000/CALIBRATE_FRAC);
+
+ cpu_freq = (u64)ticks_per_frac * (u64)CALIBRATE_FRAC;
+
+ printk("Detected %lu.%03lu MHz processor.\n",
+ cpu_khz / 1000, cpu_khz % 1000);
+
+ setup_irq(0, &irq0);
+}
--- /dev/null
+/*
+ *
+ * Trampoline.S Derived from Setup.S by Linus Torvalds
+ *
+ * 4 Jan 1997 Michael Chastain: changed to gnu as.
+ *
+ * Entry: CS:IP point to the start of our code, we are
+ * in real mode with no stack, but the rest of the
+ * trampoline page to make our stack and everything else
+ * is a mystery.
+ *
+ * On entry to trampoline_data, the processor is in real mode
+ * with 16-bit addressing and 16-bit data. CS has some value
+ * and IP is zero. Thus, data addresses need to be absolute
+ * (no relocation) and are taken with regard to r_base.
+ */
+
+#include <xen/config.h>
+#include <hypervisor-ifs/hypervisor-if.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_SMP
+
+.data
+
+.code16
+
+ENTRY(trampoline_data)
+r_base = .
+ mov %cs, %ax # Code and data in the same place
+ mov %ax, %ds
+
+ movl $0xA5A5A5A5, %ebx # Flag an SMP trampoline
+ cli # We should be safe anyway
+
+ movl $0xA5A5A5A5, trampoline_data - r_base
+
+ lidt idt_48 - r_base # load idt with 0, 0
+ lgdt gdt_48 - r_base # load gdt with whatever is appropriate
+
+ xor %ax, %ax
+ inc %ax # protected mode (PE) bit
+ lmsw %ax # into protected mode
+ jmp flush_instr
+flush_instr:
+ ljmpl $__HYPERVISOR_CS, $(MONITOR_BASE)-__PAGE_OFFSET
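+ # The far jump above serves two purposes: it loads CS with a proper
+ # protected-mode selector and it flushes the prefetch queue. The target
+ # is a physical address (MONITOR_BASE minus the virtual offset) because
+ # paging is not yet enabled on this CPU.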
+
+idt_48:
+ .word 0 # idt limit = 0
+ .word 0, 0 # idt base = 0L
+
+gdt_48:
+ .word (LAST_RESERVED_GDT_ENTRY*8)+7
+ .long gdt_table-__PAGE_OFFSET
+
+.globl SYMBOL_NAME(trampoline_end)
+SYMBOL_NAME_LABEL(trampoline_end)
+
+#endif /* CONFIG_SMP */
--- /dev/null
+/******************************************************************************
+ * arch/i386/traps.c
+ *
+ * Modifications to Linux original are copyright (c) 2002-2003, K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * xen/arch/i386/traps.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/interrupt.h>
+#include <xen/sched.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/mm.h>
+#include <asm/ptrace.h>
+#include <xen/delay.h>
+#include <xen/spinlock.h>
+#include <xen/irq.h>
+#include <xen/perfc.h>
+#include <xen/shadow.h>
+#include <asm/domain_page.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/desc.h>
+#include <asm/debugreg.h>
+#include <asm/smp.h>
+#include <asm/flushtlb.h>
+#include <asm/uaccess.h>
+#include <asm/i387.h>
+#include <asm/pdb.h>
+
+#define GTBF_TRAP 1
+#define GTBF_TRAP_NOCODE 2
+#define GTBF_TRAP_CR2 4
+struct guest_trap_bounce {
+ unsigned long error_code; /* 0 */
+ unsigned long cr2; /* 4 */
+ unsigned short flags; /* 8 */
+ unsigned short cs; /* 10 */
+ unsigned long eip; /* 12 */
+} guest_trap_bounce[NR_CPUS] = { { 0 } };
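+/*
+ * Note: the byte-offset comments above appear to be load-bearing -- the
+ * low-level exception return path (presumably in entry.S, not shown in
+ * this hunk) would index this struct by hard-coded offsets, so the
+ * layout must not change. (Assumption based on the offset annotations.)
+ */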
+
+#define DOUBLEFAULT_STACK_SIZE 1024
+static struct tss_struct doublefault_tss;
+static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];
+
+asmlinkage int hypervisor_call(void);
+asmlinkage void lcall7(void);
+asmlinkage void lcall27(void);
+
+/* Master table, and the one used by CPU0. */
+struct desc_struct idt_table[256] = { {0, 0}, };
+/* All other CPUs have their own copy. */
+struct desc_struct *idt_tables[NR_CPUS] = { 0 };
+
+asmlinkage void divide_error(void);
+asmlinkage void debug(void);
+asmlinkage void nmi(void);
+asmlinkage void int3(void);
+asmlinkage void overflow(void);
+asmlinkage void bounds(void);
+asmlinkage void invalid_op(void);
+asmlinkage void device_not_available(void);
+asmlinkage void coprocessor_segment_overrun(void);
+asmlinkage void invalid_TSS(void);
+asmlinkage void segment_not_present(void);
+asmlinkage void stack_segment(void);
+asmlinkage void general_protection(void);
+asmlinkage void page_fault(void);
+asmlinkage void coprocessor_error(void);
+asmlinkage void simd_coprocessor_error(void);
+asmlinkage void alignment_check(void);
+asmlinkage void spurious_interrupt_bug(void);
+asmlinkage void machine_check(void);
+
+int kstack_depth_to_print = 8*20;
+
+static inline int kernel_text_address(unsigned long addr)
+{
+ if (addr >= (unsigned long) &_stext &&
+ addr <= (unsigned long) &_etext)
+ return 1;
+ return 0;
+}
+
+void show_stack(unsigned long *esp)
+{
+ unsigned long *stack, addr;
+ int i;
+
+ printk("Stack trace from ESP=%p:\n", esp);
+
+ stack = esp;
+ for ( i = 0; i < kstack_depth_to_print; i++ )
+ {
+ if ( ((long)stack & (STACK_SIZE-1)) == 0 )
+ break;
+ if ( i && ((i % 8) == 0) )
+ printk("\n ");
+ if ( kernel_text_address(*stack) )
+ printk("[%08lx] ", *stack++);
+ else
+ printk("%08lx ", *stack++);
+ }
+ printk("\n");
+
+ printk("Call Trace from ESP=%p: ", esp);
+ stack = esp;
+ i = 0;
+ while (((long) stack & (STACK_SIZE-1)) != 0) {
+ addr = *stack++;
+ if (kernel_text_address(addr)) {
+ if (i && ((i % 6) == 0))
+ printk("\n ");
+ printk("[<%08lx>] ", addr);
+ i++;
+ }
+ }
+ printk("\n");
+}
+
+void show_registers(struct pt_regs *regs)
+{
+ unsigned long esp;
+ unsigned short ss;
+
+ esp = (unsigned long) (&regs->esp);
+ ss = __HYPERVISOR_DS;
+ if ( regs->xcs & 3 )
+ {
+ esp = regs->esp;
+ ss = regs->xss & 0xffff;
+ }
+
+ printk("CPU: %d\nEIP: %04x:[<%08lx>] \nEFLAGS: %08lx\n",
+ smp_processor_id(), 0xffff & regs->xcs, regs->eip, regs->eflags);
+ printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
+ regs->eax, regs->ebx, regs->ecx, regs->edx);
+ printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
+ regs->esi, regs->edi, regs->ebp, esp);
+ printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
+ regs->xds & 0xffff, regs->xes & 0xffff,
+ regs->xfs & 0xffff, regs->xgs & 0xffff, ss);
+
+ show_stack(&regs->esp);
+}
+
+
+spinlock_t die_lock = SPIN_LOCK_UNLOCKED;
+
+void die(const char * str, struct pt_regs * regs, long err)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&die_lock, flags);
+ printk("%s: %04lx,%04lx\n", str, err >> 16, err & 0xffff);
+ show_registers(regs);
+ spin_unlock_irqrestore(&die_lock, flags);
+ panic("HYPERVISOR DEATH!!\n");
+}
+
+
+static inline void do_trap(int trapnr, char *str,
+ struct pt_regs *regs,
+ long error_code, int use_error_code)
+{
+ struct task_struct *p = current;
+ struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
+ trap_info_t *ti;
+ unsigned long fixup;
+
+ if (!(regs->xcs & 3))
+ goto fault_in_hypervisor;
+
+ ti = current->thread.traps + trapnr;
+ gtb->flags = use_error_code ? GTBF_TRAP : GTBF_TRAP_NOCODE;
+ gtb->error_code = error_code;
+ gtb->cs = ti->cs;
+ gtb->eip = ti->address;
+ if ( TI_GET_IF(ti) )
+ p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
+ return;
+
+ fault_in_hypervisor:
+
+ if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
+ {
+ DPRINTK("Trap %d: %08lx -> %08lx\n", trapnr, regs->eip, fixup);
+ regs->eip = fixup;
+ regs->xds = regs->xes = regs->xfs = regs->xgs = __HYPERVISOR_DS;
+ return;
+ }
+
+ show_registers(regs);
+ panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
+ "[error_code=%08x]\n",
+ smp_processor_id(), trapnr, str, error_code);
+}
+
+#define DO_ERROR_NOCODE(trapnr, str, name) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+ do_trap(trapnr, str, regs, error_code, 0); \
+}
+
+#define DO_ERROR(trapnr, str, name) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+ do_trap(trapnr, str, regs, error_code, 1); \
+}
+
+DO_ERROR_NOCODE( 0, "divide error", divide_error)
+DO_ERROR_NOCODE( 4, "overflow", overflow)
+DO_ERROR_NOCODE( 5, "bounds", bounds)
+DO_ERROR_NOCODE( 6, "invalid operand", invalid_op)
+DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
+DO_ERROR(10, "invalid TSS", invalid_TSS)
+DO_ERROR(11, "segment not present", segment_not_present)
+DO_ERROR(12, "stack segment", stack_segment)
+/* Vector 15 reserved by Intel */
+DO_ERROR_NOCODE(16, "fpu error", coprocessor_error)
+DO_ERROR(17, "alignment check", alignment_check)
+DO_ERROR_NOCODE(18, "machine check", machine_check)
+DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error)
+
+asmlinkage void do_int3(struct pt_regs *regs, long error_code)
+{
+ struct task_struct *p = current;
+ struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
+ trap_info_t *ti;
+
+#ifdef XEN_DEBUGGER
+ if ( pdb_initialized && pdb_handle_exception(3, regs) == 0 )
+ return;
+#endif
+
+ if ( (regs->xcs & 3) != 3 )
+ {
+ if ( unlikely((regs->xcs & 3) == 0) )
+ {
+ show_registers(regs);
+ panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n"
+ "[error_code=%08x]\n",
+ smp_processor_id(), error_code);
+ }
+ }
+
+ ti = current->thread.traps + 3;
+ gtb->flags = GTBF_TRAP_NOCODE;
+ gtb->error_code = error_code;
+ gtb->cs = ti->cs;
+ gtb->eip = ti->address;
+ if ( TI_GET_IF(ti) )
+ p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
+}
+
+asmlinkage void do_double_fault(void)
+{
+ extern spinlock_t console_lock;
+ struct tss_struct *tss = &doublefault_tss;
+ unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;
+
+ /* Disable the NMI watchdog. It's useless now. */
+ watchdog_on = 0;
+
+ /* Find information saved during fault and dump it to the console. */
+ tss = &init_tss[cpu];
+ printk("CPU: %d\nEIP: %04x:[<%08lx>] \nEFLAGS: %08lx\n",
+ cpu, tss->cs, tss->eip, tss->eflags);
+ printk("CR3: %08lx\n", tss->__cr3);
+ printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
+ tss->eax, tss->ebx, tss->ecx, tss->edx);
+ printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
+ tss->esi, tss->edi, tss->ebp, tss->esp);
+ printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
+ tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
+ printk("************************************\n");
+ printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu);
+ printk("System needs manual reset.\n");
+ printk("************************************\n");
+
+ /* Lock up the console to prevent spurious output from other CPUs. */
+ spin_lock(&console_lock);
+
+ /* Wait for manual reset. */
+ for ( ; ; ) ;
+}
+
+asmlinkage void do_page_fault(struct pt_regs *regs, long error_code)
+{
+ struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
+ trap_info_t *ti;
+ unsigned long off, addr, fixup;
+ struct task_struct *p = current;
+ extern int map_ldt_shadow_page(unsigned int);
+
+ __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : );
+
+ perfc_incrc(page_faults);
+
+ if ( unlikely(addr >= LDT_VIRT_START) &&
+ (addr < (LDT_VIRT_START + (p->mm.ldt_ents*LDT_ENTRY_SIZE))) )
+ {
+ /*
+ * Copy a mapping from the guest's LDT, if it is valid. Otherwise we
+ * send the fault up to the guest OS to be handled.
+ */
+ off = addr - LDT_VIRT_START;
+ addr = p->mm.ldt_base + off;
+ if ( likely(map_ldt_shadow_page(off >> PAGE_SHIFT)) )
+ return; /* successfully copied the mapping */
+ }
+
+ if ( unlikely(p->mm.shadow_mode) &&
+ (addr < PAGE_OFFSET) && shadow_fault(addr, error_code) )
+ return; /* Returns TRUE if fault was handled. */
+
+ if ( unlikely(!(regs->xcs & 3)) )
+ goto fault_in_hypervisor;
+
+ ti = p->thread.traps + 14;
+ gtb->flags = GTBF_TRAP_CR2; /* page fault pushes %cr2 */
+ gtb->cr2 = addr;
+ gtb->error_code = error_code;
+ gtb->cs = ti->cs;
+ gtb->eip = ti->address;
+ if ( TI_GET_IF(ti) )
+ p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
+ return;
+
+ fault_in_hypervisor:
+
+ if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
+ {
+ perfc_incrc(copy_user_faults);
+ if ( !p->mm.shadow_mode )
+ DPRINTK("Page fault: %08lx -> %08lx\n", regs->eip, fixup);
+ regs->eip = fixup;
+ regs->xds = regs->xes = regs->xfs = regs->xgs = __HYPERVISOR_DS;
+ return;
+ }
+
+ if ( addr >= PAGE_OFFSET )
+ {
+ unsigned long page;
+ page = l2_pgentry_val(idle_pg_table[addr >> L2_PAGETABLE_SHIFT]);
+ printk("*pde = %08lx\n", page);
+ if ( page & _PAGE_PRESENT )
+ {
+ page &= PAGE_MASK;
+ page = ((unsigned long *) __va(page))[(addr&0x3ff000)>>PAGE_SHIFT];
+ printk(" *pte = %08lx\n", page);
+ }
+#ifdef MEMORY_GUARD
+ if ( !(error_code & 1) )
+ printk(" -- POSSIBLY AN ACCESS TO FREED MEMORY? --\n");
+#endif
+ }
+
+#ifdef XEN_DEBUGGER
+ if ( pdb_page_fault_possible )
+ {
+ pdb_page_fault = 1;
+ /* make eax & edx valid to complete the instruction */
+ regs->eax = (long)&pdb_page_fault_scratch;
+ regs->edx = (long)&pdb_page_fault_scratch;
+ return;
+ }
+#endif
+
+ show_registers(regs);
+ panic("CPU%d FATAL PAGE FAULT\n"
+ "[error_code=%08x]\n"
+ "Faulting linear address might be %08lx\n",
+ smp_processor_id(), error_code, addr);
+}
+
+asmlinkage void do_general_protection(struct pt_regs *regs, long error_code)
+{
+ struct task_struct *p = current;
+ struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
+ trap_info_t *ti;
+ unsigned long fixup;
+
+ /* Badness if error in ring 0, or result of an interrupt. */
+ if ( !(regs->xcs & 3) || (error_code & 1) )
+ goto gp_in_kernel;
+
+ /*
+ * Cunning trick to allow arbitrary "INT n" handling.
+ *
+ * We set DPL == 0 on all vectors in the IDT. This prevents any INT <n>
+ * instruction from trapping to the appropriate vector, when that might not
+ * be expected by Xen or the guest OS. For example, that entry might be for
+ * a fault handler (unlike traps, faults don't increment EIP), or might
+ * expect an error code on the stack (which a software trap never
+ * provides), or might be a hardware interrupt handler that doesn't like
+ * being called spuriously.
+ *
+ * Instead, a GPF occurs with the faulting IDT vector in the error code.
+ * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is
+ * clear to indicate that it's a software fault, not hardware.
+ *
+ * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is
+ * okay because they can only be triggered by an explicit DPL-checked
+ * instruction. The DPL specified by the guest OS for these vectors is NOT
+ * CHECKED!!
+ */
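+ /*
+ * Example: "int $0x80" from a ring-1 guest with DPL-0 IDT entries
+ * faults here with error_code == (0x80 << 3) | 2 == 0x402, so the
+ * (error_code >> 3) below recovers vector 0x80.
+ */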
+ if ( (error_code & 3) == 2 )
+ {
+ /* This fault must be due to <INT n> instruction. */
+ ti = current->thread.traps + (error_code>>3);
+ if ( TI_GET_DPL(ti) >= (regs->xcs & 3) )
+ {
+#ifdef XEN_DEBUGGER
+ if ( pdb_initialized && (pdb_ctx.system_call != 0) )
+ {
+ unsigned long cr3;
+ __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : );
+ if ( cr3 == pdb_ctx.ptbr )
+ pdb_linux_syscall_enter_bkpt(regs, error_code, ti);
+ }
+#endif
+
+ gtb->flags = GTBF_TRAP_NOCODE;
+ regs->eip += 2;
+ goto finish_propagation;
+ }
+ }
+
+ /* Pass on GPF as is. */
+ ti = current->thread.traps + 13;
+ gtb->flags = GTBF_TRAP;
+ gtb->error_code = error_code;
+ finish_propagation:
+ gtb->cs = ti->cs;
+ gtb->eip = ti->address;
+ if ( TI_GET_IF(ti) )
+ p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
+ return;
+
+ gp_in_kernel:
+
+ if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
+ {
+ DPRINTK("GPF (%04lx): %08lx -> %08lx\n", error_code, regs->eip, fixup);
+ regs->eip = fixup;
+ regs->xds = regs->xes = regs->xfs = regs->xgs = __HYPERVISOR_DS;
+ return;
+ }
+
+ die("general protection fault", regs, error_code);
+}
+
+asmlinkage void mem_parity_error(unsigned char reason, struct pt_regs * regs)
+{
+ printk("NMI received. Dazed and confused, but trying to continue\n");
+ printk("You probably have a hardware problem with your RAM chips\n");
+
+ /* Clear and disable the memory parity error line. */
+ reason = (reason & 0xf) | 4;
+ outb(reason, 0x61);
+
+ show_registers(regs);
+ panic("PARITY ERROR");
+}
+
+asmlinkage void io_check_error(unsigned char reason, struct pt_regs * regs)
+{
+ printk("NMI: IOCK error (debug interrupt?)\n");
+
+ reason = (reason & 0xf) | 8;
+ outb(reason, 0x61);
+
+ show_registers(regs);
+ panic("IOCK ERROR");
+}
+
+static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+{
+ printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
+ printk("Dazed and confused, but trying to continue\n");
+ printk("Do you have a strange power saving mode enabled?\n");
+}
+
+asmlinkage void do_nmi(struct pt_regs * regs, unsigned long reason)
+{
+ ++nmi_count(smp_processor_id());
+
+#if CONFIG_X86_LOCAL_APIC
+ if ( nmi_watchdog )
+ nmi_watchdog_tick(regs);
+ else
+#endif
+ unknown_nmi_error((unsigned char)(reason&0xff), regs);
+}
+
+asmlinkage void math_state_restore(struct pt_regs *regs, long error_code)
+{
+ /* Prevent recursion. */
+ clts();
+
+ if ( !test_bit(PF_USEDFPU, &current->flags) )
+ {
+ if ( test_bit(PF_DONEFPUINIT, &current->flags) )
+ restore_fpu(current);
+ else
+ init_fpu();
+ set_bit(PF_USEDFPU, &current->flags); /* so we fnsave on switch_to() */
+ }
+
+ if ( test_and_clear_bit(PF_GUEST_STTS, &current->flags) )
+ {
+ struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
+ gtb->flags = GTBF_TRAP_NOCODE;
+ gtb->cs = current->thread.traps[7].cs;
+ gtb->eip = current->thread.traps[7].address;
+ }
+}
+
+#ifdef XEN_DEBUGGER
+asmlinkage void do_pdb_debug(struct pt_regs *regs, long error_code)
+{
+ unsigned int condition;
+ struct task_struct *tsk = current;
+ struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
+
+ __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));
+ if ( (condition & (1 << 14)) != (1 << 14) )
+ printk("\nwarning: debug trap w/o BS bit [0x%x]\n\n", condition);
+ __asm__("movl %0,%%db6" : : "r" (0));
+
+ if ( pdb_handle_exception(1, regs) != 0 )
+ {
+ tsk->thread.debugreg[6] = condition;
+
+ gtb->flags = GTBF_TRAP_NOCODE;
+ gtb->cs = tsk->thread.traps[1].cs;
+ gtb->eip = tsk->thread.traps[1].address;
+ }
+}
+#endif
+
+asmlinkage void do_debug(struct pt_regs *regs, long error_code)
+{
+ unsigned int condition;
+ struct task_struct *tsk = current;
+ struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
+
+#ifdef XEN_DEBUGGER
+ if ( pdb_initialized )
+ return do_pdb_debug(regs, error_code);
+#endif
+
+ __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));
+
+ /* Mask out spurious debug traps due to lazy DR7 setting */
+ if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
+ (tsk->thread.debugreg[7] == 0) )
+ {
+ __asm__("movl %0,%%db7" : : "r" (0));
+ return;
+ }
+
+ if ( (regs->xcs & 3) == 0 )
+ {
+ /* Clear TF just for absolute sanity. */
+ regs->eflags &= ~EF_TF;
+ /*
+ * Basically, we ignore watchpoints when they trigger in
+ * the hypervisor. This may happen when a buffer is passed
+ * to us which previously had a watchpoint set on it.
+ * No need to bump EIP; the only faulting trap is an
+ * instruction breakpoint, which can't happen to us.
+ */
+ return;
+ }
+
+ /* Save debug status register where guest OS can peek at it */
+ tsk->thread.debugreg[6] = condition;
+
+ gtb->flags = GTBF_TRAP_NOCODE;
+ gtb->cs = tsk->thread.traps[1].cs;
+ gtb->eip = tsk->thread.traps[1].address;
+}
+
+
+asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs,
+ long error_code)
+{ /* nothing */ }
+
+
+#define _set_gate(gate_addr,type,dpl,addr) \
+do { \
+ int __d0, __d1; \
+ __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
+ "movw %4,%%dx\n\t" \
+ "movl %%eax,%0\n\t" \
+ "movl %%edx,%1" \
+ :"=m" (*((long *) (gate_addr))), \
+ "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
+ :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
+ "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \
+} while (0)
+
+void set_intr_gate(unsigned int n, void *addr)
+{
+ _set_gate(idt_table+n,14,0,addr);
+}
+
+static void __init set_system_gate(unsigned int n, void *addr)
+{
+ _set_gate(idt_table+n,14,3,addr);
+}
+
+static void set_task_gate(unsigned int n, unsigned int sel)
+{
+ idt_table[n].a = sel << 16;
+ idt_table[n].b = 0x8500;
+}
+
+#define _set_seg_desc(gate_addr,type,dpl,base,limit) {\
+ *((gate_addr)+1) = ((base) & 0xff000000) | \
+ (((base) & 0x00ff0000)>>16) | \
+ ((limit) & 0xf0000) | \
+ ((dpl)<<13) | \
+ (0x00408000) | \
+ ((type)<<8); \
+ *(gate_addr) = (((base) & 0x0000ffff)<<16) | \
+ ((limit) & 0x0ffff); }
+
+#define _set_tssldt_desc(n,addr,limit,type) \
+__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
+ "movw %%ax,2(%2)\n\t" \
+ "rorl $16,%%eax\n\t" \
+ "movb %%al,4(%2)\n\t" \
+ "movb %4,5(%2)\n\t" \
+ "movb $0,6(%2)\n\t" \
+ "movb %%ah,7(%2)\n\t" \
+ "rorl $16,%%eax" \
+ : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type))
+
+void set_tss_desc(unsigned int n, void *addr)
+{
+ _set_tssldt_desc(gdt_table+__TSS(n), (int)addr, 8299, 0x89);
+}
+
+void __init trap_init(void)
+{
+ /*
+ * Make a separate task for double faults. This will get us debug output if
+ * we blow the kernel stack.
+ */
+ struct tss_struct *tss = &doublefault_tss;
+ memset(tss, 0, sizeof(*tss));
+ tss->ds = __HYPERVISOR_DS;
+ tss->es = __HYPERVISOR_DS;
+ tss->ss = __HYPERVISOR_DS;
+ tss->esp = (unsigned long)
+ &doublefault_stack[DOUBLEFAULT_STACK_SIZE];
+ tss->__cr3 = __pa(idle_pg_table);
+ tss->cs = __HYPERVISOR_CS;
+ tss->eip = (unsigned long)do_double_fault;
+ tss->eflags = 2;
+ tss->bitmap = INVALID_IO_BITMAP_OFFSET;
+ _set_tssldt_desc(gdt_table+__DOUBLEFAULT_TSS_ENTRY,
+ (int)tss, 235, 0x89);
+
+ /*
+ * Note that interrupt gates are always used, rather than trap gates. We
+ * must have interrupts disabled until DS/ES/FS/GS are saved because the
+ * first activation must have the "bad" value(s) for these registers and
+ * we may lose them if another activation is installed before they are
+ * saved. The page-fault handler also needs interrupts disabled until %cr2
+ * has been read and saved on the stack.
+ */
+ set_intr_gate(0,&divide_error);
+ set_intr_gate(1,&debug);
+ set_intr_gate(2,&nmi);
+ set_system_gate(3,&int3); /* usable from all privilege levels */
+ set_system_gate(4,&overflow); /* usable from all privilege levels */
+ set_intr_gate(5,&bounds);
+ set_intr_gate(6,&invalid_op);
+ set_intr_gate(7,&device_not_available);
+ set_task_gate(8,__DOUBLEFAULT_TSS_ENTRY<<3);
+ set_intr_gate(9,&coprocessor_segment_overrun);
+ set_intr_gate(10,&invalid_TSS);
+ set_intr_gate(11,&segment_not_present);
+ set_intr_gate(12,&stack_segment);
+ set_intr_gate(13,&general_protection);
+ set_intr_gate(14,&page_fault);
+ set_intr_gate(15,&spurious_interrupt_bug);
+ set_intr_gate(16,&coprocessor_error);
+ set_intr_gate(17,&alignment_check);
+ set_intr_gate(18,&machine_check);
+ set_intr_gate(19,&simd_coprocessor_error);
+
+ /* Only ring 1 can access monitor services. */
+ _set_gate(idt_table+HYPERVISOR_CALL_VECTOR,14,1,&hypervisor_call);
+
+ /* CPU0 uses the master IDT. */
+ idt_tables[0] = idt_table;
+
+ /*
+ * Should be a barrier for any external CPU state.
+ */
+ {
+ extern void cpu_init(void);
+ cpu_init();
+ }
+}
+
+
+long do_set_trap_table(trap_info_t *traps)
+{
+ trap_info_t cur;
+ trap_info_t *dst = current->thread.traps;
+
+ for ( ; ; )
+ {
+ if ( copy_from_user(&cur, traps, sizeof(cur)) ) return -EFAULT;
+
+ if ( cur.address == 0 ) break;
+
+ if ( !VALID_CODESEL(cur.cs) ) return -EPERM;
+
+ memcpy(dst+cur.vector, &cur, sizeof(cur));
+ traps++;
+ }
+
+ return 0;
+}
+
+
+long do_set_callbacks(unsigned long event_selector,
+ unsigned long event_address,
+ unsigned long failsafe_selector,
+ unsigned long failsafe_address)
+{
+ struct task_struct *p = current;
+
+ if ( !VALID_CODESEL(event_selector) || !VALID_CODESEL(failsafe_selector) )
+ return -EPERM;
+
+ p->event_selector = event_selector;
+ p->event_address = event_address;
+ p->failsafe_selector = failsafe_selector;
+ p->failsafe_address = failsafe_address;
+
+ return 0;
+}
+
+
+long set_fast_trap(struct task_struct *p, int idx)
+{
+ trap_info_t *ti;
+
+ /* Index 0 is special: it disables fast traps. */
+ if ( idx == 0 )
+ {
+ if ( p == current )
+ CLEAR_FAST_TRAP(&p->thread);
+ SET_DEFAULT_FAST_TRAP(&p->thread);
+ return 0;
+ }
+
+ /*
+ * We only fast-trap vectors 0x20-0x2f, and vector 0x80.
+ * The former range is used by Windows and MS-DOS.
+ * Vector 0x80 is used by Linux and the BSD variants.
+ */
+ if ( (idx != 0x80) && ((idx < 0x20) || (idx > 0x2f)) )
+ return -1;
+
+ ti = p->thread.traps + idx;
+
+ /*
+ * We can't virtualise interrupt gates, as there's no way to get
+ * the CPU to automatically clear the events_mask variable.
+ */
+ if ( TI_GET_IF(ti) )
+ return -1;
+
+ if ( p == current )
+ CLEAR_FAST_TRAP(&p->thread);
+
+ p->thread.fast_trap_idx = idx;
+ p->thread.fast_trap_desc.a = (ti->cs << 16) | (ti->address & 0xffff);
+ p->thread.fast_trap_desc.b =
+ (ti->address & 0xffff0000) | 0x8f00 | (TI_GET_DPL(ti)&3)<<13;
+
+ if ( p == current )
+ SET_FAST_TRAP(&p->thread);
+
+ return 0;
+}
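+/*
+ * Descriptor decode for the fast path above: .a is (selector << 16) |
+ * (offset & 0xffff); in .b, 0x8f00 sets P=1 and type 0xF (a 32-bit trap
+ * gate, so interrupts stay enabled on entry), with the guest's DPL in
+ * bits 13-14. Interrupt-gate semantics were already rejected by the
+ * TI_GET_IF() check.
+ */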
+
+
+long do_set_fast_trap(int idx)
+{
+ return set_fast_trap(current, idx);
+}
+
+
+long do_fpu_taskswitch(void)
+{
+ set_bit(PF_GUEST_STTS, &current->flags);
+ stts();
+ return 0;
+}
+
+
+long set_debugreg(struct task_struct *p, int reg, unsigned long value)
+{
+ int i;
+
+ switch ( reg )
+ {
+ case 0:
+ if ( value > (PAGE_OFFSET-4) ) return -EPERM;
+ if ( p == current )
+ __asm__ ( "movl %0, %%db0" : : "r" (value) );
+ break;
+ case 1:
+ if ( value > (PAGE_OFFSET-4) ) return -EPERM;
+ if ( p == current )
+ __asm__ ( "movl %0, %%db1" : : "r" (value) );
+ break;
+ case 2:
+ if ( value > (PAGE_OFFSET-4) ) return -EPERM;
+ if ( p == current )
+ __asm__ ( "movl %0, %%db2" : : "r" (value) );
+ break;
+ case 3:
+ if ( value > (PAGE_OFFSET-4) ) return -EPERM;
+ if ( p == current )
+ __asm__ ( "movl %0, %%db3" : : "r" (value) );
+ break;
+ case 6:
+ /*
+ * DR6: Bits 4-11,16-31 reserved (set to 1).
+ * Bit 12 reserved (set to 0).
+ */
+ value &= 0xffffefff; /* reserved bits => 0 */
+ value |= 0xffff0ff0; /* reserved bits => 1 */
+ if ( p == current )
+ __asm__ ( "movl %0, %%db6" : : "r" (value) );
+ break;
+ case 7:
+ /*
+ * DR7: Bit 10 reserved (set to 1).
+ * Bits 11-12,14-15 reserved (set to 0).
+ * Privileged bits:
+ * GD (bit 13): must be 0.
+ * R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
+ * LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
+ */
+ /* DR7 == 0 => debugging disabled for this domain. */
+ if ( value != 0 )
+ {
+ value &= 0xffff27ff; /* reserved bits => 0 */
+ value |= 0x00000400; /* reserved bits => 1 */
+ if ( (value & (1<<13)) != 0 ) return -EPERM;
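+ /*
+ * The loop below refuses R/Wn == 10b (I/O breakpoints, which would
+ * need CR4.DE) and LENn == 10b (an undefined length on these CPUs).
+ */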
+ for ( i = 0; i < 16; i += 2 )
+ if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
+ }
+ if ( p == current )
+ __asm__ ( "movl %0, %%db7" : : "r" (value) );
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ p->thread.debugreg[reg] = value;
+ return 0;
+}
+
+long do_set_debugreg(int reg, unsigned long value)
+{
+ return set_debugreg(current, reg, value);
+}
+
+unsigned long do_get_debugreg(int reg)
+{
+ if ( (reg < 0) || (reg > 7) ) return -EINVAL;
+ return current->thread.debugreg[reg];
+}
--- /dev/null
+/*
+ * User address space access functions.
+ * The non inlined parts of asm-i386/uaccess.h are here.
+ *
+ * Copyright 1997 Andi Kleen <ak@muc.de>
+ * Copyright 1997 Linus Torvalds
+ */
+#include <xen/config.h>
+#include <asm/uaccess.h>
+//#include <asm/mmx.h>
+
+#ifdef CONFIG_X86_USE_3DNOW_AND_WORKS
+
+unsigned long
+__generic_copy_to_user(void *to, const void *from, unsigned long n)
+{
+ if (access_ok(VERIFY_WRITE, to, n))
+ {
+ if(n<512)
+ __copy_user(to,from,n);
+ else
+ mmx_copy_user(to,from,n);
+ }
+ return n;
+}
+
+unsigned long
+__generic_copy_from_user(void *to, const void *from, unsigned long n)
+{
+ if (access_ok(VERIFY_READ, from, n))
+ {
+ if(n<512)
+ __copy_user_zeroing(to,from,n);
+ else
+ mmx_copy_user_zeroing(to, from, n);
+ }
+ else
+ memset(to, 0, n);
+ return n;
+}
+
+#else
+
+unsigned long
+__generic_copy_to_user(void *to, const void *from, unsigned long n)
+{
+ prefetch(from);
+ if (access_ok(VERIFY_WRITE, to, n))
+ __copy_user(to,from,n);
+ return n;
+}
+
+unsigned long
+__generic_copy_from_user(void *to, const void *from, unsigned long n)
+{
+ prefetchw(to);
+ if (access_ok(VERIFY_READ, from, n))
+ __copy_user_zeroing(to,from,n);
+ else
+ memset(to, 0, n);
+ return n;
+}
+
+#endif
+
+/*
+ * Copy a null terminated string from userspace.
+ */
+
+#define __do_strncpy_from_user(dst,src,count,res) \
+do { \
+ int __d0, __d1, __d2; \
+ __asm__ __volatile__( \
+ " testl %1,%1\n" \
+ " jz 2f\n" \
+ "0: lodsb\n" \
+ " stosb\n" \
+ " testb %%al,%%al\n" \
+ " jz 1f\n" \
+ " decl %1\n" \
+ " jnz 0b\n" \
+ "1: subl %1,%0\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl %5,%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ ".previous" \
+ : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \
+ "=&D" (__d2) \
+ : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
+ : "memory"); \
+} while (0)
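+/*
+ * The .fixup/__ex_table pairs in these macros are what makes
+ * search_exception_table() (used by the trap handlers earlier in this
+ * patch) work: a faulting EIP inside the copy loop is looked up in
+ * __ex_table and execution resumes at the recovery stub in .fixup
+ * instead of killing the hypervisor.
+ */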
+
+long
+__strncpy_from_user(char *dst, const char *src, long count)
+{
+ long res;
+ __do_strncpy_from_user(dst, src, count, res);
+ return res;
+}
+
+long
+strncpy_from_user(char *dst, const char *src, long count)
+{
+ long res = -EFAULT;
+ if (access_ok(VERIFY_READ, src, 1))
+ __do_strncpy_from_user(dst, src, count, res);
+ return res;
+}
+
+
+/*
+ * Zero Userspace
+ */
+
+#define __do_clear_user(addr,size) \
+do { \
+ int __d0; \
+ __asm__ __volatile__( \
+ "0: rep; stosl\n" \
+ " movl %2,%0\n" \
+ "1: rep; stosb\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: lea 0(%2,%0,4),%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ " .long 1b,2b\n" \
+ ".previous" \
+ : "=&c"(size), "=&D" (__d0) \
+ : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \
+} while (0)
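+/*
+ * The fixup at label 3 recomputes the byte count still to clear as
+ * (remaining dwords * 4) + tail bytes, so clear_user() can report how
+ * much was left unwritten after a fault.
+ */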
+
+unsigned long
+clear_user(void *to, unsigned long n)
+{
+ if (access_ok(VERIFY_WRITE, to, n))
+ __do_clear_user(to, n);
+ return n;
+}
+
+unsigned long
+__clear_user(void *to, unsigned long n)
+{
+ __do_clear_user(to, n);
+ return n;
+}
+
+/*
+ * Return the size of a string (including the ending 0)
+ *
+ * Return 0 on exception, a value greater than N if too long
+ */
+
+long strnlen_user(const char *s, long n)
+{
+ unsigned long mask = -__addr_ok(s);
+ unsigned long res, tmp;
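+ /*
+ * mask is ~0UL when s looks like a valid user pointer and 0 otherwise;
+ * the final "res & mask" forces a 0 (fault) result for bad pointers
+ * without an explicit branch.
+ */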
+
+ __asm__ __volatile__(
+ " testl %0, %0\n"
+ " jz 3f\n"
+ " andl %0,%%ecx\n"
+ "0: repne; scasb\n"
+ " setne %%al\n"
+ " subl %%ecx,%0\n"
+ " addl %0,%%eax\n"
+ "1:\n"
+ ".section .fixup,\"ax\"\n"
+ "2: xorl %%eax,%%eax\n"
+ " jmp 1b\n"
+ "3: movb $1,%%al\n"
+ " jmp 1b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 0b,2b\n"
+ ".previous"
+ :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp)
+ :"0" (n), "1" (s), "2" (0), "3" (mask)
+ :"cc");
+ return res & mask;
+}
--- /dev/null
+/* ld script to make i386 Linux kernel
+ * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
+ */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(start)
+SECTIONS
+{
+ . = 0xFC400000 + 0x100000;
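+ /* 0xFC400000 + 0x100000 == 0xFC500000, which appears to match the
+ MONITOR_BASE link address used by the build rules elsewhere in this
+ patch. */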
+ _text = .; /* Text and read-only data */
+ .text : {
+ *(.text)
+ *(.fixup)
+ *(.gnu.warning)
+ } = 0x9090
+ .text.lock : { *(.text.lock) } /* out-of-line lock text */
+
+ _etext = .; /* End of text section */
+
+ .rodata : { *(.rodata) *(.rodata.*) }
+ .kstrtab : { *(.kstrtab) }
+
+ . = ALIGN(16); /* Exception table */
+ __start___ex_table = .;
+ __ex_table : { *(__ex_table) }
+ __stop___ex_table = .;
+
+ __start___ksymtab = .; /* Kernel symbol table */
+ __ksymtab : { *(__ksymtab) }
+ __stop___ksymtab = .;
+
+ __start___kallsyms = .; /* All kernel symbols */
+ __kallsyms : { *(__kallsyms) }
+ __stop___kallsyms = .;
+
+ .data : { /* Data */
+ *(.data)
+ CONSTRUCTORS
+ }
+
+ _edata = .; /* End of data section */
+
+ . = ALIGN(8192); /* init_task */
+ .data.init_task : { *(.data.init_task) }
+
+ . = ALIGN(4096); /* Init code and data */
+ __init_begin = .;
+ .text.init : { *(.text.init) }
+ .data.init : { *(.data.init) }
+ . = ALIGN(16);
+ __setup_start = .;
+ .setup.init : { *(.setup.init) }
+ __setup_end = .;
+ __initcall_start = .;
+ .initcall.init : { *(.initcall.init) }
+ __initcall_end = .;
+ . = ALIGN(4096);
+ __init_end = .;
+
+ . = ALIGN(4096);
+ .data.page_aligned : { *(.data.idt) }
+
+ . = ALIGN(32);
+ .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+
+ __bss_start = .; /* BSS */
+ .bss : {
+ *(.bss)
+ }
+ _end = . ;
+
+ /* Sections to be discarded */
+ /DISCARD/ : {
+ *(.text.exit)
+ *(.data.exit)
+ *(.exitcall.exit)
+ }
+
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+}
+++ /dev/null
-########################################
-# x86-specific definitions
-
-CC := gcc
-LD := ld
-# Linker should relocate monitor to this address
-MONITOR_BASE := 0xFC500000
-# Bootloader should load monitor to this real address
-LOAD_BASE := 0x00100000
-CFLAGS := -nostdinc -fno-builtin -fno-common -fno-strict-aliasing
-CFLAGS += -iwithprefix include -O3 -Wall -DMONITOR_BASE=$(MONITOR_BASE)
-CFLAGS += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__ -DNDEBUG
-#CFLAGS += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__
-CFLAGS += -Wno-pointer-arith -Wredundant-decls -m64
-LDARCHFLAGS :=
-LDFLAGS := -T xen.lds -N
-
-
* This file contains portions of code from Linux.
*/
-#include <asm-i386/io.h>
-#include <asm-i386/irq.h>
+#include <asm/io.h>
+#include <asm/irq.h>
#include <xen/sched.h>
#include <xen/keyhandler.h>
#include <hypervisor-ifs/kbd.h>
* Copyright (c) 2003-2004, K A Fraser
*/
-#include <asm-i386/io.h>
+#include <asm/io.h>
#include <xen/sched.h>
#include <xen/keyhandler.h>
#include <xen/reboot.h>
}
}
-int
-pci_set_dma_mask(struct pci_dev *dev, u64 mask)
-{
- if (!pci_dma_supported(dev, mask))
- return -EIO;
-
- dev->dma_mask = mask;
-
- return 0;
-}
-
-int
-pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask)
-{
- if (!pci_dac_dma_supported(dev, mask))
- return -EIO;
-
- dev->dma_mask = mask;
-
- return 0;
-}
+#if 0 /* NOT IN XEN */
+int pci_set_dma_mask(struct pci_dev *dev, u64 mask)
+int pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask)
+#endif
/*
* Translate the low bits of the PCI base
+++ /dev/null
-/*
- * asm-i386/acpi.h
- *
- * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
- * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#ifndef _ASM_ACPI_H
-#define _ASM_ACPI_H
-
-#ifdef __KERNEL__
-
-#define COMPILER_DEPENDENT_INT64 long long
-#define COMPILER_DEPENDENT_UINT64 unsigned long long
-
-/*
- * Calling conventions:
- *
- * ACPI_SYSTEM_XFACE - Interfaces to host OS (handlers, threads)
- * ACPI_EXTERNAL_XFACE - External ACPI interfaces
- * ACPI_INTERNAL_XFACE - Internal ACPI interfaces
- * ACPI_INTERNAL_VAR_XFACE - Internal variable-parameter list interfaces
- */
-#define ACPI_SYSTEM_XFACE
-#define ACPI_EXTERNAL_XFACE
-#define ACPI_INTERNAL_XFACE
-#define ACPI_INTERNAL_VAR_XFACE
-
-/* Asm macros */
-
-#define ACPI_ASM_MACROS
-#define BREAKPOINT3
-#define ACPI_DISABLE_IRQS() __cli()
-#define ACPI_ENABLE_IRQS() __sti()
-#define ACPI_FLUSH_CPU_CACHE() wbinvd()
-
-/*
- * A brief explanation as GNU inline assembly is a bit hairy
- * %0 is the output parameter in EAX ("=a")
- * %1 and %2 are the input parameters in ECX ("c")
- * and an immediate value ("i") respectively
- * All actual register references are preceded with "%%" as in "%%edx"
- * Immediate values in the assembly are preceded by "$" as in "$0x1"
- * The final asm parameter are the operation altered non-output registers.
- */
-#define ACPI_ACQUIRE_GLOBAL_LOCK(GLptr, Acq) \
- do { \
- int dummy; \
- asm("1: movl (%1),%%eax;" \
- "movl %%eax,%%edx;" \
- "andl %2,%%edx;" \
- "btsl $0x1,%%edx;" \
- "adcl $0x0,%%edx;" \
- "lock; cmpxchgl %%edx,(%1);" \
- "jnz 1b;" \
- "cmpb $0x3,%%dl;" \
- "sbbl %%eax,%%eax" \
- :"=a"(Acq),"=c"(dummy):"c"(GLptr),"i"(~1L):"dx"); \
- } while(0)
-
-#define ACPI_RELEASE_GLOBAL_LOCK(GLptr, Acq) \
- do { \
- int dummy; \
- asm("1: movl (%1),%%eax;" \
- "movl %%eax,%%edx;" \
- "andl %2,%%edx;" \
- "lock; cmpxchgl %%edx,(%1);" \
- "jnz 1b;" \
- "andl $0x1,%%eax" \
- :"=a"(Acq),"=c"(dummy):"c"(GLptr),"i"(~3L):"dx"); \
- } while(0)
-
-
-/*
- * Math helper asm macros
- */
-#define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \
- asm("divl %2;" \
- :"=a"(q32), "=d"(r32) \
- :"r"(d32), \
- "0"(n_lo), "1"(n_hi))
-
-
-#define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \
- asm("shrl $1,%2;" \
- "rcrl $1,%3;" \
- :"=r"(n_hi), "=r"(n_lo) \
- :"0"(n_hi), "1"(n_lo))
-
-
-#ifdef CONFIG_ACPI_BOOT
-extern int acpi_lapic;
-extern int acpi_ioapic;
-extern int acpi_noirq;
-
-/* Fixmap pages to reserve for ACPI boot-time tables (see fixmap.h) */
-#define FIX_ACPI_PAGES 4
-
-#else /* !CONFIG_ACPI_BOOT */
-# define acpi_lapic 0
-# define acpi_ioapic 0
-
-#endif /* !CONFIG_ACPI_BOOT */
-
-#ifdef CONFIG_ACPI_PCI
-static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
-extern int acpi_irq_balance_set(char *str);
-#else
-static inline void acpi_noirq_set(void) { }
-static inline int acpi_irq_balance_set(char *str) { return 0; }
-#endif
-
-#ifdef CONFIG_ACPI_SLEEP
-
-extern unsigned long saved_eip;
-extern unsigned long saved_esp;
-extern unsigned long saved_ebp;
-extern unsigned long saved_ebx;
-extern unsigned long saved_esi;
-extern unsigned long saved_edi;
-
-static inline void acpi_save_register_state(unsigned long return_point)
-{
- saved_eip = return_point;
- asm volatile ("movl %%esp,(%0)" : "=m" (saved_esp));
- asm volatile ("movl %%ebp,(%0)" : "=m" (saved_ebp));
- asm volatile ("movl %%ebx,(%0)" : "=m" (saved_ebx));
- asm volatile ("movl %%edi,(%0)" : "=m" (saved_edi));
- asm volatile ("movl %%esi,(%0)" : "=m" (saved_esi));
-}
-
-#define acpi_restore_register_state() do {} while (0)
-
-
-/* routines for saving/restoring kernel state */
-extern int acpi_save_state_mem(void);
-extern int acpi_save_state_disk(void);
-extern void acpi_restore_state_mem(void);
-
-extern unsigned long acpi_wakeup_address;
-
-extern void do_suspend_lowlevel_s4bios(int resume);
-
-/* early initialization routine */
-extern void acpi_reserve_bootmem(void);
-
-#endif /*CONFIG_ACPI_SLEEP*/
-
-
-#endif /*__KERNEL__*/
-
-#endif /*_ASM_ACPI_H*/
+++ /dev/null
-#ifndef __ASM_APIC_H
-#define __ASM_APIC_H
-
-#include <xen/config.h>
-#include <asm/ptrace.h>
-#include <asm/apicdef.h>
-#include <asm/system.h>
-
-#ifdef CONFIG_X86_LOCAL_APIC
-
-#define APIC_DEBUG 0
-
-#if APIC_DEBUG
-#define Dprintk(x...) printk(x)
-#else
-#define Dprintk(x...)
-#endif
-
-/*
- * Basic functions accessing APICs.
- */
-
-static __inline void apic_write(unsigned long reg, unsigned long v)
-{
- *((volatile unsigned long *)(APIC_BASE+reg)) = v;
-}
-
-static __inline void apic_write_atomic(unsigned long reg, unsigned long v)
-{
- xchg((volatile unsigned long *)(APIC_BASE+reg), v);
-}
-
-static __inline unsigned long apic_read(unsigned long reg)
-{
- return *((volatile unsigned long *)(APIC_BASE+reg));
-}
-
-static __inline__ void apic_wait_icr_idle(void)
-{
- do { } while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY );
-}
-
-#ifdef CONFIG_X86_GOOD_APIC
-# define FORCE_READ_AROUND_WRITE 0
-# define apic_read_around(x)
-# define apic_write_around(x,y) apic_write((x),(y))
-#else
-# define FORCE_READ_AROUND_WRITE 1
-# define apic_read_around(x) apic_read(x)
-# define apic_write_around(x,y) apic_write_atomic((x),(y))
-#endif
-
-static inline void ack_APIC_irq(void)
-{
- /*
- * ack_APIC_irq() actually gets compiled as a single instruction:
- * - a single rmw on Pentium/82489DX
- * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC)
- * ... yummie.
- */
-
- /* Docs say use 0 for future compatibility */
- apic_write_around(APIC_EOI, 0);
-}
-
-extern int get_maxlvt(void);
-extern void clear_local_APIC(void);
-extern void connect_bsp_APIC (void);
-extern void disconnect_bsp_APIC (void);
-extern void disable_local_APIC (void);
-extern int verify_local_APIC (void);
-extern void cache_APIC_registers (void);
-extern void sync_Arb_IDs (void);
-extern void init_bsp_APIC (void);
-extern void setup_local_APIC (void);
-extern void init_apic_mappings (void);
-extern void smp_local_timer_interrupt (struct pt_regs * regs);
-extern void setup_APIC_clocks (void);
-extern void setup_apic_nmi_watchdog (void);
-extern inline void nmi_watchdog_tick (struct pt_regs * regs);
-extern int APIC_init_uniprocessor (void);
-extern void disable_APIC_timer(void);
-extern void enable_APIC_timer(void);
-
-/*extern struct pm_dev *apic_pm_register(pm_dev_t, unsigned long, pm_callback);*/
-/*extern void apic_pm_unregister(struct pm_dev*);*/
-
-extern unsigned int watchdog_on;
-
-extern unsigned int apic_timer_irqs [NR_CPUS];
-extern int check_nmi_watchdog (void);
-
-extern unsigned int nmi_watchdog;
-#define NMI_NONE 0
-#define NMI_IO_APIC 1
-#define NMI_LOCAL_APIC 2
-#define NMI_INVALID 3
-
-#endif /* CONFIG_X86_LOCAL_APIC */
-
-#endif /* __ASM_APIC_H */
+++ /dev/null
-#ifndef __ASM_APICDEF_H
-#define __ASM_APICDEF_H
-
-/*
- * Constants for various Intel APICs. (local APIC, IOAPIC, etc.)
- *
- * Alan Cox <Alan.Cox@linux.org>, 1995.
- * Ingo Molnar <mingo@redhat.com>, 1999, 2000
- */
-
-#define APIC_DEFAULT_PHYS_BASE 0xfee00000
-
-#define APIC_ID 0x20
-#define APIC_ID_MASK (0x0F<<24)
-#define GET_APIC_ID(x) (((x)>>24)&0x0F)
-#define APIC_LVR 0x30
-#define APIC_LVR_MASK 0xFF00FF
-#define GET_APIC_VERSION(x) ((x)&0xFF)
-#define GET_APIC_MAXLVT(x) (((x)>>16)&0xFF)
-#define APIC_INTEGRATED(x) ((x)&0xF0)
-#define APIC_XAPIC_SUPPORT(x) ((x)>=0x14)
-#define APIC_TASKPRI 0x80
-#define APIC_TPRI_MASK 0xFF
-#define APIC_ARBPRI 0x90
-#define APIC_ARBPRI_MASK 0xFF
-#define APIC_PROCPRI 0xA0
-#define APIC_EOI 0xB0
-#define APIC_EIO_ACK 0x0 /* Write this to the EOI register */
-#define APIC_RRR 0xC0
-#define APIC_LDR 0xD0
-#define APIC_LDR_MASK (0xFF<<24)
-#define GET_APIC_LOGICAL_ID(x) (((x)>>24)&0xFF)
-#define SET_APIC_LOGICAL_ID(x) (((x)<<24))
-#define APIC_ALL_CPUS 0xFF
-#define APIC_DFR 0xE0
-#define APIC_DFR_CLUSTER 0x0FFFFFFFul /* Clustered */
-#define APIC_DFR_FLAT 0xFFFFFFFFul /* Flat mode */
-#define APIC_SPIV 0xF0
-#define APIC_SPIV_FOCUS_DISABLED (1<<9)
-#define APIC_SPIV_APIC_ENABLED (1<<8)
-#define APIC_ISR 0x100
-#define APIC_TMR 0x180
-#define APIC_IRR 0x200
-#define APIC_ESR 0x280
-#define APIC_ESR_SEND_CS 0x00001
-#define APIC_ESR_RECV_CS 0x00002
-#define APIC_ESR_SEND_ACC 0x00004
-#define APIC_ESR_RECV_ACC 0x00008
-#define APIC_ESR_SENDILL 0x00020
-#define APIC_ESR_RECVILL 0x00040
-#define APIC_ESR_ILLREGA 0x00080
-#define APIC_ICR 0x300
-#define APIC_DEST_SELF 0x40000
-#define APIC_DEST_ALLINC 0x80000
-#define APIC_DEST_ALLBUT 0xC0000
-#define APIC_ICR_RR_MASK 0x30000
-#define APIC_ICR_RR_INVALID 0x00000
-#define APIC_ICR_RR_INPROG 0x10000
-#define APIC_ICR_RR_VALID 0x20000
-#define APIC_INT_LEVELTRIG 0x08000
-#define APIC_INT_ASSERT 0x04000
-#define APIC_ICR_BUSY 0x01000
-#define APIC_DEST_PHYSICAL 0x00000
-#define APIC_DEST_LOGICAL 0x00800
-#define APIC_DM_FIXED 0x00000
-#define APIC_DM_LOWEST 0x00100
-#define APIC_DM_SMI 0x00200
-#define APIC_DM_REMRD 0x00300
-#define APIC_DM_NMI 0x00400
-#define APIC_DM_INIT 0x00500
-#define APIC_DM_STARTUP 0x00600
-#define APIC_DM_EXTINT 0x00700
-#define APIC_VECTOR_MASK 0x000FF
-#define APIC_ICR2 0x310
-#define GET_APIC_DEST_FIELD(x) (((x)>>24)&0xFF)
-#define SET_APIC_DEST_FIELD(x) ((x)<<24)
-#define APIC_LVTT 0x320
-#define APIC_LVTPC 0x340
-#define APIC_LVT0 0x350
-#define APIC_LVT_TIMER_BASE_MASK (0x3<<18)
-#define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3)
-#define SET_APIC_TIMER_BASE(x) (((x)<<18))
-#define APIC_TIMER_BASE_CLKIN 0x0
-#define APIC_TIMER_BASE_TMBASE 0x1
-#define APIC_TIMER_BASE_DIV 0x2
-#define APIC_LVT_TIMER_PERIODIC (1<<17)
-#define APIC_LVT_MASKED (1<<16)
-#define APIC_LVT_LEVEL_TRIGGER (1<<15)
-#define APIC_LVT_REMOTE_IRR (1<<14)
-#define APIC_INPUT_POLARITY (1<<13)
-#define APIC_SEND_PENDING (1<<12)
-#define GET_APIC_DELIVERY_MODE(x) (((x)>>8)&0x7)
-#define SET_APIC_DELIVERY_MODE(x,y) (((x)&~0x700)|((y)<<8))
-#define APIC_MODE_FIXED 0x0
-#define APIC_MODE_NMI 0x4
-#define APIC_MODE_EXINT 0x7
-#define APIC_LVT1 0x360
-#define APIC_LVTERR 0x370
-#define APIC_TMICT 0x380
-#define APIC_TMCCT 0x390
-#define APIC_TDCR 0x3E0
-#define APIC_TDR_DIV_TMBASE (1<<2)
-#define APIC_TDR_DIV_1 0xB
-#define APIC_TDR_DIV_2 0x0
-#define APIC_TDR_DIV_4 0x1
-#define APIC_TDR_DIV_8 0x2
-#define APIC_TDR_DIV_16 0x3
-#define APIC_TDR_DIV_32 0x8
-#define APIC_TDR_DIV_64 0x9
-#define APIC_TDR_DIV_128 0xA
-
-#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
-
-#ifdef CONFIG_X86_CLUSTERED_APIC
-#define MAX_IO_APICS 32
-#else
-#define MAX_IO_APICS 8
-#endif
-
-
-/*
- * The broadcast ID is 0xF for old APICs and 0xFF for xAPICs. SAPICs
- * don't broadcast (yet?), but if they did, they might use 0xFFFF.
- */
-#define APIC_BROADCAST_ID_XAPIC (0xFF)
-#define APIC_BROADCAST_ID_APIC (0x0F)
-
-/*
- * the local APIC register structure, memory mapped. Not terribly well
- * tested, but we might eventually use this one in the future - the
- * problem why we cannot use it right now is the P5 APIC, it has an
- * errata which cannot take 8-bit reads and writes, only 32-bit ones ...
- */
-#define u32 unsigned int
-
-#define lapic ((volatile struct local_apic *)APIC_BASE)
-
-struct local_apic {
-
-/*000*/ struct { u32 __reserved[4]; } __reserved_01;
-
-/*010*/ struct { u32 __reserved[4]; } __reserved_02;
-
-/*020*/ struct { /* APIC ID Register */
- u32 __reserved_1 : 24,
- phys_apic_id : 4,
- __reserved_2 : 4;
- u32 __reserved[3];
- } id;
-
-/*030*/ const
- struct { /* APIC Version Register */
- u32 version : 8,
- __reserved_1 : 8,
- max_lvt : 8,
- __reserved_2 : 8;
- u32 __reserved[3];
- } version;
-
-/*040*/ struct { u32 __reserved[4]; } __reserved_03;
-
-/*050*/ struct { u32 __reserved[4]; } __reserved_04;
-
-/*060*/ struct { u32 __reserved[4]; } __reserved_05;
-
-/*070*/ struct { u32 __reserved[4]; } __reserved_06;
-
-/*080*/ struct { /* Task Priority Register */
- u32 priority : 8,
- __reserved_1 : 24;
- u32 __reserved_2[3];
- } tpr;
-
-/*090*/ const
- struct { /* Arbitration Priority Register */
- u32 priority : 8,
- __reserved_1 : 24;
- u32 __reserved_2[3];
- } apr;
-
-/*0A0*/ const
- struct { /* Processor Priority Register */
- u32 priority : 8,
- __reserved_1 : 24;
- u32 __reserved_2[3];
- } ppr;
-
-/*0B0*/ struct { /* End Of Interrupt Register */
- u32 eoi;
- u32 __reserved[3];
- } eoi;
-
-/*0C0*/ struct { u32 __reserved[4]; } __reserved_07;
-
-/*0D0*/ struct { /* Logical Destination Register */
- u32 __reserved_1 : 24,
- logical_dest : 8;
- u32 __reserved_2[3];
- } ldr;
-
-/*0E0*/ struct { /* Destination Format Register */
- u32 __reserved_1 : 28,
- model : 4;
- u32 __reserved_2[3];
- } dfr;
-
-/*0F0*/ struct { /* Spurious Interrupt Vector Register */
- u32 spurious_vector : 8,
- apic_enabled : 1,
- focus_cpu : 1,
- __reserved_2 : 22;
- u32 __reserved_3[3];
- } svr;
-
-/*100*/ struct { /* In Service Register */
-/*170*/ u32 bitfield;
- u32 __reserved[3];
- } isr [8];
-
-/*180*/ struct { /* Trigger Mode Register */
-/*1F0*/ u32 bitfield;
- u32 __reserved[3];
- } tmr [8];
-
-/*200*/ struct { /* Interrupt Request Register */
-/*270*/ u32 bitfield;
- u32 __reserved[3];
- } irr [8];
-
-/*280*/ union { /* Error Status Register */
- struct {
- u32 send_cs_error : 1,
- receive_cs_error : 1,
- send_accept_error : 1,
- receive_accept_error : 1,
- __reserved_1 : 1,
- send_illegal_vector : 1,
- receive_illegal_vector : 1,
- illegal_register_address : 1,
- __reserved_2 : 24;
- u32 __reserved_3[3];
- } error_bits;
- struct {
- u32 errors;
- u32 __reserved_3[3];
- } all_errors;
- } esr;
-
-/*290*/ struct { u32 __reserved[4]; } __reserved_08;
-
-/*2A0*/ struct { u32 __reserved[4]; } __reserved_09;
-
-/*2B0*/ struct { u32 __reserved[4]; } __reserved_10;
-
-/*2C0*/ struct { u32 __reserved[4]; } __reserved_11;
-
-/*2D0*/ struct { u32 __reserved[4]; } __reserved_12;
-
-/*2E0*/ struct { u32 __reserved[4]; } __reserved_13;
-
-/*2F0*/ struct { u32 __reserved[4]; } __reserved_14;
-
-/*300*/ struct { /* Interrupt Command Register 1 */
- u32 vector : 8,
- delivery_mode : 3,
- destination_mode : 1,
- delivery_status : 1,
- __reserved_1 : 1,
- level : 1,
- trigger : 1,
- __reserved_2 : 2,
- shorthand : 2,
- __reserved_3 : 12;
- u32 __reserved_4[3];
- } icr1;
-
-/*310*/ struct { /* Interrupt Command Register 2 */
- union {
- u32 __reserved_1 : 24,
- phys_dest : 4,
- __reserved_2 : 4;
- u32 __reserved_3 : 24,
- logical_dest : 8;
- } dest;
- u32 __reserved_4[3];
- } icr2;
-
-/*320*/ struct { /* LVT - Timer */
- u32 vector : 8,
- __reserved_1 : 4,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- timer_mode : 1,
- __reserved_3 : 14;
- u32 __reserved_4[3];
- } lvt_timer;
-
-/*330*/ struct { u32 __reserved[4]; } __reserved_15;
-
-/*340*/ struct { /* LVT - Performance Counter */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- __reserved_3 : 15;
- u32 __reserved_4[3];
- } lvt_pc;
-
-/*350*/ struct { /* LVT - LINT0 */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- polarity : 1,
- remote_irr : 1,
- trigger : 1,
- mask : 1,
- __reserved_2 : 15;
- u32 __reserved_3[3];
- } lvt_lint0;
-
-/*360*/ struct { /* LVT - LINT1 */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- polarity : 1,
- remote_irr : 1,
- trigger : 1,
- mask : 1,
- __reserved_2 : 15;
- u32 __reserved_3[3];
- } lvt_lint1;
-
-/*370*/ struct { /* LVT - Error */
- u32 vector : 8,
- __reserved_1 : 4,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- __reserved_3 : 15;
- u32 __reserved_4[3];
- } lvt_error;
-
-/*380*/ struct { /* Timer Initial Count Register */
- u32 initial_count;
- u32 __reserved_2[3];
- } timer_icr;
-
-/*390*/ const
- struct { /* Timer Current Count Register */
- u32 curr_count;
- u32 __reserved_2[3];
- } timer_ccr;
-
-/*3A0*/ struct { u32 __reserved[4]; } __reserved_16;
-
-/*3B0*/ struct { u32 __reserved[4]; } __reserved_17;
-
-/*3C0*/ struct { u32 __reserved[4]; } __reserved_18;
-
-/*3D0*/ struct { u32 __reserved[4]; } __reserved_19;
-
-/*3E0*/ struct { /* Timer Divide Configuration Register */
- u32 divisor : 4,
- __reserved_1 : 28;
- u32 __reserved_2[3];
- } timer_dcr;
-
-/*3F0*/ struct { u32 __reserved[4]; } __reserved_20;
-
-} __attribute__ ((packed));
-
-#undef u32
-
-#endif
+++ /dev/null
-#ifndef __ARCH_I386_ATOMIC__
-#define __ARCH_I386_ATOMIC__
-
-#include <xen/config.h>
-
-/*
- * Atomic operations that C can't guarantee us. Useful for
- * resource counting etc..
- */
-
-#ifdef CONFIG_SMP
-#define LOCK "lock ; "
-#else
-#define LOCK ""
-#endif
-
-/*
- * Make sure gcc doesn't try to be clever and move things around
- * on us. We need to use _exactly_ the address the user gave us,
- * not some alias that contains the same information.
- */
-typedef struct { volatile int counter; } atomic_t;
-
-#define ATOMIC_INIT(i) { (i) }
-
-/**
- * atomic_read - read atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically reads the value of @v. Note that the guaranteed
- * useful range of an atomic_t is only 24 bits.
- */
-#define atomic_read(v) ((v)->counter)
-
-/**
- * atomic_set - set atomic variable
- * @v: pointer of type atomic_t
- * @i: required value
- *
- * Atomically sets the value of @v to @i. Note that the guaranteed
- * useful range of an atomic_t is only 24 bits.
- */
-#define atomic_set(v,i) (((v)->counter) = (i))
-
-/**
- * atomic_add - add integer to atomic variable
- * @i: integer value to add
- * @v: pointer of type atomic_t
- *
- * Atomically adds @i to @v. Note that the guaranteed useful range
- * of an atomic_t is only 24 bits.
- */
-static __inline__ void atomic_add(int i, atomic_t *v)
-{
- __asm__ __volatile__(
- LOCK "addl %1,%0"
- :"=m" (v->counter)
- :"ir" (i), "m" (v->counter));
-}
-
-/**
- * atomic_sub - subtract the atomic variable
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v. Note that the guaranteed
- * useful range of an atomic_t is only 24 bits.
- */
-static __inline__ void atomic_sub(int i, atomic_t *v)
-{
- __asm__ __volatile__(
- LOCK "subl %1,%0"
- :"=m" (v->counter)
- :"ir" (i), "m" (v->counter));
-}
-
-/**
- * atomic_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases. Note that the guaranteed
- * useful range of an atomic_t is only 24 bits.
- */
-static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- LOCK "subl %2,%0; sete %1"
- :"=m" (v->counter), "=qm" (c)
- :"ir" (i), "m" (v->counter) : "memory");
- return c;
-}
-
-/**
- * atomic_inc - increment atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1. Note that the guaranteed
- * useful range of an atomic_t is only 24 bits.
- */
-static __inline__ void atomic_inc(atomic_t *v)
-{
- __asm__ __volatile__(
- LOCK "incl %0"
- :"=m" (v->counter)
- :"m" (v->counter));
-}
-
-/**
- * atomic_dec - decrement atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1. Note that the guaranteed
- * useful range of an atomic_t is only 24 bits.
- */
-static __inline__ void atomic_dec(atomic_t *v)
-{
- __asm__ __volatile__(
- LOCK "decl %0"
- :"=m" (v->counter)
- :"m" (v->counter));
-}
-
-/**
- * atomic_dec_and_test - decrement and test
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases. Note that the guaranteed
- * useful range of an atomic_t is only 24 bits.
- */
-static __inline__ int atomic_dec_and_test(atomic_t *v)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- LOCK "decl %0; sete %1"
- :"=m" (v->counter), "=qm" (c)
- :"m" (v->counter) : "memory");
- return c != 0;
-}
-
-/**
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases. Note that the guaranteed
- * useful range of an atomic_t is only 24 bits.
- */
-static __inline__ int atomic_inc_and_test(atomic_t *v)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- LOCK "incl %0; sete %1"
- :"=m" (v->counter), "=qm" (c)
- :"m" (v->counter) : "memory");
- return c != 0;
-}
-
-/**
- * atomic_add_negative - add and test if negative
- * @v: pointer of type atomic_t
- * @i: integer value to add
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero. Note that the guaranteed
- * useful range of an atomic_t is only 24 bits.
- */
-static __inline__ int atomic_add_negative(int i, atomic_t *v)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- LOCK "addl %2,%0; sets %1"
- :"=m" (v->counter), "=qm" (c)
- :"ir" (i), "m" (v->counter) : "memory");
- return c;
-}
-
-/* Atomic operations are already serializing on x86 */
-#define smp_mb__before_atomic_dec() barrier()
-#define smp_mb__after_atomic_dec() barrier()
-#define smp_mb__before_atomic_inc() barrier()
-#define smp_mb__after_atomic_inc() barrier()
-
-#endif
+++ /dev/null
-#ifndef _I386_BITOPS_H
-#define _I386_BITOPS_H
-
-/*
- * Copyright 1992, Linus Torvalds.
- */
-
-#include <xen/config.h>
-
-/*
- * These have to be done with inline assembly: that way the bit-setting
- * is guaranteed to be atomic. All bit operations return 0 if the bit
- * was cleared before the operation and != 0 if it was not.
- *
- * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
- */
-
-#ifdef CONFIG_SMP
-#define LOCK_PREFIX "lock ; "
-#else
-#define LOCK_PREFIX ""
-#endif
-
-#define ADDR (*(volatile long *) addr)
-
-/**
- * set_bit - Atomically set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * This function is atomic and may not be reordered. See __set_bit()
- * if you do not require the atomic guarantees.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static __inline__ void set_bit(int nr, volatile void * addr)
-{
- __asm__ __volatile__( LOCK_PREFIX
- "btsl %1,%0"
- :"=m" (ADDR)
- :"Ir" (nr));
-}
-
-/**
- * __set_bit - Set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * Unlike set_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static __inline__ void __set_bit(int nr, volatile void * addr)
-{
- __asm__(
- "btsl %1,%0"
- :"=m" (ADDR)
- :"Ir" (nr));
-}
-
-/**
- * clear_bit - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and may not be reordered. However, it does
- * not contain a memory barrier, so if it is used for locking purposes,
- * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
- * in order to ensure changes are visible on other processors.
- */
-static __inline__ void clear_bit(int nr, volatile void * addr)
-{
- __asm__ __volatile__( LOCK_PREFIX
- "btrl %1,%0"
- :"=m" (ADDR)
- :"Ir" (nr));
-}
-#define smp_mb__before_clear_bit() barrier()
-#define smp_mb__after_clear_bit() barrier()
-
-/**
- * __change_bit - Toggle a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * Unlike change_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static __inline__ void __change_bit(int nr, volatile void * addr)
-{
- __asm__ __volatile__(
- "btcl %1,%0"
- :"=m" (ADDR)
- :"Ir" (nr));
-}
-
-/**
- * change_bit - Toggle a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * change_bit() is atomic and may not be reordered.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static __inline__ void change_bit(int nr, volatile void * addr)
-{
- __asm__ __volatile__( LOCK_PREFIX
- "btcl %1,%0"
- :"=m" (ADDR)
- :"Ir" (nr));
-}
-
-/**
- * test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static __inline__ int test_and_set_bit(int nr, volatile void * addr)
-{
- int oldbit;
-
- __asm__ __volatile__( LOCK_PREFIX
- "btsl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"=m" (ADDR)
- :"Ir" (nr) : "memory");
- return oldbit;
-}
-
-/**
- * __test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two instances of this operation race, one can appear to succeed
- * but actually fail. You must protect multiple accesses with a lock.
- */
-static __inline__ int __test_and_set_bit(int nr, volatile void * addr)
-{
- int oldbit;
-
- __asm__(
- "btsl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"=m" (ADDR)
- :"Ir" (nr));
- return oldbit;
-}
-
-/**
- * test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static __inline__ int test_and_clear_bit(int nr, volatile void * addr)
-{
- int oldbit;
-
- __asm__ __volatile__( LOCK_PREFIX
- "btrl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"=m" (ADDR)
- :"Ir" (nr) : "memory");
- return oldbit;
-}
-
-/**
- * __test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two instances of this operation race, one can appear to succeed
- * but actually fail. You must protect multiple accesses with a lock.
- */
-static __inline__ int __test_and_clear_bit(int nr, volatile void * addr)
-{
- int oldbit;
-
- __asm__(
- "btrl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"=m" (ADDR)
- :"Ir" (nr));
- return oldbit;
-}
-
-/* WARNING: non atomic and it can be reordered! */
-static __inline__ int __test_and_change_bit(int nr, volatile void * addr)
-{
- int oldbit;
-
- __asm__ __volatile__(
- "btcl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"=m" (ADDR)
- :"Ir" (nr) : "memory");
- return oldbit;
-}
-
-/**
- * test_and_change_bit - Change a bit and return its new value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static __inline__ int test_and_change_bit(int nr, volatile void * addr)
-{
- int oldbit;
-
- __asm__ __volatile__( LOCK_PREFIX
- "btcl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"=m" (ADDR)
- :"Ir" (nr) : "memory");
- return oldbit;
-}
-
-
-static __inline__ int constant_test_bit(int nr, const volatile void * addr)
-{
- return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
-}
-
-static __inline__ int variable_test_bit(int nr, volatile void * addr)
-{
- int oldbit;
-
- __asm__ __volatile__(
- "btl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit)
- :"m" (ADDR),"Ir" (nr));
- return oldbit;
-}
-
-#define test_bit(nr,addr) \
-(__builtin_constant_p(nr) ? \
- constant_test_bit((nr),(addr)) : \
- variable_test_bit((nr),(addr)))
-
-/**
- * find_first_zero_bit - find the first zero bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum size to search
- *
- * Returns the bit-number of the first zero bit, not the number of the byte
- * containing a bit.
- */
-static __inline__ int find_first_zero_bit(void * addr, unsigned size)
-{
- int d0, d1, d2;
- int res;
-
- if (!size)
- return 0;
- /* This looks at memory. Mark it volatile to tell gcc not to move it around */
- __asm__ __volatile__(
- "movl $-1,%%eax\n\t"
- "xorl %%edx,%%edx\n\t"
- "repe; scasl\n\t"
- "je 1f\n\t"
- "xorl -4(%%edi),%%eax\n\t"
- "subl $4,%%edi\n\t"
- "bsfl %%eax,%%edx\n"
- "1:\tsubl %%ebx,%%edi\n\t"
- "shll $3,%%edi\n\t"
- "addl %%edi,%%edx"
- :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
- :"1" ((size + 31) >> 5), "2" (addr), "b" (addr));
- return res;
-}
-
-/**
- * find_next_zero_bit - find the next zero bit in a memory region
- * @addr: The address to base the search on
- * @size: The maximum size to search
- * @offset: The bit number to start searching at
- */
-static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
-{
- unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
- int set = 0, bit = offset & 31, res;
-
- if (bit) {
- /*
- * Look for zero in the first word
- */
- __asm__("bsfl %1,%0\n\t"
- "jne 1f\n\t"
- "movl $32, %0\n"
- "1:"
- : "=r" (set)
- : "r" (~(*p >> bit)));
- if (set < (32 - bit))
- return set + offset;
- set = 32 - bit;
- p++;
- }
- /*
- * No zero yet, search remaining full words for a zero
- */
- res = find_first_zero_bit (p, size - 32 * (p - (unsigned long *) addr));
- return (offset + set + res);
-}
-
-/**
- * ffz - find first zero in word.
- * @word: The word to search
- *
- * Undefined if no zero exists, so code should check against ~0UL first.
- */
-static __inline__ unsigned long ffz(unsigned long word)
-{
- __asm__("bsfl %1,%0"
- :"=r" (word)
- :"r" (~word));
- return word;
-}
-
-/**
- * ffs - find first bit set
- * @x: the word to search
- *
- * This is defined the same way as
- * the libc and compiler builtin ffs routines, therefore
- * differs in spirit from the above ffz (man ffs).
- */
-static __inline__ int ffs(int x)
-{
- int r;
-
- __asm__("bsfl %1,%0\n\t"
- "jnz 1f\n\t"
- "movl $-1,%0\n"
- "1:" : "=r" (r) : "g" (x));
- return r+1;
-}
-
-/**
- * hweightN - returns the hamming weight of a N-bit word
- * @x: the word to weigh
- *
- * The Hamming Weight of a number is the total number of bits set in it.
- */
-
-#define hweight32(x) generic_hweight32(x)
-#define hweight16(x) generic_hweight16(x)
-#define hweight8(x) generic_hweight8(x)
-
-#define ext2_set_bit __test_and_set_bit
-#define ext2_clear_bit __test_and_clear_bit
-#define ext2_test_bit test_bit
-#define ext2_find_first_zero_bit find_first_zero_bit
-#define ext2_find_next_zero_bit find_next_zero_bit
-
-/* Bitmap functions for the minix filesystem. */
-#define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,addr)
-#define minix_set_bit(nr,addr) __set_bit(nr,addr)
-#define minix_test_and_clear_bit(nr,addr) __test_and_clear_bit(nr,addr)
-#define minix_test_bit(nr,addr) test_bit(nr,addr)
-#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
-
-#endif /* _I386_BITOPS_H */
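
/*
 * A sketch of the pattern these primitives exist for: an ID allocator
 * over a fixed bitmap.  find_first_zero_bit() locates a candidate, but
 * it is not atomic, so test_and_set_bit() must confirm the claim and
 * the search must retry on failure.  All names below are illustrative.
 */
#define MAX_IDS 1024
static unsigned long id_map[MAX_IDS / 32];      /* one bit per ID */

static int alloc_id(void)
{
    int id;

    do {
        id = find_first_zero_bit(id_map, MAX_IDS);
        if ( id >= MAX_IDS )
            return -1;                          /* map exhausted */
        /* Another CPU may have taken 'id' since the search: if the
         * old bit value was 1, loop and search again. */
    } while ( test_and_set_bit(id, id_map) );

    return id;
}

static void free_id(int id)
{
    clear_bit(id, id_map);                      /* atomic release */
}
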
+++ /dev/null
-/*
- * include/asm-i386/cache.h
- */
-#ifndef __ARCH_I386_CACHE_H
-#define __ARCH_I386_CACHE_H
-
-#include <xen/config.h>
-
-/* L1 cache line size */
-#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
-#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
-
-#endif
+++ /dev/null
-/******************************************************************************
- * config.h
- *
- * A Linux-style configuration list.
- */
-
-#ifndef __XEN_I386_CONFIG_H__
-#define __XEN_I386_CONFIG_H__
-
-#define CONFIG_X86 1
-
-#define CONFIG_SMP 1
-#define CONFIG_X86_LOCAL_APIC 1
-#define CONFIG_X86_IO_APIC 1
-#define CONFIG_X86_L1_CACHE_SHIFT 5
-
-#define CONFIG_ACPI 1
-#define CONFIG_ACPI_BOOT 1
-
-#define CONFIG_PCI 1
-#define CONFIG_PCI_BIOS 1
-#define CONFIG_PCI_DIRECT 1
-
-#define CONFIG_IDE 1
-#define CONFIG_BLK_DEV_IDE 1
-#define CONFIG_BLK_DEV_IDEDMA 1
-#define CONFIG_BLK_DEV_IDEPCI 1
-#define CONFIG_IDEDISK_MULTI_MODE 1
-#define CONFIG_IDEDISK_STROKE 1
-#define CONFIG_IDEPCI_SHARE_IRQ 1
-#define CONFIG_BLK_DEV_IDEDMA_PCI 1
-#define CONFIG_IDEDMA_PCI_AUTO 1
-#define CONFIG_IDEDMA_AUTO 1
-#define CONFIG_IDEDMA_ONLYDISK 1
-#define CONFIG_BLK_DEV_IDE_MODES 1
-#define CONFIG_BLK_DEV_PIIX 1
-
-#define CONFIG_SCSI 1
-#define CONFIG_SCSI_LOGGING 1
-#define CONFIG_BLK_DEV_SD 1
-#define CONFIG_SD_EXTRA_DEVS 40
-#define CONFIG_SCSI_MULTI_LUN 1
-
-#define CONFIG_XEN_ATTENTION_KEY 1
-
-
-#define HZ 100
-
-/*
- * Just to keep compiler happy.
- * NB. DO NOT CHANGE SMP_CACHE_BYTES WITHOUT FIXING arch/i386/entry.S!!!
- * It depends on size of irq_cpustat_t, for example, being 64 bytes. :-)
- * Mmmm... so niiiiiice....
- */
-#define SMP_CACHE_BYTES 64
-#define NR_CPUS 16
-#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
-#define ____cacheline_aligned __cacheline_aligned
-
-/*** Hypervisor owns top 64MB of virtual address space. ***/
-#define HYPERVISOR_VIRT_START (0xFC000000UL)
-
-/*
- * First 4MB are mapped read-only for all. It's for the machine->physical
- * mapping table (MPT table). The following are virtual addresses.
- */
-#define READONLY_MPT_VIRT_START (HYPERVISOR_VIRT_START)
-#define READONLY_MPT_VIRT_END (READONLY_MPT_VIRT_START + (4*1024*1024))
-/*
- * Next 12MB is fixed monitor space, which is part of a 40MB direct-mapped
- * memory region. The following are machine addresses.
- */
-#define MAX_MONITOR_ADDRESS (12*1024*1024)
-#define MAX_DIRECTMAP_ADDRESS (40*1024*1024)
-/* And the virtual addresses for the direct-map region... */
-#define DIRECTMAP_VIRT_START (READONLY_MPT_VIRT_END)
-#define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + MAX_DIRECTMAP_ADDRESS)
-#define MONITOR_VIRT_START (DIRECTMAP_VIRT_START)
-#define MONITOR_VIRT_END (MONITOR_VIRT_START + MAX_MONITOR_ADDRESS)
-#define RDWR_MPT_VIRT_START (MONITOR_VIRT_END)
-#define RDWR_MPT_VIRT_END (RDWR_MPT_VIRT_START + (4*1024*1024))
-#define FRAMETABLE_VIRT_START (RDWR_MPT_VIRT_END)
-#define FRAMETABLE_VIRT_END (DIRECTMAP_VIRT_END)
-/* Next 4MB of virtual address space is used as a linear p.t. mapping. */
-#define LINEAR_PT_VIRT_START (DIRECTMAP_VIRT_END)
-#define LINEAR_PT_VIRT_END (LINEAR_PT_VIRT_START + (4*1024*1024))
-/* Next 4MB of virtual address space is used as a shadow linear p.t. map. */
-#define SH_LINEAR_PT_VIRT_START (LINEAR_PT_VIRT_END)
-#define SH_LINEAR_PT_VIRT_END (SH_LINEAR_PT_VIRT_START + (4*1024*1024))
-/* Next 4MB of virtual address space used for per-domain mappings (eg. GDT). */
-#define PERDOMAIN_VIRT_START (SH_LINEAR_PT_VIRT_END)
-#define PERDOMAIN_VIRT_END (PERDOMAIN_VIRT_START + (4*1024*1024))
-#define GDT_VIRT_START (PERDOMAIN_VIRT_START)
-#define GDT_VIRT_END (GDT_VIRT_START + (64*1024))
-#define LDT_VIRT_START (GDT_VIRT_END)
-#define LDT_VIRT_END (LDT_VIRT_START + (64*1024))
-/* Penultimate 4MB of virtual address space used for domain page mappings. */
-#define MAPCACHE_VIRT_START (PERDOMAIN_VIRT_END)
-#define MAPCACHE_VIRT_END (MAPCACHE_VIRT_START + (4*1024*1024))
-/* Final 4MB of virtual address space used for ioremap(). */
-#define IOREMAP_VIRT_START (MAPCACHE_VIRT_END)
-#define IOREMAP_VIRT_END (IOREMAP_VIRT_START + (4*1024*1024))
-
-/*
- * Amount of slack domain memory to leave in the system, in kilobytes.
- * Prevents a hard out-of-memory crunch for things like network receive.
- */
-#define SLACK_DOMAIN_MEM_KILOBYTES 2048
-
-/* Linkage for x86 */
-#define FASTCALL(x) x __attribute__((regparm(3)))
-#define asmlinkage __attribute__((regparm(0)))
-#define __ALIGN .align 16,0x90
-#define __ALIGN_STR ".align 16,0x90"
-#define SYMBOL_NAME_STR(X) #X
-#define SYMBOL_NAME(X) X
-#define SYMBOL_NAME_LABEL(X) X##:
-#ifdef __ASSEMBLY__
-#define ALIGN __ALIGN
-#define ALIGN_STR __ALIGN_STR
-#define ENTRY(name) \
- .globl SYMBOL_NAME(name); \
- ALIGN; \
- SYMBOL_NAME_LABEL(name)
-#endif
-
-#define PGT_base_page_table PGT_l2_page_table
-
-#define barrier() __asm__ __volatile__("": : :"memory")
-
-#define __HYPERVISOR_CS 0x0808
-#define __HYPERVISOR_DS 0x0810
-
-#define NR_syscalls 256
-
-#ifndef NDEBUG
-#define MEMORY_GUARD
-#endif
-
-#ifndef __ASSEMBLY__
-extern unsigned long _end; /* standard ELF symbol */
-extern void __out_of_line_bug(int line) __attribute__((noreturn));
-#define out_of_line_bug() __out_of_line_bug(__LINE__)
-#endif /* __ASSEMBLY__ */
-
-#endif /* __XEN_I386_CONFIG_H__ */
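
/*
 * A sketch of a compile-time check that the regions above really do
 * tile the 64MB hypervisor hole with no gaps.  LAYOUT_CHECK is a
 * hypothetical helper (the negative-array-size trick); the original
 * header relied on careful reading instead.
 */
#define LAYOUT_CHECK(cond) extern int layout_check[(cond) ? 1 : -1]

LAYOUT_CHECK(READONLY_MPT_VIRT_START == HYPERVISOR_VIRT_START);
LAYOUT_CHECK(DIRECTMAP_VIRT_START    == READONLY_MPT_VIRT_END);
LAYOUT_CHECK(LINEAR_PT_VIRT_START    == DIRECTMAP_VIRT_END);
LAYOUT_CHECK(SH_LINEAR_PT_VIRT_START == LINEAR_PT_VIRT_END);
LAYOUT_CHECK(PERDOMAIN_VIRT_START    == SH_LINEAR_PT_VIRT_END);
LAYOUT_CHECK(MAPCACHE_VIRT_START     == PERDOMAIN_VIRT_END);
LAYOUT_CHECK(IOREMAP_VIRT_START      == MAPCACHE_VIRT_END);
/* 4+40+4+4+4+4 MB so far; the final 4MB ioremap region runs to the
 * very top of the 32-bit address space. */
LAYOUT_CHECK(IOREMAP_VIRT_START == HYPERVISOR_VIRT_START + (60 << 20));
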
+++ /dev/null
-/*
- * cpufeature.h
- *
- * Defines x86 CPU feature bits
- */
-
-#ifndef __ASM_I386_CPUFEATURE_H
-#define __ASM_I386_CPUFEATURE_H
-
-/* Sample usage: CPU_FEATURE_P(cpu.x86_capability, FPU) */
-#define CPU_FEATURE_P(CAP, FEATURE) test_bit(CAP, X86_FEATURE_##FEATURE ##_BIT)
-
-#define NCAPINTS 6 /* Currently we have 6 32-bit words worth of info */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
-#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */
-#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */
-#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */
-#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN (0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */
-#define X86_FEATURE_DTES (0*32+21) /* Debug Trace Store */
-#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */
- /* of FPU context), and CR4.OSFXSR available */
-#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */
-#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */
-#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */
-#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */
-#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */
-#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */
-
-/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
-/* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP (1*32+19) /* MP Capable. */
-#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */
-#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */
-#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */
-
-/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */
-
-/* Other features, Linux-defined mapping, word 3 */
-/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */
-/* cpu types for specific tunings: */
-#define X86_FEATURE_K8 (3*32+ 4) /* Opteron, Athlon64 */
-#define X86_FEATURE_K7 (3*32+ 5) /* Athlon */
-#define X86_FEATURE_P3 (3*32+ 6) /* P3 */
-#define X86_FEATURE_P4 (3*32+ 7) /* P4 */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
-#define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */
-
-/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
-#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */
-
-
-#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability)
-#define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability)
-
-#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU)
-#define cpu_has_vme boot_cpu_has(X86_FEATURE_VME)
-#define cpu_has_de boot_cpu_has(X86_FEATURE_DE)
-#define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE)
-#define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC)
-#define cpu_has_pae boot_cpu_has(X86_FEATURE_PAE)
-#define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE)
-#define cpu_has_sse2 boot_cpu_has(X86_FEATURE_XMM2)
-#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC)
-#define cpu_has_sep boot_cpu_has(X86_FEATURE_SEP)
-#define cpu_has_mtrr boot_cpu_has(X86_FEATURE_MTRR)
-#define cpu_has_mmx boot_cpu_has(X86_FEATURE_MMX)
-#define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR)
-#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM)
-#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT)
-#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP)
-#define cpu_has_k6_mtrr boot_cpu_has(X86_FEATURE_K6_MTRR)
-#define cpu_has_cyrix_arr boot_cpu_has(X86_FEATURE_CYRIX_ARR)
-#define cpu_has_centaur_mcr boot_cpu_has(X86_FEATURE_CENTAUR_MCR)
-#define cpu_has_xstore boot_cpu_has(X86_FEATURE_XSTORE)
-
-#endif /* __ASM_I386_CPUFEATURE_H */
-
-/*
- * Local Variables:
- * mode:c
- * comment-column:42
- * End:
- */
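
/*
 * Illustration of the encoding: a feature number is (word*32 + bit)
 * into the flat x86_capability[NCAPINTS] array, and cpu_has() above is
 * just test_bit() over that array.  E.g. X86_FEATURE_PGE = (0*32+13)
 * is word 0, bit 13, matching bit 13 of CPUID.1:EDX, while
 * X86_FEATURE_SYSCALL = (1*32+11) is word 1, bit 11, from
 * CPUID.0x80000001:EDX.  The helper names below are illustrative only.
 */
static inline int feature_word(int feature) { return feature >> 5; }
static inline int feature_bit(int feature)  { return feature & 31; }
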
+++ /dev/null
-#ifndef _I386_CURRENT_H
-#define _I386_CURRENT_H
-
-struct task_struct;
-
-#define STACK_RESERVED \
- (sizeof(execution_context_t) + sizeof(struct task_struct *))
-
-static inline struct task_struct * get_current(void)
-{
- struct task_struct *current;
- __asm__ ( "orl %%esp,%0; andl $~3,%0; movl (%0),%0"
- : "=r" (current) : "0" (STACK_SIZE-4) );
- return current;
-}
-
-#define current get_current()
-
-static inline void set_current(struct task_struct *p)
-{
- __asm__ ( "orl %%esp,%0; andl $~3,%0; movl %1,(%0)"
- : : "r" (STACK_SIZE-4), "r" (p) );
-}
-
-static inline execution_context_t *get_execution_context(void)
-{
- execution_context_t *execution_context;
- __asm__ ( "andl %%esp,%0; addl %2,%0"
- : "=r" (execution_context)
- : "0" (~(STACK_SIZE-1)), "i" (STACK_SIZE-STACK_RESERVED) );
- return execution_context;
-}
-
-static inline unsigned long get_stack_top(void)
-{
- unsigned long p;
- __asm__ ( "orl %%esp,%0; andl $~3,%0"
- : "=r" (p) : "0" (STACK_SIZE-4) );
- return p;
-}
-
-#define schedule_tail(_p) \
- __asm__ __volatile__ ( \
- "andl %%esp,%0; addl %2,%0; movl %0,%%esp; jmp *%1" \
- : : "r" (~(STACK_SIZE-1)), \
- "r" (unlikely(is_idle_task((_p))) ? \
- continue_cpu_idle_loop : \
- continue_nonidle_task), \
- "i" (STACK_SIZE-STACK_RESERVED) )
-
-
-#endif /* !(_I386_CURRENT_H) */
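
/*
 * A plain-C rendering of the stack trick above, for illustration only.
 * It assumes what the asm assumes: STACK_SIZE (defined elsewhere) is a
 * power of two, each stack is STACK_SIZE-aligned, and the task pointer
 * lives in the topmost word of the stack.
 */
static inline struct task_struct *get_current_c(void)
{
    unsigned long esp = (unsigned long)&esp;    /* approximates %esp */
    /* 'orl %esp,%0' with STACK_SIZE-4 rounds up to the last word of
     * this stack; 'andl $~3,%0' re-aligns to a word boundary. */
    unsigned long top = (esp | (STACK_SIZE - 4)) & ~3UL;
    return *(struct task_struct **)top;
}
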
+++ /dev/null
-#ifndef _I386_DEBUGREG_H
-#define _I386_DEBUGREG_H
-
-
-/* Indicate the register numbers for a number of the specific
- debug registers. Registers 0-3 contain the addresses we wish to trap on */
-#define DR_FIRSTADDR 0 /* u_debugreg[DR_FIRSTADDR] */
-#define DR_LASTADDR 3 /* u_debugreg[DR_LASTADDR] */
-
-#define DR_STATUS 6 /* u_debugreg[DR_STATUS] */
-#define DR_CONTROL 7 /* u_debugreg[DR_CONTROL] */
-
-/* Define a few things for the status register. We can use this to determine
- which debugging register was responsible for the trap. The other bits
- are either reserved or not of interest to us. */
-
-#define DR_TRAP0 (0x1) /* db0 */
-#define DR_TRAP1 (0x2) /* db1 */
-#define DR_TRAP2 (0x4) /* db2 */
-#define DR_TRAP3 (0x8) /* db3 */
-
-#define DR_STEP (0x4000) /* single-step */
-#define DR_SWITCH (0x8000) /* task switch */
-
-/* Now define a bunch of things for manipulating the control register.
- The top two bytes of the control register consist of 4 fields of 4
- bits - each field corresponds to one of the four debug registers,
- and indicates what types of access we trap on, and how large the data
- field is that we are looking at */
-
-#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */
-#define DR_CONTROL_SIZE 4 /* 4 control bits per register */
-
-#define DR_RW_EXECUTE (0x0) /* Settings for the access types to trap on */
-#define DR_RW_WRITE (0x1)
-#define DR_RW_READ (0x3)
-
-#define DR_LEN_1 (0x0) /* Settings for data length to trap on */
-#define DR_LEN_2 (0x4)
-#define DR_LEN_4 (0xC)
-
-/* The low byte to the control register determine which registers are
- enabled. There are 4 fields of two bits. One bit is "local", meaning
- that the processor will reset the bit after a task switch and the other
- is global meaning that we have to explicitly reset the bit. With linux,
- you can use either one, since we explicitly zero the register when we enter
- kernel mode. */
-
-#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
-#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
-#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
-
-#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
-#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */
-
-/* The second byte to the control register has a few special things.
- We can slow the instruction pipeline for instructions coming via the
- gdt or the ldt if we want to. I am not sure why this is an advantage */
-
-#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */
-#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
-#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
-
-#endif
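
/*
 * Worked example: compose a %dr7 value enabling a 4-byte write
 * watchpoint in debug register 0 (the watched address itself would be
 * loaded into %dr0).  Illustrative helper, not part of this header.
 */
static inline unsigned long dr7_write_watch_dr0(void)
{
    unsigned long dr7 = 0;

    /* Type/length field for register 0 starts at bit DR_CONTROL_SHIFT:
     * trap on writes, 4 bytes wide. */
    dr7 |= (unsigned long)(DR_RW_WRITE | DR_LEN_4)
           << (DR_CONTROL_SHIFT + 0 * DR_CONTROL_SIZE);
    /* Local-enable bit for register 0. */
    dr7 |= 1UL << (DR_LOCAL_ENABLE_SHIFT + 0 * DR_ENABLE_SIZE);

    return dr7;                                 /* == 0x000d0001 */
}
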
+++ /dev/null
-#ifndef _I386_DELAY_H
-#define _I386_DELAY_H
-
-/*
- * Copyright (C) 1993 Linus Torvalds
- *
- * Delay routines calling functions in arch/i386/lib/delay.c
- */
-
-extern unsigned long ticks_per_usec;
-extern void __udelay(unsigned long usecs);
-#define udelay(n) __udelay(n)
-
-#endif /* defined(_I386_DELAY_H) */
+++ /dev/null
-#ifndef __ARCH_DESC_H
-#define __ARCH_DESC_H
-
-#define LDT_ENTRY_SIZE 8
-
-#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY
-
-#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
-#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 1)
-
-#define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY)
-#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
-
-#define load_TR(n) __asm__ __volatile__ ("ltr %%ax" : : "a" (__TSS(n)<<3) )
-
-/*
- * Guest OS must provide its own code selectors, or use the one we provide. The
- * RPL must be 1, as we only create bounce frames to ring 1. Any LDT selector
- * value is okay. Note that checking only the RPL is insufficient: if the
- * selector is poked into an interrupt, trap or call gate then the RPL is
- * ignored when the gate is accessed.
- */
-#define VALID_SEL(_s) \
- (((((_s)>>3) < FIRST_RESERVED_GDT_ENTRY) || \
- (((_s)>>3) > LAST_RESERVED_GDT_ENTRY) || \
- ((_s)&4)) && \
- (((_s)&3) == 1))
-#define VALID_CODESEL(_s) ((_s) == FLAT_RING1_CS || VALID_SEL(_s))
-
-/* These are bitmasks for the first 32 bits of a descriptor table entry. */
-#define _SEGMENT_TYPE (15<< 8)
-#define _SEGMENT_S ( 1<<12) /* System descriptor (yes iff S==0) */
-#define _SEGMENT_DPL ( 3<<13) /* Descriptor Privilege Level */
-#define _SEGMENT_P ( 1<<15) /* Segment Present */
-#define _SEGMENT_G ( 1<<23) /* Granularity */
-
-#ifndef __ASSEMBLY__
-struct desc_struct {
- unsigned long a,b;
-};
-
-extern struct desc_struct gdt_table[];
-extern struct desc_struct *idt, *gdt;
-
-struct Xgt_desc_struct {
- unsigned short size;
- unsigned long address __attribute__((packed));
-};
-
-#define idt_descr (*(struct Xgt_desc_struct *)((char *)&idt - 2))
-#define gdt_descr (*(struct Xgt_desc_struct *)((char *)&gdt - 2))
-
-extern void set_intr_gate(unsigned int irq, void * addr);
-extern void set_tss_desc(unsigned int n, void *addr);
-
-#endif /* !__ASSEMBLY__ */
-
-#endif
+++ /dev/null
-#ifndef __I386_DIV64
-#define __I386_DIV64
-
-#define do_div(n,base) ({ \
- unsigned long __upper, __low, __high, __mod; \
- asm("":"=a" (__low), "=d" (__high):"A" (n)); \
- __upper = __high; \
- if (__high) { \
- __upper = __high % (base); \
- __high = __high / (base); \
- } \
- asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (base), "0" (__low), "1" (__upper)); \
- asm("":"=A" (n):"a" (__low),"d" (__high)); \
- __mod; \
-})
-
-#endif
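
/*
 * The canonical do_div() pattern, sketched: decimal-printing a 64-bit
 * value on a 32-bit CPU, where a plain '%' on a u64 would drag in
 * libgcc.  Note that do_div() modifies 'n' in place and evaluates to
 * the remainder.
 */
static char *u64_to_dec(unsigned long long n, char buf[21])
{
    char *p = buf + 20;

    *p = '\0';
    do {
        unsigned long r = do_div(n, 10);        /* n /= 10; r = n % 10 */
        *--p = '0' + r;
    } while ( n != 0 );

    return p;
}
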
+++ /dev/null
-/* $Id: dma.h,v 1.7 1992/12/14 00:29:34 root Exp root $
- * linux/include/asm/dma.h: Defines for using and allocating dma channels.
- * Written by Hennus Bergman, 1992.
- * High DMA channel support & info by Hannu Savolainen
- * and John Boyd, Nov. 1992.
- */
-
-#ifndef _ASM_DMA_H
-#define _ASM_DMA_H
-
-#include <xen/config.h>
-#include <xen/spinlock.h> /* And spinlocks */
-#include <asm/io.h> /* need byte IO */
-#include <xen/delay.h>
-
-
-#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER
-#define dma_outb outb_p
-#else
-#define dma_outb outb
-#endif
-
-#define dma_inb inb
-
-/*
- * NOTES about DMA transfers:
- *
- * controller 1: channels 0-3, byte operations, ports 00-1F
- * controller 2: channels 4-7, word operations, ports C0-DF
- *
- * - ALL registers are 8 bits only, regardless of transfer size
- * - channel 4 is not used - cascades 1 into 2.
- * - channels 0-3 are byte - addresses/counts are for physical bytes
- * - channels 5-7 are word - addresses/counts are for physical words
- * - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries
- * - transfer count loaded to registers is 1 less than actual count
- * - controller 2 offsets are all even (2x offsets for controller 1)
- * - page registers for 5-7 don't use data bit 0, represent 128K pages
- * - page registers for 0-3 use bit 0, represent 64K pages
- *
- * DMA transfers are limited to the lower 16MB of _physical_ memory.
- * Note that addresses loaded into registers must be _physical_ addresses,
- * not logical addresses (which may differ if paging is active).
- *
- * Address mapping for channels 0-3:
- *
- * A23 ... A16 A15 ... A8 A7 ... A0 (Physical addresses)
- * | ... | | ... | | ... |
- * | ... | | ... | | ... |
- * | ... | | ... | | ... |
- * P7 ... P0 A7 ... A0 A7 ... A0
- * | Page | Addr MSB | Addr LSB | (DMA registers)
- *
- * Address mapping for channels 5-7:
- *
- * A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0 (Physical addresses)
- * | ... | \ \ ... \ \ \ ... \ \
- * | ... | \ \ ... \ \ \ ... \ (not used)
- * | ... | \ \ ... \ \ \ ... \
- * P7 ... P1 (0) A7 A6 ... A0 A7 A6 ... A0
- * | Page | Addr MSB | Addr LSB | (DMA registers)
- *
- * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses
- * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at
- * the hardware level, so odd-byte transfers aren't possible).
- *
- * Transfer count (_not # bytes_) is limited to 64K, represented as actual
- * count - 1 : 64K => 0xFFFF, 1 => 0x0000. Thus, count is always 1 or more,
- * and up to 128K bytes may be transferred on channels 5-7 in one operation.
- *
- */
-
-#define MAX_DMA_CHANNELS 8
-
-/* The maximum address that we can perform a DMA transfer to on this platform */
-/*#define MAX_DMA_ADDRESS (PAGE_OFFSET+0x1000000)*/
-
-/* 8237 DMA controllers */
-#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */
-#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */
-
-/* DMA controller registers */
-#define DMA1_CMD_REG 0x08 /* command register (w) */
-#define DMA1_STAT_REG 0x08 /* status register (r) */
-#define DMA1_REQ_REG 0x09 /* request register (w) */
-#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */
-#define DMA1_MODE_REG 0x0B /* mode register (w) */
-#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */
-#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */
-#define DMA1_RESET_REG 0x0D /* Master Clear (w) */
-#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */
-#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */
-
-#define DMA2_CMD_REG 0xD0 /* command register (w) */
-#define DMA2_STAT_REG 0xD0 /* status register (r) */
-#define DMA2_REQ_REG 0xD2 /* request register (w) */
-#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */
-#define DMA2_MODE_REG 0xD6 /* mode register (w) */
-#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */
-#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */
-#define DMA2_RESET_REG 0xDA /* Master Clear (w) */
-#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */
-#define DMA2_MASK_ALL_REG 0xDE /* all-channels mask (w) */
-
-#define DMA_ADDR_0 0x00 /* DMA address registers */
-#define DMA_ADDR_1 0x02
-#define DMA_ADDR_2 0x04
-#define DMA_ADDR_3 0x06
-#define DMA_ADDR_4 0xC0
-#define DMA_ADDR_5 0xC4
-#define DMA_ADDR_6 0xC8
-#define DMA_ADDR_7 0xCC
-
-#define DMA_CNT_0 0x01 /* DMA count registers */
-#define DMA_CNT_1 0x03
-#define DMA_CNT_2 0x05
-#define DMA_CNT_3 0x07
-#define DMA_CNT_4 0xC2
-#define DMA_CNT_5 0xC6
-#define DMA_CNT_6 0xCA
-#define DMA_CNT_7 0xCE
-
-#define DMA_PAGE_0 0x87 /* DMA page registers */
-#define DMA_PAGE_1 0x83
-#define DMA_PAGE_2 0x81
-#define DMA_PAGE_3 0x82
-#define DMA_PAGE_5 0x8B
-#define DMA_PAGE_6 0x89
-#define DMA_PAGE_7 0x8A
-
-#define DMA_MODE_READ 0x44 /* I/O to memory, no autoinit, increment, single mode */
-#define DMA_MODE_WRITE 0x48 /* memory to I/O, no autoinit, increment, single mode */
-#define DMA_MODE_CASCADE 0xC0 /* pass thru DREQ->HRQ, DACK<-HLDA only */
-
-#define DMA_AUTOINIT 0x10
-
-
-extern spinlock_t dma_spin_lock;
-
-static __inline__ unsigned long claim_dma_lock(void)
-{
- unsigned long flags;
- spin_lock_irqsave(&dma_spin_lock, flags);
- return flags;
-}
-
-static __inline__ void release_dma_lock(unsigned long flags)
-{
- spin_unlock_irqrestore(&dma_spin_lock, flags);
-}
-
-/* enable/disable a specific DMA channel */
-static __inline__ void enable_dma(unsigned int dmanr)
-{
- if (dmanr<=3)
- dma_outb(dmanr, DMA1_MASK_REG);
- else
- dma_outb(dmanr & 3, DMA2_MASK_REG);
-}
-
-static __inline__ void disable_dma(unsigned int dmanr)
-{
- if (dmanr<=3)
- dma_outb(dmanr | 4, DMA1_MASK_REG);
- else
- dma_outb((dmanr & 3) | 4, DMA2_MASK_REG);
-}
-
-/* Clear the 'DMA Pointer Flip Flop'.
- * Write 0 for LSB/MSB, 1 for MSB/LSB access.
- * Use this once to initialize the FF to a known state.
- * After that, keep track of it. :-)
- * --- In order to do that, the DMA routines below should ---
- * --- only be used while holding the DMA lock ! ---
- */
-static __inline__ void clear_dma_ff(unsigned int dmanr)
-{
- if (dmanr<=3)
- dma_outb(0, DMA1_CLEAR_FF_REG);
- else
- dma_outb(0, DMA2_CLEAR_FF_REG);
-}
-
-/* set mode (above) for a specific DMA channel */
-static __inline__ void set_dma_mode(unsigned int dmanr, char mode)
-{
- if (dmanr<=3)
- dma_outb(mode | dmanr, DMA1_MODE_REG);
- else
- dma_outb(mode | (dmanr&3), DMA2_MODE_REG);
-}
-
-/* Set only the page register bits of the transfer address.
- * This is used for successive transfers when we know the contents of
- * the lower 16 bits of the DMA current address register, but a 64k boundary
- * may have been crossed.
- */
-static __inline__ void set_dma_page(unsigned int dmanr, char pagenr)
-{
- switch(dmanr) {
- case 0:
- dma_outb(pagenr, DMA_PAGE_0);
- break;
- case 1:
- dma_outb(pagenr, DMA_PAGE_1);
- break;
- case 2:
- dma_outb(pagenr, DMA_PAGE_2);
- break;
- case 3:
- dma_outb(pagenr, DMA_PAGE_3);
- break;
- case 5:
- dma_outb(pagenr & 0xfe, DMA_PAGE_5);
- break;
- case 6:
- dma_outb(pagenr & 0xfe, DMA_PAGE_6);
- break;
- case 7:
- dma_outb(pagenr & 0xfe, DMA_PAGE_7);
- break;
- }
-}
-
-
-/* Set transfer address & page bits for specific DMA channel.
- * Assumes dma flipflop is clear.
- */
-static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a)
-{
- set_dma_page(dmanr, a>>16);
- if (dmanr <= 3) {
- dma_outb( a & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
- dma_outb( (a>>8) & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
- } else {
- dma_outb( (a>>1) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
- dma_outb( (a>>9) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
- }
-}
-
-
-/* Set transfer size (max 64k for DMA0..3, 128k for DMA5..7) for
- * a specific DMA channel.
- * You must ensure the parameters are valid.
- * NOTE: from a manual: "the number of transfers is one more
- * than the initial word count"! This is taken into account.
- * Assumes dma flip-flop is clear.
- * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7.
- */
-static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count)
-{
- count--;
- if (dmanr <= 3) {
- dma_outb( count & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
- dma_outb( (count>>8) & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
- } else {
- dma_outb( (count>>1) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
- dma_outb( (count>>9) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
- }
-}
-
-
-/* Get DMA residue count. After a DMA transfer, this
- * should return zero. Reading this while a DMA transfer is
- * still in progress will return unpredictable results.
- * If called before the channel has been used, it may return 1.
- * Otherwise, it returns the number of _bytes_ left to transfer.
- *
- * Assumes DMA flip-flop is clear.
- */
-static __inline__ int get_dma_residue(unsigned int dmanr)
-{
- unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE
- : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE;
-
- /* using short to get 16-bit wrap around */
- unsigned short count;
-
- count = 1 + dma_inb(io_port);
- count += dma_inb(io_port) << 8;
-
- return (dmanr<=3)? count : (count<<1);
-}
-
-
-/* These are in kernel/dma.c: */
-extern int request_dma(unsigned int dmanr, const char * device_id); /* reserve a DMA channel */
-extern void free_dma(unsigned int dmanr); /* release it again */
-
-/* From PCI */
-
-#ifdef CONFIG_PCI
-extern int isa_dma_bridge_buggy;
-#else
-#define isa_dma_bridge_buggy (0)
-#endif
-
-#endif /* _ASM_DMA_H */
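
/*
 * Sketch of the canonical programming sequence for a single-cycle
 * device-to-memory transfer on channel 'ch', using the helpers above.
 * 'phys' must be an ISA-reachable physical address (below 16MB), and
 * for channels 0-3 the buffer must not cross a 64K boundary.
 */
static void setup_dma_read(unsigned int ch, unsigned int phys,
                           unsigned int len)
{
    unsigned long flags = claim_dma_lock();

    disable_dma(ch);                    /* mask while reprogramming    */
    clear_dma_ff(ch);                   /* reset the LSB/MSB flip-flop */
    set_dma_mode(ch, DMA_MODE_READ);    /* I/O -> memory, single mode  */
    set_dma_addr(ch, phys);             /* page register + offset      */
    set_dma_count(ch, len);             /* bytes; helper applies -1    */
    enable_dma(ch);                     /* unmask: ready for DREQ      */

    release_dma_lock(flags);
}
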
+++ /dev/null
-/******************************************************************************
- * domain_page.h
- *
- * Allow temporary mapping of domain page frames into Xen space.
- */
-
-#ifndef __ASM_DOMAIN_PAGE_H__
-#define __ASM_DOMAIN_PAGE_H__
-
-#include <xen/config.h>
-#include <xen/sched.h>
-
-extern unsigned long *mapcache;
-#define MAPCACHE_ENTRIES 1024
-
-/*
- * Maps a given physical address, returning corresponding virtual address.
- * The entire page containing that VA is now accessible until a
- * corresponding call to unmap_domain_mem().
- */
-extern void *map_domain_mem(unsigned long pa);
-
-/*
- * Pass a VA within a page previously mapped with map_domain_mem().
- * That page will then be removed from the mapping lists.
- */
-extern void unmap_domain_mem(void *va);
-
-#endif /* __ASM_DOMAIN_PAGE_H__ */
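
/*
 * Usage sketch: briefly map a domain page frame, read a word out of
 * it, and release the mapcache slot.  'pfn' and 'offset' are
 * illustrative; PAGE_SHIFT comes from the usual page definitions.
 */
static unsigned long peek_domain_word(unsigned long pfn,
                                      unsigned long offset)
{
    unsigned long *va, val;

    va  = (unsigned long *)map_domain_mem(pfn << PAGE_SHIFT);
    val = va[offset / sizeof(*va)];
    unmap_domain_mem(va);               /* must pair with the map */

    return val;
}
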
+++ /dev/null
-/*
- * fixmap.h: compile-time virtual memory allocation
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1998 Ingo Molnar
- *
- * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
- */
-
-#ifndef _ASM_FIXMAP_H
-#define _ASM_FIXMAP_H
-
-#include <xen/config.h>
-#include <asm/acpi.h>
-#include <asm/apicdef.h>
-#include <asm/page.h>
-
-/*
- * Here we define all the compile-time 'special' virtual
- * addresses. The point is to have a constant address at
- * compile time, but to set the physical address only
- * in the boot process. We allocate these special addresses
- * from the end of virtual memory (0xfffff000) backwards.
- * Also this lets us do fail-safe vmalloc(), we
- * can guarantee that these special addresses and
- * vmalloc()-ed addresses never overlap.
- *
- * These 'compile-time allocated' memory buffers are
- * fixed-size 4k pages (or larger if used with an increment
- * higher than 1). Use set_fixmap(idx,phys) to associate
- * physical memory with fixmap indices.
- *
- * TLB entries of such buffers will not be flushed across
- * task switches.
- */
-
-/*
- * On UP, currently we will have no trace of the fixmap mechanism,
- * no page table allocations, etc. This might change in the
- * future, say framebuffers for the console driver(s) could be
- * fix-mapped?
- */
-enum fixed_addresses {
-#ifdef CONFIG_X86_LOCAL_APIC
- FIX_APIC_BASE, /* local (CPU) APIC -- required for SMP or not */
-#endif
-#ifdef CONFIG_X86_IO_APIC
- FIX_IO_APIC_BASE_0,
- FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
-#endif
-#ifdef CONFIG_HIGHMEM
- FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
- FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
-#endif
-#ifdef CONFIG_ACPI_BOOT
- FIX_ACPI_BEGIN,
- FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
-#endif
- __end_of_fixed_addresses
-};
-
-extern void __set_fixmap (enum fixed_addresses idx,
- l1_pgentry_t entry);
-
-#define set_fixmap(idx, phys) \
- __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR))
-/*
- * Some hardware wants to get fixmapped without caching.
- */
-#define set_fixmap_nocache(idx, phys) \
- __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR_NOCACHE))
-/*
- * used by vmalloc.c.
- *
- * Leave one empty page between vmalloc'ed areas and
- * the start of the fixmap, and leave one page empty
- * at the top of memory.
- */
-#define FIXADDR_TOP (0xffffe000UL)
-#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
-#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
-
-#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
-
-extern void __this_fixmap_does_not_exist(void);
-
-/*
- * 'index to address' translation. If anyone tries to use the idx
- * directly without translation, we catch the bug with a NULL-dereference
- * kernel oops. Illegal ranges of incoming indices are caught too.
- */
-static inline unsigned long fix_to_virt(const unsigned int idx)
-{
- /*
- * this branch gets completely eliminated after inlining,
- * except when someone tries to use fixaddr indices in an
- * illegal way. (such as mixing up address types or using
- * out-of-range indices).
- *
- * If it doesn't get removed, the linker will complain
- * loudly with a reasonably clear error message..
- */
- if (idx >= __end_of_fixed_addresses)
- __this_fixmap_does_not_exist();
-
- return __fix_to_virt(idx);
-}
-
-#endif
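
/*
 * Sketch: establishing and then using a fixmap slot.  With FIXADDR_TOP
 * at 0xffffe000, slot 0 resolves to 0xffffe000, slot 1 to 0xffffd000,
 * and so on downwards -- all constants at compile time.  The 0x30
 * offset below is the local APIC version register.
 */
static inline unsigned int local_apic_version(unsigned long apic_phys)
{
    /* Device registers: map uncached. */
    set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
    return *(volatile unsigned int *)(fix_to_virt(FIX_APIC_BASE) + 0x30);
}
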
+++ /dev/null
-/******************************************************************************
- * flushtlb.h
- *
- * TLB flushes are timestamped using a global virtual 'clock' which ticks
- * on any TLB flush on any processor.
- *
- * Copyright (c) 2003, K A Fraser
- */
-
-#ifndef __FLUSHTLB_H__
-#define __FLUSHTLB_H__
-
-#include <xen/smp.h>
-
-/*
- * Every time the TLB clock passes an "epoch", every CPU's TLB is flushed.
- * Therefore, if the current TLB time and a previously-read timestamp differ
- * in their significant bits (i.e., ~TLBCLOCK_EPOCH_MASK), then the TLB clock
- * has wrapped at least once and every CPU's TLB is guaranteed to have been
- * flushed meanwhile.
- * This allows us to deal gracefully with a bounded (a.k.a. wrapping) clock.
- */
-#define TLBCLOCK_EPOCH_MASK ((1U<<16)-1)
-
-/*
- * 'cpu_stamp' is the current timestamp for the CPU we are testing.
- * 'lastuse_stamp' is a timestamp taken when the PFN we are testing was last
- * used for a purpose that may have caused the CPU's TLB to become tainted.
- */
-static inline int NEED_FLUSH(u32 cpu_stamp, u32 lastuse_stamp)
-{
- /*
- * Why does this work?
- * 1. The XOR sets high-order bits iff the stamps are from differing epochs.
- * 2. Subtraction sets high-order bits if 'cpu_stamp > lastuse_stamp'.
- * In either case a flush is unnecessary: we therefore OR the results from
- * (1) and (2), mask the high-order bits, and return the inverse.
- */
- return !(((lastuse_stamp^cpu_stamp)|(lastuse_stamp-cpu_stamp)) &
- ~TLBCLOCK_EPOCH_MASK);
-}
-
-extern u32 tlbflush_clock;
-extern u32 tlbflush_time[NR_CPUS];
-
-extern void tlb_clocktick(void);
-extern void new_tlbflush_clock_period(void);
-
-#endif /* __FLUSHTLB_H__ */
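
/*
 * Worked examples of the epoch logic (illustrative self-check; the
 * stamps are arbitrary hex values, epoch mask 0xffff):
 */
static inline int tlbclock_examples_hold(void)
{
    return !NEED_FLUSH(0x00010005, 0x00010003)  /* same epoch, CPU flushed
                                                   after last use: no flush */
        &&  NEED_FLUSH(0x00010003, 0x00010005)  /* same epoch, last use is
                                                   newer: flush needed */
        && !NEED_FLUSH(0x00020001, 0x0001fffe); /* epochs differ, so every
                                                   TLB was flushed: no flush */
}
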
+++ /dev/null
-#ifndef __ASM_HARDIRQ_H
-#define __ASM_HARDIRQ_H
-
-#include <xen/config.h>
-#include <xen/irq.h>
-
-/* assembly code in softirq.h is sensitive to the offsets of these fields */
-typedef struct {
- unsigned int __softirq_pending;
- unsigned int __local_irq_count;
- unsigned int __local_bh_count;
- unsigned int __syscall_count;
- unsigned int __nmi_count;
- unsigned long idle_timestamp;
-} ____cacheline_aligned irq_cpustat_t;
-
-#include <xen/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
-
-/*
- * Are we in an interrupt context? Either doing bottom half
- * or hardware interrupt processing?
- */
-#define in_interrupt() ({ int __cpu = smp_processor_id(); \
- (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); })
-
-#define in_irq() (local_irq_count(smp_processor_id()) != 0)
-
-#ifndef CONFIG_SMP
-
-#define hardirq_trylock(cpu) (local_irq_count(cpu) == 0)
-#define hardirq_endlock(cpu) do { } while (0)
-
-#define irq_enter(cpu, irq) (local_irq_count(cpu)++)
-#define irq_exit(cpu, irq) (local_irq_count(cpu)--)
-
-#define synchronize_irq() barrier()
-
-#else
-
-#include <asm/atomic.h>
-#include <asm/smp.h>
-
-extern unsigned char global_irq_holder;
-extern unsigned volatile long global_irq_lock; /* long for set_bit -RR */
-
-static inline int irqs_running (void)
-{
- int i;
-
- for (i = 0; i < smp_num_cpus; i++)
- if (local_irq_count(i))
- return 1;
- return 0;
-}
-
-static inline void release_irqlock(int cpu)
-{
- /* if we didn't own the irq lock, just ignore.. */
- if (global_irq_holder == (unsigned char) cpu) {
- global_irq_holder = NO_PROC_ID;
- clear_bit(0,&global_irq_lock);
- }
-}
-
-static inline void irq_enter(int cpu, int irq)
-{
- ++local_irq_count(cpu);
-
- smp_mb();
-
- while (test_bit(0,&global_irq_lock)) {
- cpu_relax();
- }
-}
-
-static inline void irq_exit(int cpu, int irq)
-{
- --local_irq_count(cpu);
-}
-
-static inline int hardirq_trylock(int cpu)
-{
- return !local_irq_count(cpu) && !test_bit(0,&global_irq_lock);
-}
-
-#define hardirq_endlock(cpu) do { } while (0)
-
-extern void synchronize_irq(void);
-
-#endif /* CONFIG_SMP */
-
-#endif /* __ASM_HARDIRQ_H */
+++ /dev/null
-/*
- * linux/include/asm-i386/hdreg.h
- *
- * Copyright (C) 1994-1996 Linus Torvalds & authors
- */
-
-#ifndef __ASMi386_HDREG_H
-#define __ASMi386_HDREG_H
-
-//typedef unsigned short ide_ioreg_t;
-typedef unsigned long ide_ioreg_t;
-
-#endif /* __ASMi386_HDREG_H */
+++ /dev/null
-/*
- * include/asm-i386/i387.h
- *
- * Copyright (C) 1994 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * General FPU state handling cleanups
- * Gareth Hughes <gareth@valinux.com>, May 2000
- */
-
-#ifndef __ASM_I386_I387_H
-#define __ASM_I386_I387_H
-
-#include <xen/sched.h>
-#include <asm/processor.h>
-
-extern void init_fpu(void);
-extern void save_init_fpu( struct task_struct *tsk );
-extern void restore_fpu( struct task_struct *tsk );
-
-#define unlazy_fpu( tsk ) do { \
- if ( test_bit(PF_USEDFPU, &tsk->flags) ) \
- save_init_fpu( tsk ); \
-} while (0)
-
-#define clear_fpu( tsk ) do { \
- if ( test_and_clear_bit(PF_USEDFPU, &tsk->flags) ) { \
- asm volatile("fwait"); \
- stts(); \
- } \
-} while (0)
-
-#define load_mxcsr( val ) do { \
- unsigned long __mxcsr = ((unsigned long)(val) & 0xffbf); \
- asm volatile( "ldmxcsr %0" : : "m" (__mxcsr) ); \
-} while (0)
-
-#endif /* __ASM_I386_I387_H */
+++ /dev/null
-/*
- * linux/include/asm-i386/ide.h
- *
- * Copyright (C) 1994-1996 Linus Torvalds & authors
- */
-
-/*
- * This file contains the i386 architecture specific IDE code.
- */
-
-#ifndef __ASMi386_IDE_H
-#define __ASMi386_IDE_H
-
-#ifdef __KERNEL__
-
-#include <xen/config.h>
-
-#ifndef MAX_HWIFS
-# ifdef CONFIG_BLK_DEV_IDEPCI
-#define MAX_HWIFS 10
-# else
-#define MAX_HWIFS 6
-# endif
-#endif
-
-#define ide__sti() __sti()
-
-static __inline__ int ide_default_irq(ide_ioreg_t base)
-{
- switch (base) {
- case 0x1f0: return 14;
- case 0x170: return 15;
- case 0x1e8: return 11;
- case 0x168: return 10;
- case 0x1e0: return 8;
- case 0x160: return 12;
- default:
- return 0;
- }
-}
-
-static __inline__ ide_ioreg_t ide_default_io_base(int index)
-{
- switch (index) {
- case 0: return 0x1f0;
- case 1: return 0x170;
- case 2: return 0x1e8;
- case 3: return 0x168;
- case 4: return 0x1e0;
- case 5: return 0x160;
- default:
- return 0;
- }
-}
-
-static __inline__ void ide_init_hwif_ports(hw_regs_t *hw, ide_ioreg_t data_port, ide_ioreg_t ctrl_port, int *irq)
-{
- ide_ioreg_t reg = data_port;
- int i;
-
- for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
- hw->io_ports[i] = reg;
- reg += 1;
- }
- if (ctrl_port) {
- hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port;
- } else {
- hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206;
- }
- if (irq != NULL)
- *irq = 0;
- hw->io_ports[IDE_IRQ_OFFSET] = 0;
-}
-
-static __inline__ void ide_init_default_hwifs(void)
-{
-#ifndef CONFIG_BLK_DEV_IDEPCI
- hw_regs_t hw;
- int index;
-
- for(index = 0; index < MAX_HWIFS; index++) {
- ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL);
- hw.irq = ide_default_irq(ide_default_io_base(index));
- ide_register_hw(&hw, NULL);
- }
-#endif /* CONFIG_BLK_DEV_IDEPCI */
-}
-
-typedef union {
- unsigned all : 8; /* all of the bits together */
- struct {
- unsigned head : 4; /* always zeros here */
- unsigned unit : 1; /* drive select number, 0 or 1 */
- unsigned bit5 : 1; /* always 1 */
- unsigned lba : 1; /* using LBA instead of CHS */
- unsigned bit7 : 1; /* always 1 */
- } b;
-} select_t;
-
-typedef union {
- unsigned all : 8; /* all of the bits together */
- struct {
- unsigned bit0 : 1;
- unsigned nIEN : 1; /* device INTRQ to host */
- unsigned SRST : 1; /* host soft reset bit */
- unsigned bit3 : 1; /* ATA-2 thingy */
- unsigned reserved456 : 3;
- unsigned HOB : 1; /* 48-bit address ordering */
- } b;
-} control_t;
-
-#define ide_request_irq(irq,hand,flg,dev,id) request_irq((irq),(hand),(flg),(dev),(id))
-#define ide_free_irq(irq,dev_id) free_irq((irq), (dev_id))
-#define ide_check_region(from,extent) check_region((from), (extent))
-#define ide_request_region(from,extent,name) request_region((from), (extent), (name))
-#define ide_release_region(from,extent) release_region((from), (extent))
-
-/*
- * The following are not needed for the non-m68k ports
- */
-#define ide_ack_intr(hwif) (1)
-#define ide_fix_driveid(id) do {} while (0)
-#define ide_release_lock(lock) do {} while (0)
-#define ide_get_lock(lock, hdlr, data) do {} while (0)
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASMi386_IDE_H */
+++ /dev/null
-#ifndef _ASM_IO_H
-#define _ASM_IO_H
-
-#include <xen/config.h>
-#include <asm/page.h>
-
-#define IO_SPACE_LIMIT 0xffff
-
-/*#include <xen/vmalloc.h>*/
-
-/*
- * Temporary debugging check to catch old code using
- * unmapped ISA addresses. Will be removed in 2.4.
- */
-#if CONFIG_DEBUG_IOVIRT
- extern void *__io_virt_debug(unsigned long x, const char *file, int line);
- extern unsigned long __io_phys_debug(unsigned long x, const char *file, int line);
- #define __io_virt(x) __io_virt_debug((unsigned long)(x), __FILE__, __LINE__)
-//#define __io_phys(x) __io_phys_debug((unsigned long)(x), __FILE__, __LINE__)
-#else
- #define __io_virt(x) ((void *)(x))
-//#define __io_phys(x) __pa(x)
-#endif
-
-
-/**
- * virt_to_phys - map virtual addresses to physical
- * @address: address to remap
- *
- * The returned physical address is the physical (CPU) mapping for
- * the memory address given. It is only valid to use this function on
- * addresses directly mapped or allocated via kmalloc.
- *
- * This function does not give bus mappings for DMA transfers. In
- * almost all conceivable cases a device driver should not be using
- * this function
- */
-
-static inline unsigned long virt_to_phys(volatile void * address)
-{
- return __pa(address);
-}
-
-/**
- * phys_to_virt - map physical address to virtual
- * @address: address to remap
- *
- * The returned virtual address is a current CPU mapping for
- * the memory address given. It is only valid to use this function on
- * addresses that have a kernel mapping
- *
- * This function does not handle bus mappings for DMA transfers. In
- * almost all conceivable cases a device driver should not be using
- * this function
- */
-
-static inline void * phys_to_virt(unsigned long address)
-{
- return __va(address);
-}
-
-/*
- * Change "struct pfn_info" to physical address.
- */
-#ifdef CONFIG_HIGHMEM64G
-#define page_to_phys(page) ((u64)(page - frame_table) << PAGE_SHIFT)
-#else
-#define page_to_phys(page) ((page - frame_table) << PAGE_SHIFT)
-#endif
-
-#define page_to_pfn(_page) ((unsigned long)((_page) - frame_table))
-#define page_to_virt(_page) phys_to_virt(page_to_phys(_page))
-
-
-extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
-
-static inline void * ioremap (unsigned long offset, unsigned long size)
-{
- return __ioremap(offset, size, 0);
-}
-
-/*
- * This one maps high-address device memory and turns off caching for that area.
- * It's useful if some control registers are in such an area and write combining
- * or read caching is not desirable:
- */
-static inline void * ioremap_nocache (unsigned long offset, unsigned long size)
-{
- return __ioremap(offset, size, _PAGE_PCD);
-}
-
-extern void iounmap(void *addr);
-
-/*
- * IO bus memory addresses are also 1:1 with the physical address
- */
-#define virt_to_bus virt_to_phys
-#define bus_to_virt phys_to_virt
-#define page_to_bus page_to_phys
-
-/*
- * readX/writeX() are used to access memory mapped devices. On some
- * architectures the memory mapped IO stuff needs to be accessed
- * differently. On the x86 architecture, we just read/write the
- * memory location directly.
- */
-
-#define readb(addr) (*(volatile unsigned char *) __io_virt(addr))
-#define readw(addr) (*(volatile unsigned short *) __io_virt(addr))
-#define readl(addr) (*(volatile unsigned int *) __io_virt(addr))
-#define __raw_readb readb
-#define __raw_readw readw
-#define __raw_readl readl
-
-#define writeb(b,addr) (*(volatile unsigned char *) __io_virt(addr) = (b))
-#define writew(b,addr) (*(volatile unsigned short *) __io_virt(addr) = (b))
-#define writel(b,addr) (*(volatile unsigned int *) __io_virt(addr) = (b))
-#define __raw_writeb writeb
-#define __raw_writew writew
-#define __raw_writel writel
-
-#define memset_io(a,b,c) memset(__io_virt(a),(b),(c))
-#define memcpy_fromio(a,b,c) memcpy((a),__io_virt(b),(c))
-#define memcpy_toio(a,b,c) memcpy(__io_virt(a),(b),(c))
-
-/*
- * ISA space is 'always mapped' on a typical x86 system, no need to
- * explicitly ioremap() it. The fact that the ISA IO space is mapped
- * to PAGE_OFFSET is pure coincidence - it does not mean ISA values
- * are physical addresses. The following constant pointer can be
- * used as the IO-area pointer (it can be iounmapped as well, so the
- * analogy with PCI is quite large):
- */
-#define __ISA_IO_base ((char *)(PAGE_OFFSET))
-
-#define isa_readb(a) readb(__ISA_IO_base + (a))
-#define isa_readw(a) readw(__ISA_IO_base + (a))
-#define isa_readl(a) readl(__ISA_IO_base + (a))
-#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a))
-#define isa_writew(w,a) writew(w,__ISA_IO_base + (a))
-#define isa_writel(l,a) writel(l,__ISA_IO_base + (a))
-#define isa_memset_io(a,b,c) memset_io(__ISA_IO_base + (a),(b),(c))
-#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),__ISA_IO_base + (b),(c))
-#define isa_memcpy_toio(a,b,c) memcpy_toio(__ISA_IO_base + (a),(b),(c))
-
-
-/*
- * Again, i386 does not require any memory-mapped IO specific functions.
- */
-
-#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(b),(c),(d))
-#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(__ISA_IO_base + (b)),(c),(d))
-
-static inline int check_signature(unsigned long io_addr,
- const unsigned char *signature, int length)
-{
- int retval = 0;
- do {
- if (readb(io_addr) != *signature)
- goto out;
- io_addr++;
- signature++;
- length--;
- } while (length);
- retval = 1;
-out:
- return retval;
-}
-
-static inline int isa_check_signature(unsigned long io_addr,
- const unsigned char *signature, int length)
-{
- int retval = 0;
- do {
- if (isa_readb(io_addr) != *signature)
- goto out;
- io_addr++;
- signature++;
- length--;
- } while (length);
- retval = 1;
-out:
- return retval;
-}
-
-/*
- * Cache management
- *
- * This needed for two cases
- * 1. Out of order aware processors
- * 2. Accidentally out of order processors (PPro errata #51)
- */
-
-#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
-
-static inline void flush_write_buffers(void)
-{
- __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory");
-}
-
-#define dma_cache_inv(_start,_size) flush_write_buffers()
-#define dma_cache_wback(_start,_size) flush_write_buffers()
-#define dma_cache_wback_inv(_start,_size) flush_write_buffers()
-
-#else
-
-/* Nothing to do */
-
-#define dma_cache_inv(_start,_size) do { } while (0)
-#define dma_cache_wback(_start,_size) do { } while (0)
-#define dma_cache_wback_inv(_start,_size) do { } while (0)
-#define flush_write_buffers()
-
-#endif
-
-#ifdef SLOW_IO_BY_JUMPING
-#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:"
-#else
-#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
-#endif
-
-#ifdef REALLY_SLOW_IO
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
-#else
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
-#endif
-
-
-/*
- * Talk about misusing macros..
- */
-#define __OUT1(s,x) \
-static inline void out##s(unsigned x value, unsigned short port) {
-
-#define __OUT2(s,s1,s2) \
-__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
-
-#define __OUT(s,s1,x) \
-__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));}
-
-#define __IN1(s) \
-static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
-
-#define __IN2(s,s1,s2) \
-__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
-
-#define __IN(s,s1,i...) \
-__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; }
-
-#define __INS(s) \
-static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
-{ __asm__ __volatile__ ("rep ; ins" #s \
-: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
-
-#define __OUTS(s) \
-static inline void outs##s(unsigned short port, const void * addr, unsigned long count) \
-{ __asm__ __volatile__ ("rep ; outs" #s \
-: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
-
-#define RETURN_TYPE unsigned char
-__IN(b,"")
-#undef RETURN_TYPE
-#define RETURN_TYPE unsigned short
-__IN(w,"")
-#undef RETURN_TYPE
-#define RETURN_TYPE unsigned int
-__IN(l,"")
-#undef RETURN_TYPE
-
-__OUT(b,"b",char)
-__OUT(w,"w",short)
-__OUT(l,,int)
-
-__INS(b)
-__INS(w)
-__INS(l)
-
-__OUTS(b)
-__OUTS(w)
-__OUTS(l)
-
-#endif
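
/*
 * What the macro machinery above boils down to: __OUT(b,"b",char), for
 * example, expands in effect to
 *
 *   static inline void outb(unsigned char value, unsigned short port)
 *   { __asm__ __volatile__("outb %b0,%w1" : : "a"(value), "Nd"(port)); }
 *
 * plus an outb_p() variant with the slow-down suffix appended.  A
 * typical use (illustrative): the traditional POST/debug port.
 */
static inline void post_code(unsigned char code)
{
    outb(code, 0x80);                   /* note Linux order: value, port */
}
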
+++ /dev/null
-#ifndef __ASM_IO_APIC_H
-#define __ASM_IO_APIC_H
-
-#include <xen/config.h>
-#include <xen/types.h>
-
-/*
- * Intel IO-APIC support for SMP and UP systems.
- *
- * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
- */
-
-#ifdef CONFIG_X86_IO_APIC
-
-#define APIC_MISMATCH_DEBUG
-
-#define IO_APIC_BASE(idx) \
- ((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \
- + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK)))
-
-/*
- * The structure of the IO-APIC:
- */
-struct IO_APIC_reg_00 {
- __u32 __reserved_2 : 14,
- LTS : 1,
- delivery_type : 1,
- __reserved_1 : 8,
- ID : 4,
- __reserved_0 : 4;
-} __attribute__ ((packed));
-
-struct IO_APIC_reg_01 {
- __u32 version : 8,
- __reserved_2 : 7,
- PRQ : 1,
- entries : 8,
- __reserved_1 : 8;
-} __attribute__ ((packed));
-
-struct IO_APIC_reg_02 {
- __u32 __reserved_2 : 24,
- arbitration : 4,
- __reserved_1 : 4;
-} __attribute__ ((packed));
-
-struct IO_APIC_reg_03 {
- __u32 boot_DT : 1,
- __reserved_1 : 31;
-} __attribute__ ((packed));
-
-/*
- * # of IO-APICs and # of IRQ routing registers
- */
-extern int nr_ioapics;
-extern int nr_ioapic_registers[MAX_IO_APICS];
-
-enum ioapic_irq_destination_types {
- dest_Fixed = 0,
- dest_LowestPrio = 1,
- dest_SMI = 2,
- dest__reserved_1 = 3,
- dest_NMI = 4,
- dest_INIT = 5,
- dest__reserved_2 = 6,
- dest_ExtINT = 7
-};
-
-struct IO_APIC_route_entry {
- __u32 vector : 8,
- delivery_mode : 3, /* 000: FIXED
- * 001: lowest prio
- * 111: ExtINT
- */
- dest_mode : 1, /* 0: physical, 1: logical */
- delivery_status : 1,
- polarity : 1,
- irr : 1,
- trigger : 1, /* 0: edge, 1: level */
- mask : 1, /* 0: enabled, 1: disabled */
- __reserved_2 : 15;
-
- union { struct { __u32
- __reserved_1 : 24,
- physical_dest : 4,
- __reserved_2 : 4;
- } physical;
-
- struct { __u32
- __reserved_1 : 24,
- logical_dest : 8;
- } logical;
- } dest;
-
-} __attribute__ ((packed));
-
-/*
- * MP-BIOS irq configuration table structures:
- */
-
-/* I/O APIC entries */
-extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
-
-/* # of MP IRQ source entries */
-extern int mp_irq_entries;
-
-/* MP IRQ source entries */
-extern struct mpc_config_intsrc *mp_irqs;
-
-/* non-0 if default (table-less) MP configuration */
-extern int mpc_default_type;
-
-static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
-{
- *IO_APIC_BASE(apic) = reg;
- return *(IO_APIC_BASE(apic)+4);
-}
-
-static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
-{
- *IO_APIC_BASE(apic) = reg;
- *(IO_APIC_BASE(apic)+4) = value;
-}
-
-/*
- * Synchronize the IO-APIC and the CPU by doing
- * a dummy read from the IO-APIC
- */
-static inline void io_apic_sync(unsigned int apic)
-{
- (void) *(IO_APIC_BASE(apic)+4);
-}
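
As an illustration of the read window: register 0x01 of each IO-APIC holds the version and entry count, so overlaying the IO_APIC_reg_01 layout on an io_apic_read() result yields the number of redirection entries (a sketch; the hardware field stores the count minus one):

static inline int io_apic_redir_entries(unsigned int apic)
{
    struct IO_APIC_reg_01 reg_01;
    /* Select register 0x01 and read it back through the data window. */
    *(int *)&reg_01 = io_apic_read(apic, 1);
    return reg_01.entries + 1;
}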
-
-/*
- * If we use the IO-APIC for IRQ routing, disable automatic
- * assignment of PCI IRQ's.
- */
-#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup)
-
-#ifdef CONFIG_ACPI_BOOT
-extern int io_apic_get_unique_id (int ioapic, int apic_id);
-extern int io_apic_get_version (int ioapic);
-extern int io_apic_get_redir_entries (int ioapic);
-extern int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low);
-#endif
-
-extern int skip_ioapic_setup; /* 1 for "noapic" */
-
-static inline void disable_ioapic_setup(void)
-{
- skip_ioapic_setup = 1;
-}
-
-static inline int ioapic_setup_disabled(void)
-{
- return skip_ioapic_setup;
-}
-
-#else /* !CONFIG_X86_IO_APIC */
-#define io_apic_assign_pci_irqs 0
-
-static inline void disable_ioapic_setup(void)
-{ }
-
-#endif /* !CONFIG_X86_IO_APIC */
-
-#endif
+++ /dev/null
-#ifndef _ASM_HW_IRQ_H
-#define _ASM_HW_IRQ_H
-
-/* (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar */
-
-#include <xen/config.h>
-#include <asm/atomic.h>
-
-#define SA_INTERRUPT 0x20000000
-#define SA_SHIRQ 0x04000000
-#define SA_NOPROFILE 0x02000000
-
-#define SA_SAMPLE_RANDOM 0 /* Linux driver compatibility */
-
-#define TIMER_IRQ 0
-
-extern void disable_irq(unsigned int);
-extern void disable_irq_nosync(unsigned int);
-extern void enable_irq(unsigned int);
-
-/*
- * IDT vectors usable for external interrupt sources start
- * at 0x30:
- */
-#define FIRST_EXTERNAL_VECTOR 0x30
-
-#define NR_IRQS (256 - FIRST_EXTERNAL_VECTOR)
-
-#define HYPERVISOR_CALL_VECTOR 0x82
-
-/*
- * Vectors 0x30-0x3f are used for ISA interrupts.
- */
-
-/*
- * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
- *
- * Some of the following vectors are 'rare'; they are merged
- * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
- * TLB, reschedule and local APIC vectors are performance-critical.
- *
- * Vectors 0xf0-0xfa are free (reserved for future Linux use).
- */
-#define SPURIOUS_APIC_VECTOR 0xff
-#define ERROR_APIC_VECTOR 0xfe
-#define INVALIDATE_TLB_VECTOR 0xfd
-#define EVENT_CHECK_VECTOR 0xfc
-#define CALL_FUNCTION_VECTOR 0xfb
-#define KDB_VECTOR 0xfa
-
-/*
- * Local APIC timer IRQ vector is on a different priority level,
- * to work around the 'lost local interrupt if more than 2 IRQ
- * sources per level' errata.
- */
-#define LOCAL_TIMER_VECTOR 0xef
-
-/*
- * First APIC vector available to drivers: (vectors 0x40-0xee)
- * we start at 0x41 to spread out vectors evenly between priority
- * levels. (0x82 is the syscall vector)
- */
-#define FIRST_DEVICE_VECTOR 0x41
-#define FIRST_SYSTEM_VECTOR 0xef
-
-extern int irq_vector[NR_IRQS];
-#define IO_APIC_VECTOR(irq) irq_vector[irq]
-
-/*
- * Various low-level irq details needed by irq.c, process.c,
- * time.c, io_apic.c and smp.c
- *
- * Interrupt entry/exit code at both C and assembly level
- */
-
-extern void mask_irq(unsigned int irq);
-extern void unmask_irq(unsigned int irq);
-extern void disable_8259A_irq(unsigned int irq);
-extern void enable_8259A_irq(unsigned int irq);
-extern int i8259A_irq_pending(unsigned int irq);
-extern void make_8259A_irq(unsigned int irq);
-extern void init_8259A(int aeoi);
-extern void FASTCALL(send_IPI_self(int vector));
-extern void init_VISWS_APIC_irqs(void);
-extern void setup_IO_APIC(void);
-extern void disable_IO_APIC(void);
-extern void print_IO_APIC(void);
-extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
-extern void send_IPI(int dest, int vector);
-
-extern unsigned long io_apic_irqs;
-
-extern atomic_t irq_err_count;
-extern atomic_t irq_mis_count;
-
-extern char _stext, _etext;
-
-#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
-
-#define __STR(x) #x
-#define STR(x) __STR(x)
-
-#define SAVE_ALL \
- "cld\n\t" \
- "pushl %gs\n\t" \
- "pushl %fs\n\t" \
- "pushl %es\n\t" \
- "pushl %ds\n\t" \
- "pushl %eax\n\t" \
- "pushl %ebp\n\t" \
- "pushl %edi\n\t" \
- "pushl %esi\n\t" \
- "pushl %edx\n\t" \
- "pushl %ecx\n\t" \
- "pushl %ebx\n\t" \
- "movl $" STR(__HYPERVISOR_DS) ",%edx\n\t" \
- "movl %edx,%ds\n\t" \
- "movl %edx,%es\n\t" \
- "movl %edx,%fs\n\t" \
- "movl %edx,%gs\n\t"
-
-#define IRQ_NAME2(nr) nr##_interrupt(void)
-#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
-
-/*
- * SMP has a few special interrupts for IPI messages
- */
-
- /* there is a second layer of macro just to get the symbolic
- name for the vector evaluated. This change is for RTLinux */
-#define BUILD_SMP_INTERRUPT(x,v) XBUILD_SMP_INTERRUPT(x,v)
-#define XBUILD_SMP_INTERRUPT(x,v)\
-asmlinkage void x(void); \
-asmlinkage void call_##x(void); \
-__asm__( \
-"\n"__ALIGN_STR"\n" \
-SYMBOL_NAME_STR(x) ":\n\t" \
- "pushl $"#v"-256\n\t" \
- SAVE_ALL \
- SYMBOL_NAME_STR(call_##x)":\n\t" \
- "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \
- "jmp ret_from_intr\n");
-
-#define BUILD_SMP_TIMER_INTERRUPT(x,v) XBUILD_SMP_TIMER_INTERRUPT(x,v)
-#define XBUILD_SMP_TIMER_INTERRUPT(x,v) \
-asmlinkage void x(struct pt_regs * regs); \
-asmlinkage void call_##x(void); \
-__asm__( \
-"\n"__ALIGN_STR"\n" \
-SYMBOL_NAME_STR(x) ":\n\t" \
- "pushl $"#v"-256\n\t" \
- SAVE_ALL \
- "movl %esp,%eax\n\t" \
- "pushl %eax\n\t" \
- SYMBOL_NAME_STR(call_##x)":\n\t" \
- "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \
- "addl $4,%esp\n\t" \
- "jmp ret_from_intr\n");
-
-#define BUILD_COMMON_IRQ() \
-asmlinkage void call_do_IRQ(void); \
-__asm__( \
- "\n" __ALIGN_STR"\n" \
- "common_interrupt:\n\t" \
- SAVE_ALL \
- SYMBOL_NAME_STR(call_do_IRQ)":\n\t" \
- "call " SYMBOL_NAME_STR(do_IRQ) "\n\t" \
- "jmp ret_from_intr\n");
-
-/*
- * Subtle: orig_eax is used by the signal code to distinguish between
- * system calls and interrupted 'random user-space'. Thus we have
- * to put a negative value into orig_eax here. (the problem is that
- * both system calls and IRQs want to have small integer numbers in
- * orig_eax, and the syscall code has won the optimization conflict ;)
- *
- * Subtle as a pig's ear. VY
- */
-
-#define BUILD_IRQ(nr) \
-asmlinkage void IRQ_NAME(nr); \
-__asm__( \
-"\n"__ALIGN_STR"\n" \
-SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \
- "pushl $"#nr"-256\n\t" \
- "jmp common_interrupt");
-
-extern unsigned long prof_cpu_mask;
-extern unsigned int * prof_buffer;
-extern unsigned long prof_len;
-extern unsigned long prof_shift;
-
-#include <xen/irq.h>
-
-#if defined(CONFIG_X86_IO_APIC)
-static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {
- if (IO_APIC_IRQ(i))
- send_IPI_self(IO_APIC_VECTOR(i));
-}
-#else
-static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {}
-#endif
-
-#endif /* _ASM_HW_IRQ_H */
+++ /dev/null
-#ifndef __ARCH_LDT_H
-#define __ARCH_LDT_H
-
-#ifndef __ASSEMBLY__
-
-static inline void load_LDT(struct task_struct *p)
-{
- unsigned int cpu;
- struct desc_struct *desc;
- unsigned long ents;
-
- if ( (ents = p->mm.ldt_ents) == 0 )
- {
- __asm__ __volatile__ ( "lldt %%ax" : : "a" (0) );
- }
- else
- {
- cpu = smp_processor_id();
- desc = (struct desc_struct *)GET_GDT_ADDRESS(p) + __LDT(cpu);
- desc->a = ((LDT_VIRT_START&0xffff)<<16) | (ents*8-1);
- desc->b = (LDT_VIRT_START&(0xff<<24)) | 0x8200 |
- ((LDT_VIRT_START&0xff0000)>>16);
- __asm__ __volatile__ ( "lldt %%ax" : : "a" (__LDT(cpu)<<3) );
- }
-}
-
-#endif /* !__ASSEMBLY__ */
-
-#endif
+++ /dev/null
-/*
- * Machine dependent access functions for RTC registers.
- */
-#ifndef _ASM_MC146818RTC_H
-#define _ASM_MC146818RTC_H
-
-#include <asm/io.h>
-#include <xen/spinlock.h>
-
-extern spinlock_t rtc_lock; /* serialize CMOS RAM access */
-
-/**********************************************************************
- * register summary
- **********************************************************************/
-#define RTC_SECONDS 0
-#define RTC_SECONDS_ALARM 1
-#define RTC_MINUTES 2
-#define RTC_MINUTES_ALARM 3
-#define RTC_HOURS 4
-#define RTC_HOURS_ALARM 5
-/* RTC_*_alarm is always true if 2 MSBs are set */
-# define RTC_ALARM_DONT_CARE 0xC0
-
-#define RTC_DAY_OF_WEEK 6
-#define RTC_DAY_OF_MONTH 7
-#define RTC_MONTH 8
-#define RTC_YEAR 9
-
-/* control registers - Moto names
- */
-#define RTC_REG_A 10
-#define RTC_REG_B 11
-#define RTC_REG_C 12
-#define RTC_REG_D 13
-
-/**********************************************************************
- * register details
- **********************************************************************/
-#define RTC_FREQ_SELECT RTC_REG_A
-
-/* update-in-progress - set to "1" 244 microsecs before RTC goes off the bus,
- * reset after update (may take 1.984ms @ 32768Hz RefClock) is complete,
- * totalling to a max high interval of 2.228 ms.
- */
-# define RTC_UIP 0x80
-# define RTC_DIV_CTL 0x70
- /* divider control: refclock values 4.194 MHz / 1.049 MHz / 32.768 kHz */
-# define RTC_REF_CLCK_4MHZ 0x00
-# define RTC_REF_CLCK_1MHZ 0x10
-# define RTC_REF_CLCK_32KHZ 0x20
- /* 2 values for divider stage reset, others for "testing purposes only" */
-# define RTC_DIV_RESET1 0x60
-# define RTC_DIV_RESET2 0x70
- /* Periodic intr. / Square wave rate select. 0=none, 1=32.8kHz,... 15=2Hz */
-# define RTC_RATE_SELECT 0x0F
-
-/**********************************************************************/
-#define RTC_CONTROL RTC_REG_B
-# define RTC_SET 0x80 /* disable updates for clock setting */
-# define RTC_PIE 0x40 /* periodic interrupt enable */
-# define RTC_AIE 0x20 /* alarm interrupt enable */
-# define RTC_UIE 0x10 /* update-finished interrupt enable */
-# define RTC_SQWE 0x08 /* enable square-wave output */
-# define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */
-# define RTC_24H 0x02 /* 24 hour mode - else hours bit 7 means pm */
-# define RTC_DST_EN 0x01 /* auto switch DST - works f. USA only */
-
-/**********************************************************************/
-#define RTC_INTR_FLAGS RTC_REG_C
-/* caution - cleared by read */
-# define RTC_IRQF 0x80 /* any of the following 3 is active */
-# define RTC_PF 0x40
-# define RTC_AF 0x20
-# define RTC_UF 0x10
-
-/**********************************************************************/
-#define RTC_VALID RTC_REG_D
-# define RTC_VRT 0x80 /* valid RAM and time */
-/**********************************************************************/
-
-/* example: !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)
- * determines if the following two #defines are needed
- */
-#ifndef BCD_TO_BIN
-#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10)
-#endif
-
-#ifndef BIN_TO_BCD
-#define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10)
-#endif
-
-
-#ifndef RTC_PORT
-#define RTC_PORT(x) (0x70 + (x))
-#define RTC_ALWAYS_BCD 1 /* RTC register values are always BCD */
-#endif
-
-/*
- * All machines supported so far access the RTC index register via
- * an ISA port access, but the way to access the data register differs ...
- */
-#define CMOS_READ(addr) ({ \
-outb_p((addr),RTC_PORT(0)); \
-inb_p(RTC_PORT(1)); \
-})
-#define CMOS_WRITE(val, addr) ({ \
-outb_p((addr),RTC_PORT(0)); \
-outb_p((val),RTC_PORT(1)); \
-})
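
Putting the macros together, a robust seconds read takes rtc_lock, waits out an update in progress, samples the register, and converts from BCD when the control register (or RTC_ALWAYS_BCD) says so. A minimal sketch:

static inline unsigned int rtc_read_seconds(void)
{
    unsigned int sec;

    spin_lock(&rtc_lock);
    /* Wait until the update-in-progress bit clears before sampling. */
    while ( CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP )
        ;
    sec = CMOS_READ(RTC_SECONDS);
    if ( RTC_ALWAYS_BCD || !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) )
        BCD_TO_BIN(sec);
    spin_unlock(&rtc_lock);
    return sec;
}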
-
-#define RTC_IRQ 8
-
-#endif /* _ASM_MC146818RTC_H */
+++ /dev/null
-#ifndef __ASM_MPSPEC_H
-#define __ASM_MPSPEC_H
-
-#include <xen/config.h>
-#include <xen/types.h>
-
-/*
- * Structure definitions for SMP machines following the
- * Intel Multiprocessing Specification 1.1 and 1.4.
- */
-
-/*
- * This tag identifies where the SMP configuration
- * information is.
- */
-
-#define SMP_MAGIC_IDENT (('_'<<24)|('P'<<16)|('M'<<8)|'_')
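
The signature is the string "_MP_" packed little-endian into one longword; a finder scans 16-byte-aligned candidates in the BIOS areas (e.g. 0xF0000-0xFFFFF) for it. A sketch, assuming __va() covers those low physical addresses and omitting the mpf_checksum verification a real scanner would add:

static struct intel_mp_floating *mpf_scan(unsigned long base, unsigned long len)
{
    unsigned int *p   = (unsigned int *)__va(base);
    unsigned int *end = (unsigned int *)__va(base + len);

    for ( ; p < end; p += 4 )   /* the structure is 16-byte aligned */
        if ( *p == SMP_MAGIC_IDENT )
            return (struct intel_mp_floating *)p;
    return 0;
}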
-
-/*
- * The current APIC ID architecture allows a maximum of 16 APICs.
- * xAPICs can have up to 256. SAPICs have 16 ID bits.
- */
-#ifdef CONFIG_X86_CLUSTERED_APIC
-#define MAX_APICS 256
-#else
-#define MAX_APICS 16
-#endif
-
-#define MAX_MPC_ENTRY 1024
-
-struct intel_mp_floating
-{
- char mpf_signature[4]; /* "_MP_" */
- unsigned long mpf_physptr; /* Configuration table address */
- unsigned char mpf_length; /* Our length (paragraphs) */
- unsigned char mpf_specification;/* Specification version */
- unsigned char mpf_checksum; /* Checksum (makes sum 0) */
- unsigned char mpf_feature1; /* Standard or configuration ? */
- unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */
- unsigned char mpf_feature3; /* Unused (0) */
- unsigned char mpf_feature4; /* Unused (0) */
- unsigned char mpf_feature5; /* Unused (0) */
-};
-
-struct mp_config_table
-{
- char mpc_signature[4];
-#define MPC_SIGNATURE "PCMP"
- unsigned short mpc_length; /* Size of table */
- char mpc_spec; /* 0x01 */
- char mpc_checksum;
- char mpc_oem[8];
- char mpc_productid[12];
- unsigned long mpc_oemptr; /* 0 if not present */
- unsigned short mpc_oemsize; /* 0 if not present */
- unsigned short mpc_oemcount;
- unsigned long mpc_lapic; /* APIC address */
- unsigned long reserved;
-};
-
-/* Followed by entries */
-
-#define MP_PROCESSOR 0
-#define MP_BUS 1
-#define MP_IOAPIC 2
-#define MP_INTSRC 3
-#define MP_LINTSRC 4
-#define MP_TRANSLATION 192 /* Used by IBM NUMA-Q to describe node locality */
-
-struct mpc_config_processor
-{
- unsigned char mpc_type;
- unsigned char mpc_apicid; /* Local APIC number */
- unsigned char mpc_apicver; /* Its version */
- unsigned char mpc_cpuflag;
-#define CPU_ENABLED 1 /* Processor is available */
-#define CPU_BOOTPROCESSOR 2 /* Processor is the BP */
- unsigned long mpc_cpufeature;
-#define CPU_STEPPING_MASK 0x0F
-#define CPU_MODEL_MASK 0xF0
-#define CPU_FAMILY_MASK 0xF00
- unsigned long mpc_featureflag; /* CPUID feature value */
- unsigned long mpc_reserved[2];
-};
-
-struct mpc_config_bus
-{
- unsigned char mpc_type;
- unsigned char mpc_busid;
- unsigned char mpc_bustype[6] __attribute((packed));
-};
-
-/* List of Bus Type string values, Intel MP Spec. */
-#define BUSTYPE_EISA "EISA"
-#define BUSTYPE_ISA "ISA"
-#define BUSTYPE_INTERN "INTERN" /* Internal BUS */
-#define BUSTYPE_MCA "MCA"
-#define BUSTYPE_VL "VL" /* Local bus */
-#define BUSTYPE_PCI "PCI"
-#define BUSTYPE_PCMCIA "PCMCIA"
-#define BUSTYPE_CBUS "CBUS"
-#define BUSTYPE_CBUSII "CBUSII"
-#define BUSTYPE_FUTURE "FUTURE"
-#define BUSTYPE_MBI "MBI"
-#define BUSTYPE_MBII "MBII"
-#define BUSTYPE_MPI "MPI"
-#define BUSTYPE_MPSA "MPSA"
-#define BUSTYPE_NUBUS "NUBUS"
-#define BUSTYPE_TC "TC"
-#define BUSTYPE_VME "VME"
-#define BUSTYPE_XPRESS "XPRESS"
-
-struct mpc_config_ioapic
-{
- unsigned char mpc_type;
- unsigned char mpc_apicid;
- unsigned char mpc_apicver;
- unsigned char mpc_flags;
-#define MPC_APIC_USABLE 0x01
- unsigned long mpc_apicaddr;
-};
-
-struct mpc_config_intsrc
-{
- unsigned char mpc_type;
- unsigned char mpc_irqtype;
- unsigned short mpc_irqflag;
- unsigned char mpc_srcbus;
- unsigned char mpc_srcbusirq;
- unsigned char mpc_dstapic;
- unsigned char mpc_dstirq;
-};
-
-enum mp_irq_source_types {
- mp_INT = 0,
- mp_NMI = 1,
- mp_SMI = 2,
- mp_ExtINT = 3
-};
-
-#define MP_IRQDIR_DEFAULT 0
-#define MP_IRQDIR_HIGH 1
-#define MP_IRQDIR_LOW 3
-
-
-struct mpc_config_lintsrc
-{
- unsigned char mpc_type;
- unsigned char mpc_irqtype;
- unsigned short mpc_irqflag;
- unsigned char mpc_srcbusid;
- unsigned char mpc_srcbusirq;
- unsigned char mpc_destapic;
-#define MP_APIC_ALL 0xFF
- unsigned char mpc_destapiclint;
-};
-
-struct mp_config_oemtable
-{
- char oem_signature[4];
-#define MPC_OEM_SIGNATURE "_OEM"
- unsigned short oem_length; /* Size of table */
- char oem_rev; /* 0x01 */
- char oem_checksum;
- char mpc_oem[8];
-};
-
-struct mpc_config_translation
-{
- unsigned char mpc_type;
- unsigned char trans_len;
- unsigned char trans_type;
- unsigned char trans_quad;
- unsigned char trans_global;
- unsigned char trans_local;
- unsigned short trans_reserved;
-};
-
-/*
- * Default configurations
- *
- * 1 2 CPU ISA 82489DX
- * 2 2 CPU EISA 82489DX neither IRQ 0 timer nor IRQ 13 DMA chaining
- * 3 2 CPU EISA 82489DX
- * 4 2 CPU MCA 82489DX
- * 5 2 CPU ISA+PCI
- * 6 2 CPU EISA+PCI
- * 7 2 CPU MCA+PCI
- */
-
-#ifdef CONFIG_MULTIQUAD
-#define MAX_IRQ_SOURCES 512
-#else /* !CONFIG_MULTIQUAD */
-#define MAX_IRQ_SOURCES 256
-#endif /* CONFIG_MULTIQUAD */
-
-#define MAX_MP_BUSSES 32
-enum mp_bustype {
- MP_BUS_ISA = 1,
- MP_BUS_EISA,
- MP_BUS_PCI,
- MP_BUS_MCA
-};
-extern int *mp_bus_id_to_type;
-extern int *mp_bus_id_to_node;
-extern int *mp_bus_id_to_local;
-extern int *mp_bus_id_to_pci_bus;
-extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
-
-extern unsigned int boot_cpu_physical_apicid;
-/*extern unsigned long phys_cpu_present_map;*/
-extern int smp_found_config;
-extern void find_smp_config (void);
-extern void get_smp_config (void);
-/*extern int nr_ioapics;*/
-extern int apic_version [MAX_APICS];
-/*extern int mp_irq_entries;*/
-/*extern struct mpc_config_intsrc *mp_irqs;*/
-/*extern int mpc_default_type;*/
-extern int mp_current_pci_id;
-extern unsigned long mp_lapic_addr;
-/*extern int pic_mode;*/
-extern int using_apic_timer;
-
-#ifdef CONFIG_ACPI_BOOT
-extern void mp_register_lapic (u8 id, u8 enabled);
-extern void mp_register_lapic_address (u64 address);
-
-#ifdef CONFIG_X86_IO_APIC
-extern void mp_register_ioapic (u8 id, u32 address, u32 irq_base);
-extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger, u32 global_irq);
-extern void mp_config_acpi_legacy_irqs (void);
-extern void mp_config_ioapic_for_sci(int irq);
-extern void mp_parse_prt (void);
-#else /*!CONFIG_X86_IO_APIC*/
-static inline void mp_config_ioapic_for_sci(int irq) { }
-#endif /*!CONFIG_X86_IO_APIC*/
-
-#endif /*CONFIG_ACPI_BOOT*/
-
-#endif
-
+++ /dev/null
-#ifndef __ASM_MSR_H
-#define __ASM_MSR_H
-
-/*
- * Access to model-specific registers (MSRs; available on 586 and better only)
- * Note: the rd* operations modify the parameters directly (without using
- * pointer indirection), this allows gcc to optimize better
- */
-
-#define rdmsr(msr,val1,val2) \
- __asm__ __volatile__("rdmsr" \
- : "=a" (val1), "=d" (val2) \
- : "c" (msr))
-
-#define wrmsr(msr,val1,val2) \
- __asm__ __volatile__("wrmsr" \
- : /* no outputs */ \
- : "c" (msr), "a" (val1), "d" (val2))
-
-#define rdtsc(low,high) \
- __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
-
-#define rdtscl(low) \
- __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx")
-
-#define rdtscll(val) \
- __asm__ __volatile__("rdtsc" : "=A" (val))
-
-#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
-
-#define rdpmc(counter,low,high) \
- __asm__ __volatile__("rdpmc" \
- : "=a" (low), "=d" (high) \
- : "c" (counter))
-
-/* symbolic names for some interesting MSRs */
-/* Intel defined MSRs. */
-#define MSR_IA32_P5_MC_ADDR 0
-#define MSR_IA32_P5_MC_TYPE 1
-#define MSR_IA32_PLATFORM_ID 0x17
-#define MSR_IA32_EBL_CR_POWERON 0x2a
-
-#define MSR_IA32_APICBASE 0x1b
-#define MSR_IA32_APICBASE_BSP (1<<8)
-#define MSR_IA32_APICBASE_ENABLE (1<<11)
-#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
-
-#define MSR_IA32_UCODE_WRITE 0x79
-#define MSR_IA32_UCODE_REV 0x8b
-
-#define MSR_IA32_BBL_CR_CTL 0x119
-
-#define MSR_IA32_MCG_CAP 0x179
-#define MSR_IA32_MCG_STATUS 0x17a
-#define MSR_IA32_MCG_CTL 0x17b
-
-#define MSR_IA32_THERM_CONTROL 0x19a
-#define MSR_IA32_THERM_INTERRUPT 0x19b
-#define MSR_IA32_THERM_STATUS 0x19c
-#define MSR_IA32_MISC_ENABLE 0x1a0
-
-#define MSR_IA32_DEBUGCTLMSR 0x1d9
-#define MSR_IA32_LASTBRANCHFROMIP 0x1db
-#define MSR_IA32_LASTBRANCHTOIP 0x1dc
-#define MSR_IA32_LASTINTFROMIP 0x1dd
-#define MSR_IA32_LASTINTTOIP 0x1de
-
-#define MSR_IA32_MC0_CTL 0x400
-#define MSR_IA32_MC0_STATUS 0x401
-#define MSR_IA32_MC0_ADDR 0x402
-#define MSR_IA32_MC0_MISC 0x403
-
-#define MSR_P6_PERFCTR0 0xc1
-#define MSR_P6_PERFCTR1 0xc2
-#define MSR_P6_EVNTSEL0 0x186
-#define MSR_P6_EVNTSEL1 0x187
-
-/* AMD Defined MSRs */
-#define MSR_K6_EFER 0xC0000080
-#define MSR_K6_STAR 0xC0000081
-#define MSR_K6_WHCR 0xC0000082
-#define MSR_K6_UWCCR 0xC0000085
-#define MSR_K6_EPMR 0xC0000086
-#define MSR_K6_PSOR 0xC0000087
-#define MSR_K6_PFIR 0xC0000088
-
-#define MSR_K7_EVNTSEL0 0xC0010000
-#define MSR_K7_PERFCTR0 0xC0010004
-#define MSR_K7_HWCR 0xC0010015
-#define MSR_K7_CLK_CTL 0xC001001b
-#define MSR_K7_FID_VID_CTL 0xC0010041
-#define MSR_K7_VID_STATUS 0xC0010042
-
-/* Centaur-Hauls/IDT defined MSRs. */
-#define MSR_IDT_FCR1 0x107
-#define MSR_IDT_FCR2 0x108
-#define MSR_IDT_FCR3 0x109
-#define MSR_IDT_FCR4 0x10a
-
-#define MSR_IDT_MCR0 0x110
-#define MSR_IDT_MCR1 0x111
-#define MSR_IDT_MCR2 0x112
-#define MSR_IDT_MCR3 0x113
-#define MSR_IDT_MCR4 0x114
-#define MSR_IDT_MCR5 0x115
-#define MSR_IDT_MCR6 0x116
-#define MSR_IDT_MCR7 0x117
-#define MSR_IDT_MCR_CTRL 0x120
-
-/* VIA Cyrix defined MSRs*/
-#define MSR_VIA_FCR 0x1107
-#define MSR_VIA_LONGHAUL 0x110a
-#define MSR_VIA_BCR2 0x1147
-
-/* Transmeta defined MSRs */
-#define MSR_TMTA_LONGRUN_CTRL 0x80868010
-#define MSR_TMTA_LONGRUN_FLAGS 0x80868011
-#define MSR_TMTA_LRTI_READOUT 0x80868018
-#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a
-
-#endif /* __ASM_MSR_H */
+++ /dev/null
-#ifndef _I386_PAGE_H
-#define _I386_PAGE_H
-
-#define BUG() do { \
- printk("BUG at %s:%d\n", __FILE__, __LINE__); \
- __asm__ __volatile__("ud2"); \
-} while (0)
-
-#define L1_PAGETABLE_SHIFT 12
-#define L2_PAGETABLE_SHIFT 22
-
-#define ENTRIES_PER_L1_PAGETABLE 1024
-#define ENTRIES_PER_L2_PAGETABLE 1024
-
-#define PAGE_SHIFT L1_PAGETABLE_SHIFT
-#define PAGE_SIZE (1UL << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
-
-#define clear_page(_p) memset((void *)(_p), 0, PAGE_SIZE)
-#define copy_page(_t,_f) memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)
-
-#ifndef __ASSEMBLY__
-#include <xen/config.h>
-typedef struct { unsigned long l1_lo; } l1_pgentry_t;
-typedef struct { unsigned long l2_lo; } l2_pgentry_t;
-typedef l1_pgentry_t *l1_pagetable_t;
-typedef l2_pgentry_t *l2_pagetable_t;
-typedef struct { unsigned long pt_lo; } pagetable_t;
-#endif /* !__ASSEMBLY__ */
-
-/* Strip type from a table entry. */
-#define l1_pgentry_val(_x) ((_x).l1_lo)
-#define l2_pgentry_val(_x) ((_x).l2_lo)
-#define pagetable_val(_x) ((_x).pt_lo)
-
-#define alloc_l1_pagetable() ((l1_pgentry_t *)get_free_page(GFP_KERNEL))
-#define alloc_l2_pagetable() ((l2_pgentry_t *)get_free_page(GFP_KERNEL))
-
-/* Add type to a table entry. */
-#define mk_l1_pgentry(_x) ( (l1_pgentry_t) { (_x) } )
-#define mk_l2_pgentry(_x) ( (l2_pgentry_t) { (_x) } )
-#define mk_pagetable(_x) ( (pagetable_t) { (_x) } )
-
-/* Turn a typed table entry into a page index. */
-#define l1_pgentry_to_pagenr(_x) (l1_pgentry_val(_x) >> PAGE_SHIFT)
-#define l2_pgentry_to_pagenr(_x) (l2_pgentry_val(_x) >> PAGE_SHIFT)
-
-/* Turn a typed table entry into a physical address. */
-#define l1_pgentry_to_phys(_x) (l1_pgentry_val(_x) & PAGE_MASK)
-#define l2_pgentry_to_phys(_x) (l2_pgentry_val(_x) & PAGE_MASK)
-
-/* Dereference a typed level-2 entry to yield a typed level-1 table. */
-#define l2_pgentry_to_l1(_x) \
- ((l1_pgentry_t *)__va(l2_pgentry_val(_x) & PAGE_MASK))
-
-/* Given a virtual address, get an entry offset into a page table. */
-#define l1_table_offset(_a) \
- (((_a) >> L1_PAGETABLE_SHIFT) & (ENTRIES_PER_L1_PAGETABLE - 1))
-#define l2_table_offset(_a) \
- ((_a) >> L2_PAGETABLE_SHIFT)
-
-/* Hypervisor table entries use zero to signify 'empty'. */
-#define l1_pgentry_empty(_x) (!l1_pgentry_val(_x))
-#define l2_pgentry_empty(_x) (!l2_pgentry_val(_x))
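
These accessors compose into the obvious two-level walk: the L2 index picks a directory entry which, if non-empty, yields an L1 table for the L1 index to select from. A minimal lookup against the idle page table (declared further down) might read as follows; note this sketch ignores 4MB/PSE superpage mappings:

static inline unsigned long idle_va_to_phys(unsigned long va)
{
    l2_pgentry_t  l2e = idle_pg_table[l2_table_offset(va)];
    l1_pgentry_t *l1t, l1e;

    if ( l2_pgentry_empty(l2e) )
        return 0;                         /* unmapped at L2 */
    l1t = l2_pgentry_to_l1(l2e);
    l1e = l1t[l1_table_offset(va)];
    if ( l1_pgentry_empty(l1e) )
        return 0;                         /* unmapped at L1 */
    return l1_pgentry_to_phys(l1e) | (va & ~PAGE_MASK);
}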
-
-#define __PAGE_OFFSET (0xFC400000)
-#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
-#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
-#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
-#define page_address(_p) (__va(((_p) - frame_table) << PAGE_SHIFT))
-#define virt_to_page(kaddr) (frame_table + (__pa(kaddr) >> PAGE_SHIFT))
-#define VALID_PAGE(page) ((page - frame_table) < max_mapnr)
-
-/*
- * NB. We don't currently track I/O holes in the physical RAM space.
- * For now we guess that I/O devices will be mapped in the first 1MB
- * (e.g., VGA buffers) or beyond the end of physical RAM.
- */
-#define pfn_is_ram(_pfn) (((_pfn) > 0x100) && ((_pfn) < max_page))
-
-/* High table entries are reserved by the hypervisor. */
-#define DOMAIN_ENTRIES_PER_L2_PAGETABLE \
- (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT)
-#define HYPERVISOR_ENTRIES_PER_L2_PAGETABLE \
- (ENTRIES_PER_L2_PAGETABLE - DOMAIN_ENTRIES_PER_L2_PAGETABLE)
-
-#ifndef __ASSEMBLY__
-#include <asm/processor.h>
-#include <asm/fixmap.h>
-#include <asm/bitops.h>
-#include <asm/flushtlb.h>
-
-#define linear_pg_table ((l1_pgentry_t *)LINEAR_PT_VIRT_START)
-#define linear_l2_table ((l2_pgentry_t *)(LINEAR_PT_VIRT_START+(LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT))))
-
-#define va_to_l1mfn(_va) (l2_pgentry_val(linear_l2_table[_va>>L2_PAGETABLE_SHIFT]) >> PAGE_SHIFT)
-
-extern l2_pgentry_t idle_pg_table[ENTRIES_PER_L2_PAGETABLE];
-extern void paging_init(void);
-
-#define __flush_tlb() \
- do { \
- __asm__ __volatile__ ( \
- "movl %%cr3, %%eax; movl %%eax, %%cr3" \
- : : : "memory", "eax" ); \
- tlb_clocktick(); \
- } while ( 0 )
-
-/* Flush global pages as well. */
-
-#define __pge_off() \
- do { \
- __asm__ __volatile__( \
- "movl %0, %%cr4; # turn off PGE " \
- :: "r" (mmu_cr4_features & ~X86_CR4_PGE)); \
- } while (0)
-
-#define __pge_on() \
- do { \
- __asm__ __volatile__( \
- "movl %0, %%cr4; # turn off PGE " \
- :: "r" (mmu_cr4_features)); \
- } while (0)
-
-
-#define __flush_tlb_pge() \
- do { \
- __pge_off(); \
- __flush_tlb(); \
- __pge_on(); \
- } while (0)
-
-#define __flush_tlb_one(__addr) \
-__asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr)))
-
-#endif /* !__ASSEMBLY__ */
-
-
-#define _PAGE_PRESENT 0x001
-#define _PAGE_RW 0x002
-#define _PAGE_USER 0x004
-#define _PAGE_PWT 0x008
-#define _PAGE_PCD 0x010
-#define _PAGE_ACCESSED 0x020
-#define _PAGE_DIRTY 0x040
-#define _PAGE_PAT 0x080
-#define _PAGE_PSE 0x080
-#define _PAGE_GLOBAL 0x100
-
-#define __PAGE_HYPERVISOR \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
-#define __PAGE_HYPERVISOR_NOCACHE \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED)
-#define __PAGE_HYPERVISOR_RO \
- (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED)
-
-#define MAKE_GLOBAL(_x) ((_x) | _PAGE_GLOBAL)
-
-#define PAGE_HYPERVISOR MAKE_GLOBAL(__PAGE_HYPERVISOR)
-#define PAGE_HYPERVISOR_RO MAKE_GLOBAL(__PAGE_HYPERVISOR_RO)
-#define PAGE_HYPERVISOR_NOCACHE MAKE_GLOBAL(__PAGE_HYPERVISOR_NOCACHE)
-
-#define mk_l2_writeable(_p) \
- (*(_p) = mk_l2_pgentry(l2_pgentry_val(*(_p)) | _PAGE_RW))
-#define mk_l2_readonly(_p) \
- (*(_p) = mk_l2_pgentry(l2_pgentry_val(*(_p)) & ~_PAGE_RW))
-#define mk_l1_writeable(_p) \
- (*(_p) = mk_l1_pgentry(l1_pgentry_val(*(_p)) | _PAGE_RW))
-#define mk_l1_readonly(_p) \
- (*(_p) = mk_l1_pgentry(l1_pgentry_val(*(_p)) & ~_PAGE_RW))
-
-
-#ifndef __ASSEMBLY__
-static __inline__ int get_order(unsigned long size)
-{
- int order;
-
- size = (size-1) >> (PAGE_SHIFT-1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
-}
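
In other words, get_order() returns the smallest order such that (1 << order) pages cover the request:

/* get_order(1) .. get_order(4096) == 0   one 4kB page
 * get_order(4097)                 == 1   two pages
 * get_order(16384)                == 2   four pages
 */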
-#endif
-
-#endif /* _I386_PAGE_H */
+++ /dev/null
-#ifndef _ASMi386_PARAM_H
-#define _ASMi386_PARAM_H
-
-#ifndef HZ
-#define HZ 100
-#endif
-
-#define EXEC_PAGESIZE 4096
-
-#ifndef NGROUPS
-#define NGROUPS 32
-#endif
-
-#ifndef NOGROUP
-#define NOGROUP (-1)
-#endif
-
-#define MAXHOSTNAMELEN 64 /* max length of hostname */
-
-#ifdef __KERNEL__
-# define CLOCKS_PER_SEC 100 /* frequency at which times() counts */
-#endif
-
-#endif
+++ /dev/null
-#ifndef __i386_PCI_H
-#define __i386_PCI_H
-
-#include <xen/config.h>
-
-#ifdef __KERNEL__
-
-/* Can be used to override the logic in pci_scan_bus for skipping
- already-configured bus numbers - to be used for buggy BIOSes
- or architectures with incomplete PCI setup by the loader */
-
-#ifdef CONFIG_PCI
-extern unsigned int pcibios_assign_all_busses(void);
-#else
-#define pcibios_assign_all_busses() 0
-#endif
-#define pcibios_scan_all_fns() 0
-
-extern unsigned long pci_mem_start;
-#define PCIBIOS_MIN_IO 0x1000
-#define PCIBIOS_MIN_MEM (pci_mem_start)
-
-void pcibios_config_init(void);
-struct pci_bus * pcibios_scan_root(int bus);
-extern int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value);
-extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value);
-
-void pcibios_set_master(struct pci_dev *dev);
-void pcibios_penalize_isa_irq(int irq);
-struct irq_routing_table *pcibios_get_irq_routing_table(void);
-int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
-
-/* Dynamic DMA mapping stuff.
- * i386 has everything mapped statically.
- */
-
-#include <xen/types.h>
-#include <xen/slab.h>
-#include <asm/scatterlist.h>
-/*#include <xen/string.h>*/
-#include <asm/io.h>
-
-struct pci_dev;
-
-/* The PCI address space does equal the physical memory
- * address space. The networking and block device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS (1)
-
-/* Allocate and map kernel buffer using consistent mode DMA for a device.
- * hwdev should be valid struct pci_dev pointer for PCI devices,
- * NULL for PCI-like buses (ISA, EISA).
- * Returns non-NULL cpu-view pointer to the buffer if successful and
- * sets *dma_addrp to the pci side dma address as well, else *dma_addrp
- * is undefined.
- */
-extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
- dma_addr_t *dma_handle);
-
-/* Free and unmap a consistent DMA buffer.
- * cpu_addr is what was returned from pci_alloc_consistent,
- * size must be the same as what as passed into pci_alloc_consistent,
- * and likewise dma_addr must be the same as what *dma_addrp was set to.
- *
- * References to the memory and mappings associated with cpu_addr/dma_addr
- * past this call are illegal.
- */
-extern void pci_free_consistent(struct pci_dev *hwdev, size_t size,
- void *vaddr, dma_addr_t dma_handle);
-
-/* Map a single buffer of the indicated size for DMA in streaming mode.
- * The 32-bit bus address to use is returned.
- *
- * Once the device is given the dma address, the device owns this memory
- * until either pci_unmap_single or pci_dma_sync_single is performed.
- */
-static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
- size_t size, int direction)
-{
- if (direction == PCI_DMA_NONE)
- out_of_line_bug();
- flush_write_buffers();
- return virt_to_bus(ptr);
-}
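
The usual driver pattern under these rules: map, hand the bus address to the device, unmap when the transfer completes. A sketch only; dma_send is hypothetical and PCI_DMA_TODEVICE is the customary Linux direction constant, not defined in this header:

void dma_send(struct pci_dev *dev, void *buf, size_t len)
{
    dma_addr_t bus = pci_map_single(dev, buf, len, PCI_DMA_TODEVICE);

    /* ... program the device with 'bus' and wait for completion ... */

    pci_unmap_single(dev, bus, len, PCI_DMA_TODEVICE);
}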
-
-/* Unmap a single streaming mode DMA translation. The dma_addr and size
- * must match what was provided for in a previous pci_map_single call. All
- * other usages are undefined.
- *
- * After this call, reads by the cpu to the buffer are guaranteed to see
- * whatever the device wrote there.
- */
-static inline void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
- size_t size, int direction)
-{
- if (direction == PCI_DMA_NONE)
- out_of_line_bug();
- /* Nothing to do */
-}
-
-/*
- * pci_{map,unmap}_page maps a kernel page to a dma_addr_t. Identical
- * to pci_map_single, but takes a struct pfn_info instead of a virtual address.
- */
-static inline dma_addr_t pci_map_page(struct pci_dev *hwdev, struct pfn_info *page,
- unsigned long offset, size_t size, int direction)
-{
- if (direction == PCI_DMA_NONE)
- out_of_line_bug();
-
- return (dma_addr_t)(page - frame_table) * PAGE_SIZE + offset;
-}
-
-static inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address,
- size_t size, int direction)
-{
- if (direction == PCI_DMA_NONE)
- out_of_line_bug();
- /* Nothing to do */
-}
-
-/* pci_unmap_{page,single} is a nop so... */
-#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
-#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
-#define pci_unmap_addr(PTR, ADDR_NAME) (0)
-#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0)
-#define pci_unmap_len(PTR, LEN_NAME) (0)
-#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0)
-
-/* Map a set of buffers described by scatterlist in streaming
- * mode for DMA. This is the scatter-gather version of the
- * above pci_map_single interface. Here the scatter gather list
- * elements are each tagged with the appropriate dma address
- * and length. They are obtained via sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- * DMA address/length pairs than there are SG table elements.
- * (for example via virtual mapping capabilities)
- * The routine returns the number of addr/length pairs actually
- * used, at most nents.
- *
- * Device ownership issues as mentioned above for pci_map_single are
- * the same here.
- */
-static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
- int nents, int direction)
-{
- int i;
-
- if (direction == PCI_DMA_NONE)
- out_of_line_bug();
-
- /*
- * temporary 2.4 hack
- */
- for (i = 0; i < nents; i++ ) {
- if (sg[i].address && sg[i].page)
- out_of_line_bug();
-#if 0
- /* Invalid check, since address==0 is valid. */
- else if (!sg[i].address && !sg[i].page)
- out_of_line_bug();
-#endif
-
- /* XXX Switched round, since address==0 is valid. */
- if (sg[i].page)
- sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset;
- else
- sg[i].dma_address = virt_to_bus(sg[i].address);
- }
-
- flush_write_buffers();
- return nents;
-}
-
-/* Unmap a set of streaming mode DMA translations.
- * Again, cpu read rules concerning calls here are the same as for
- * pci_unmap_single() above.
- */
-static inline void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
- int nents, int direction)
-{
- if (direction == PCI_DMA_NONE)
- out_of_line_bug();
- /* Nothing to do */
-}
-
-/* Make physical memory consistent for a single
- * streaming mode DMA translation after a transfer.
- *
- * If you perform a pci_map_single() but wish to interrogate the
- * buffer using the cpu, yet do not wish to teardown the PCI dma
- * mapping, you must call this function before doing so. At the
- * next point you give the PCI dma address back to the card, the
- * device again owns the buffer.
- */
-static inline void pci_dma_sync_single(struct pci_dev *hwdev,
- dma_addr_t dma_handle,
- size_t size, int direction)
-{
- if (direction == PCI_DMA_NONE)
- out_of_line_bug();
- flush_write_buffers();
-}
-
-/* Make physical memory consistent for a set of streaming
- * mode DMA translations after a transfer.
- *
- * The same as pci_dma_sync_single but for a scatter-gather list,
- * same rules and usage.
- */
-static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
- struct scatterlist *sg,
- int nelems, int direction)
-{
- if (direction == PCI_DMA_NONE)
- out_of_line_bug();
- flush_write_buffers();
-}
-
-/* Return whether the given PCI device DMA address mask can
- * be supported properly. For example, if your device can
- * only drive the low 24-bits during PCI bus mastering, then
- * you would pass 0x00ffffff as the mask to this function.
- */
-static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask)
-{
- /*
- * we fall back to GFP_DMA when the mask isn't all 1s,
- * so we can't guarantee allocations that must be
- * within a tighter range than GFP_DMA..
- */
- if(mask < 0x00ffffff)
- return 0;
-
- return 1;
-}
-
-/* This is always fine. */
-#define pci_dac_dma_supported(pci_dev, mask) (1)
-
-static __inline__ dma64_addr_t
-pci_dac_page_to_dma(struct pci_dev *pdev, struct pfn_info *page, unsigned long offset, int direction)
-{
- return ((dma64_addr_t) page_to_bus(page) +
- (dma64_addr_t) offset);
-}
-
-static __inline__ struct pfn_info *
-pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr)
-{
- unsigned long poff = (dma_addr >> PAGE_SHIFT);
-
- return frame_table + poff;
-}
-
-static __inline__ unsigned long
-pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr)
-{
- return (dma_addr & ~PAGE_MASK);
-}
-
-static __inline__ void
-pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
-{
- flush_write_buffers();
-}
-
-/* These macros should be used after a pci_map_sg call has been done
- * to get bus addresses of each of the SG entries and their lengths.
- * You should only work with the number of sg entries pci_map_sg
- * returns.
- */
-#define sg_dma_address(sg) ((sg)->dma_address)
-#define sg_dma_len(sg) ((sg)->length)
-
-/* Return the index of the PCI controller for device. */
-static inline int pci_controller_num(struct pci_dev *dev)
-{
- return 0;
-}
-
-#if 0 /* XXX Not in land of Xen XXX */
-#define HAVE_PCI_MMAP
-extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
- enum pci_mmap_state mmap_state, int write_combine);
-#endif
-
-#endif /* __KERNEL__ */
-
-#endif /* __i386_PCI_H */
+++ /dev/null
-
-/*
- * pervasive debugger
- * www.cl.cam.ac.uk/netos/pdb
- *
- * alex ho
- * 2004
- * university of cambridge computer laboratory
- */
-
-
-#ifndef __PDB_H__
-#define __PDB_H__
-
-#include <asm/ptrace.h>
-#include <xen/list.h>
-#include <hypervisor-ifs/dom0_ops.h>
-#include <hypervisor-ifs/hypervisor-if.h> /* for domain id */
-
-extern int pdb_initialized;
-extern int pdb_com_port;
-extern int pdb_high_bit;
-extern int pdb_page_fault_possible;
-extern int pdb_page_fault_scratch;
-extern int pdb_page_fault;
-
-extern void initialize_pdb(void);
-
-/* Get/set values from generic debug interface. */
-extern int pdb_set_values(u_char *buffer, int length,
- unsigned long cr3, unsigned long addr);
-extern int pdb_get_values(u_char *buffer, int length,
- unsigned long cr3, unsigned long addr);
-
-/* External entry points. */
-extern int pdb_handle_exception(int exceptionVector,
- struct pt_regs *xen_regs);
-extern int pdb_serial_input(u_char c, struct pt_regs *regs);
-extern void pdb_do_debug(dom0_op_t *op);
-
-/* PDB Context. */
-struct pdb_context
-{
- int valid;
- int domain;
- int process;
- int system_call; /* 0x01 break on enter, 0x02 break on exit */
- unsigned long ptbr;
-};
-extern struct pdb_context pdb_ctx;
-
-/* Breakpoints. */
-struct pdb_breakpoint
-{
- struct list_head list;
- unsigned long address;
- unsigned long cr3;
- domid_t domain;
-};
-extern void pdb_bkpt_add (unsigned long cr3, unsigned long address);
-extern struct pdb_breakpoint* pdb_bkpt_search (unsigned long cr3,
- unsigned long address);
-extern int pdb_bkpt_remove (unsigned long cr3, unsigned long address);
-
-/* Conversions. */
-extern int hex (char);
-extern char *mem2hex (char *, char *, int);
-extern char *hex2mem (char *, char *, int);
-extern int hexToInt (char **ptr, int *intValue);
-
-/* Temporary Linux-specific definitions */
-extern int pdb_system_call;
-extern unsigned char pdb_system_call_enter_instr; /* original enter instr */
-extern unsigned char pdb_system_call_leave_instr; /* original next instr */
-extern unsigned long pdb_system_call_next_addr; /* instr after int 0x80 */
-extern unsigned long pdb_system_call_eflags_addr; /* saved eflags on stack */
-
-unsigned long pdb_linux_pid_ptbr (unsigned long cr3, int pid);
-void pdb_linux_get_values(char *buffer, int length, unsigned long address,
- int pid, unsigned long cr3);
-void pdb_linux_set_values(char *buffer, int length, unsigned long address,
- int pid, unsigned long cr3);
-void pdb_linux_syscall_enter_bkpt (struct pt_regs *regs, long error_code,
- trap_info_t *ti);
-void pdb_linux_syscall_exit_bkpt (struct pt_regs *regs,
- struct pdb_context *pdb_ctx);
-
-#endif /* __PDB_H__ */
+++ /dev/null
-#ifndef _I386_PGALLOC_H
-#define _I386_PGALLOC_H
-
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <asm/processor.h>
-#include <asm/fixmap.h>
-
-#define pgd_quicklist (current_cpu_data.pgd_quick)
-#define pmd_quicklist (current_cpu_data.pmd_quick)
-#define pte_quicklist (current_cpu_data.pte_quick)
-#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz)
-
-
-/*
- * Allocate and free page tables.
- */
-
-
-#define pte_free(pte) pte_free_fast(pte)
-#define pgd_alloc(mm) get_pgd_fast()
-#define pgd_free(pgd) free_pgd_fast(pgd)
-
-/*
- * allocating and freeing a pmd is trivial: the 1-entry pmd is
- * inside the pgd, so has no extra memory associated with it.
- * (In the PAE case we free the pmds as part of the pgd.)
- */
-
-#define pmd_alloc_one_fast(mm, addr) ({ BUG(); ((pmd_t *)1); })
-#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); })
-#define pmd_free_slow(x) do { } while (0)
-#define pmd_free_fast(x) do { } while (0)
-#define pmd_free(x) do { } while (0)
-#define pgd_populate(mm, pmd, pte) BUG()
-
-/*
- * TLB flushing:
- *
- * - flush_tlb() flushes the current mm struct TLBs
- * - flush_tlb_all() flushes all processes TLBs
- * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
- *
- * ..but the i386 has somewhat limited tlb flushing capabilities,
- * and page-granular flushes are available only on i486 and up.
- */
-
-#ifndef CONFIG_SMP
-
-#define flush_tlb() __flush_tlb()
-#define flush_tlb_all() __flush_tlb()
-#define flush_tlb_all_pge() __flush_tlb_pge()
-#define local_flush_tlb() __flush_tlb()
-#define flush_tlb_cpu(_cpu) __flush_tlb()
-#define flush_tlb_mask(_mask) __flush_tlb()
-#define try_flush_tlb_mask(_mask) __flush_tlb()
-
-#else
-
-#include <xen/smp.h>
-
-extern int try_flush_tlb_mask(unsigned long mask);
-extern void flush_tlb_mask(unsigned long mask);
-extern void flush_tlb_all_pge(void);
-
-#define flush_tlb() __flush_tlb()
-#define flush_tlb_all() flush_tlb_mask((1 << smp_num_cpus) - 1)
-#define local_flush_tlb() __flush_tlb()
-#define flush_tlb_cpu(_cpu) flush_tlb_mask(1 << (_cpu))
-
-#endif
-
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- /* i386 does not keep any page table caches in TLB */
-}
-
-#endif /* _I386_PGALLOC_H */
+++ /dev/null
-/*
- * include/asm-i386/processor.h
- *
- * Copyright (C) 1994 Linus Torvalds
- */
-
-#ifndef __ASM_I386_PROCESSOR_H
-#define __ASM_I386_PROCESSOR_H
-
-#include <asm/page.h>
-#include <asm/types.h>
-#include <asm/cpufeature.h>
-#include <asm/desc.h>
-#include <asm/flushtlb.h>
-#include <asm/pdb.h>
-#include <xen/config.h>
-#include <xen/spinlock.h>
-#include <hypervisor-ifs/hypervisor-if.h>
-
-struct task_struct;
-
-/*
- * Default implementation of macro that returns current
- * instruction pointer ("program counter").
- */
-#define current_text_addr() \
- ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; })
-
-/*
- * CPU type and hardware bug flags. Kept separately for each CPU.
- * Members of this structure are referenced in head.S, so think twice
- * before touching them. [mj]
- */
-
-struct cpuinfo_x86 {
- __u8 x86; /* CPU family */
- __u8 x86_vendor; /* CPU vendor */
- __u8 x86_model;
- __u8 x86_mask;
- char wp_works_ok; /* It doesn't on 386's */
- char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */
- char hard_math;
- char rfu;
- int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */
- __u32 x86_capability[NCAPINTS];
- char x86_vendor_id[16];
- char x86_model_id[64];
- int x86_cache_size; /* in KB - valid for CPUS which support this
- call */
- int fdiv_bug;
- int f00f_bug;
- int coma_bug;
- unsigned long loops_per_jiffy;
- unsigned long *pgd_quick;
- unsigned long *pmd_quick;
- unsigned long *pte_quick;
- unsigned long pgtable_cache_sz;
-} __attribute__((__aligned__(SMP_CACHE_BYTES)));
-
-#define X86_VENDOR_INTEL 0
-#define X86_VENDOR_CYRIX 1
-#define X86_VENDOR_AMD 2
-#define X86_VENDOR_UMC 3
-#define X86_VENDOR_NEXGEN 4
-#define X86_VENDOR_CENTAUR 5
-#define X86_VENDOR_RISE 6
-#define X86_VENDOR_TRANSMETA 7
-#define X86_VENDOR_NSC 8
-#define X86_VENDOR_SIS 9
-#define X86_VENDOR_UNKNOWN 0xff
-
-/*
- * capabilities of CPUs
- */
-
-extern struct cpuinfo_x86 boot_cpu_data;
-extern struct tss_struct init_tss[NR_CPUS];
-
-#ifdef CONFIG_SMP
-extern struct cpuinfo_x86 cpu_data[];
-#define current_cpu_data cpu_data[smp_processor_id()]
-#else
-#define cpu_data (&boot_cpu_data)
-#define current_cpu_data boot_cpu_data
-#endif
-
-extern char ignore_irq13;
-
-extern void identify_cpu(struct cpuinfo_x86 *);
-extern void print_cpu_info(struct cpuinfo_x86 *);
-extern void dodgy_tsc(void);
-
-/*
- * EFLAGS bits
- */
-#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
-#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
-#define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */
-#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
-#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
-#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
-#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
-#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
-#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
-#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
-#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
-#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
-#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
-#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
-#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
-#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
-#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
-
-/*
- * Generic CPUID function
- */
-static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
-{
- __asm__("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (op));
-}
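
Leaf 0 returns the maximum supported leaf in EAX and the 12-byte vendor string in EBX, EDX, ECX order, so filling an x86_vendor_id-style buffer is just (illustrative helper):

static inline void get_vendor_string(char vendor[13])
{
    int eax;
    /* cpuid(0, ...): vendor string bytes land as EBX, EDX, ECX. */
    cpuid(0, &eax, (int *)&vendor[0], (int *)&vendor[8], (int *)&vendor[4]);
    vendor[12] = '\0';
}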
-
-/*
- * CPUID functions returning a single datum
- */
-static inline unsigned int cpuid_eax(unsigned int op)
-{
- unsigned int eax;
-
- __asm__("cpuid"
- : "=a" (eax)
- : "0" (op)
- : "bx", "cx", "dx");
- return eax;
-}
-static inline unsigned int cpuid_ebx(unsigned int op)
-{
- unsigned int eax, ebx;
-
- __asm__("cpuid"
- : "=a" (eax), "=b" (ebx)
- : "0" (op)
- : "cx", "dx" );
- return ebx;
-}
-static inline unsigned int cpuid_ecx(unsigned int op)
-{
- unsigned int eax, ecx;
-
- __asm__("cpuid"
- : "=a" (eax), "=c" (ecx)
- : "0" (op)
- : "bx", "dx" );
- return ecx;
-}
-static inline unsigned int cpuid_edx(unsigned int op)
-{
- unsigned int eax, edx;
-
- __asm__("cpuid"
- : "=a" (eax), "=d" (edx)
- : "0" (op)
- : "bx", "cx");
- return edx;
-}
-
-
-/*
- * Intel CPU flags in CR0
- */
-#define X86_CR0_PE 0x00000001 /* Enable Protected Mode (RW) */
-#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor (RW) */
-#define X86_CR0_EM 0x00000004 /* Require FPU Emulation (RO) */
-#define X86_CR0_TS 0x00000008 /* Task Switched (RW) */
-#define X86_CR0_NE 0x00000020 /* Numeric Error Reporting (RW) */
-#define X86_CR0_WP 0x00010000 /* Supervisor Write Protect (RW) */
-#define X86_CR0_AM 0x00040000 /* Alignment Checking (RW) */
-#define X86_CR0_NW 0x20000000 /* Not Write-Through (RW) */
-#define X86_CR0_CD 0x40000000 /* Cache Disable (RW) */
-#define X86_CR0_PG 0x80000000 /* Paging (RW) */
-
-#define read_cr0() ({ \
- unsigned int __dummy; \
- __asm__( \
- "movl %%cr0,%0\n\t" \
- :"=r" (__dummy)); \
- __dummy; \
-})
-
-#define write_cr0(x) \
- __asm__("movl %0,%%cr0": :"r" (x));
-
-
-/*
- * Intel CPU features in CR4
- */
-#define X86_CR4_VME 0x0001 /* enable vm86 extensions */
-#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */
-#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */
-#define X86_CR4_DE 0x0008 /* enable debugging extensions */
-#define X86_CR4_PSE 0x0010 /* enable page size extensions */
-#define X86_CR4_PAE 0x0020 /* enable physical address extensions */
-#define X86_CR4_MCE 0x0040 /* Machine check enable */
-#define X86_CR4_PGE 0x0080 /* enable global pages */
-#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
-#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */
-#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */
-
-/*
- * Save the cr4 feature set we're using (i.e.
- * Pentium 4MB enable and PPro Global page
- * enable), so that any CPU's that boot up
- * after us can get the correct flags.
- */
-extern unsigned long mmu_cr4_features;
-
-static inline void set_in_cr4 (unsigned long mask)
-{
- mmu_cr4_features |= mask;
- __asm__("movl %%cr4,%%eax\n\t"
- "orl %0,%%eax\n\t"
- "movl %%eax,%%cr4\n"
- : : "irg" (mask)
- :"ax");
-}
-
-static inline void clear_in_cr4 (unsigned long mask)
-{
- mmu_cr4_features &= ~mask;
- __asm__("movl %%cr4,%%eax\n\t"
- "andl %0,%%eax\n\t"
- "movl %%eax,%%cr4\n"
- : : "irg" (~mask)
- :"ax");
-}
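
Because the mask is folded into mmu_cr4_features before the register write, secondary CPUs that load mmu_cr4_features at boot inherit the same configuration. Enabling global pages on the boot CPU, for instance, is simply:

set_in_cr4(X86_CR4_PGE);   /* recorded for later-booting CPUs as a side effect */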
-
-
-
-/*
- * Cyrix CPU configuration register indexes
- */
-#define CX86_CCR0 0xc0
-#define CX86_CCR1 0xc1
-#define CX86_CCR2 0xc2
-#define CX86_CCR3 0xc3
-#define CX86_CCR4 0xe8
-#define CX86_CCR5 0xe9
-#define CX86_CCR6 0xea
-#define CX86_CCR7 0xeb
-#define CX86_DIR0 0xfe
-#define CX86_DIR1 0xff
-#define CX86_ARR_BASE 0xc4
-#define CX86_RCR_BASE 0xdc
-
-/*
- * Cyrix CPU indexed register access macros
- */
-
-#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); })
-
-#define setCx86(reg, data) do { \
- outb((reg), 0x22); \
- outb((data), 0x23); \
-} while (0)
-
-#define EISA_bus (0)
-#define MCA_bus (0)
-
-/* From the system description table in the BIOS. Mostly for MCA use, but
-others may find it useful. */
-extern unsigned int machine_id;
-extern unsigned int machine_submodel_id;
-extern unsigned int BIOS_revision;
-extern unsigned int mca_pentium_flag;
-
-/*
- * User space process size: 3GB (default).
- */
-#define TASK_SIZE (PAGE_OFFSET)
-
-/* This decides where the kernel will search for a free chunk of vm
- * space during mmap's.
- */
-#define TASK_UNMAPPED_BASE (TASK_SIZE / 3)
-
-/*
- * Size of io_bitmap in longwords:
- * For Xen we support the full 8kbyte IO bitmap but use the io_bitmap_sel field
- * of the task_struct to avoid a full 8kbyte copy when switching to / from
- * domains with bits cleared.
- */
-#define IO_BITMAP_SIZE 2048
-#define IO_BITMAP_BYTES (IO_BITMAP_SIZE * 4)
-#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
-#define INVALID_IO_BITMAP_OFFSET 0x8000
-
-struct i387_fsave_struct {
- long cwd;
- long swd;
- long twd;
- long fip;
- long fcs;
- long foo;
- long fos;
- long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
- long status; /* software status information */
-};
-
-struct i387_fxsave_struct {
- unsigned short cwd;
- unsigned short swd;
- unsigned short twd;
- unsigned short fop;
- long fip;
- long fcs;
- long foo;
- long fos;
- long mxcsr;
- long reserved;
- long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
- long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */
- long padding[56];
-} __attribute__ ((aligned (16)));
-
-struct i387_soft_struct {
- long cwd;
- long swd;
- long twd;
- long fip;
- long fcs;
- long foo;
- long fos;
- long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
- unsigned char ftop, changed, lookahead, no_update, rm, alimit;
- struct info *info;
- unsigned long entry_eip;
-};
-
-union i387_union {
- struct i387_fsave_struct fsave;
- struct i387_fxsave_struct fxsave;
- struct i387_soft_struct soft;
-};
-
-typedef struct {
- unsigned long seg;
-} mm_segment_t;
-
-struct tss_struct {
- unsigned short back_link,__blh;
- unsigned long esp0;
- unsigned short ss0,__ss0h;
- unsigned long esp1;
- unsigned short ss1,__ss1h;
- unsigned long esp2;
- unsigned short ss2,__ss2h;
- unsigned long __cr3;
- unsigned long eip;
- unsigned long eflags;
- unsigned long eax,ecx,edx,ebx;
- unsigned long esp;
- unsigned long ebp;
- unsigned long esi;
- unsigned long edi;
- unsigned short es, __esh;
- unsigned short cs, __csh;
- unsigned short ss, __ssh;
- unsigned short ds, __dsh;
- unsigned short fs, __fsh;
- unsigned short gs, __gsh;
- unsigned short ldt, __ldth;
- unsigned short trace, bitmap;
- unsigned long io_bitmap[IO_BITMAP_SIZE+1];
- /*
- * pads the TSS to be cacheline-aligned (total size is 0x2080)
- */
- unsigned long __cacheline_filler[5];
-};
-
-struct thread_struct {
- unsigned long guestos_sp, guestos_ss;
-/* Hardware debugging registers */
- unsigned long debugreg[8]; /* %%db0-7 debug registers */
-/* floating point info */
- union i387_union i387;
-/* Trap info. */
- int fast_trap_idx;
- struct desc_struct fast_trap_desc;
- trap_info_t traps[256];
-};
-
-#define IDT_ENTRIES 256
-extern struct desc_struct idt_table[];
-extern struct desc_struct *idt_tables[];
-
-#define SET_DEFAULT_FAST_TRAP(_p) \
- (_p)->fast_trap_idx = 0x20; \
- (_p)->fast_trap_desc.a = 0; \
- (_p)->fast_trap_desc.b = 0;
-
-#define CLEAR_FAST_TRAP(_p) \
- (memset(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \
- 0, 8))
-
-#ifdef XEN_DEBUGGER
-#define SET_FAST_TRAP(_p) \
- (pdb_initialized ? (void *) 0 : \
- (memcpy(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \
- &((_p)->fast_trap_desc), 8)))
-#else
-#define SET_FAST_TRAP(_p) \
- (memcpy(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \
- &((_p)->fast_trap_desc), 8))
-#endif
-
-long set_fast_trap(struct task_struct *p, int idx);
-
-#define INIT_THREAD { \
- 0, 0, \
- { [0 ... 7] = 0 }, /* debugging registers */ \
- { { 0, }, }, /* 387 state */ \
- 0x20, { 0, 0 }, /* DEFAULT_FAST_TRAP */ \
- { {0} } /* trap table */ \
-}
-
-#define INIT_TSS { \
- 0,0, /* back_link, __blh */ \
- 0, /* esp0 */ \
- 0, 0, /* ss0 */ \
- 0,0,0,0,0,0, /* stack1, stack2 */ \
- 0, /* cr3 */ \
- 0,0, /* eip,eflags */ \
- 0,0,0,0, /* eax,ecx,edx,ebx */ \
- 0,0,0,0, /* esp,ebp,esi,edi */ \
- 0,0,0,0,0,0, /* es,cs,ss */ \
- 0,0,0,0,0,0, /* ds,fs,gs */ \
- 0,0, /* ldt */ \
- 0, INVALID_IO_BITMAP_OFFSET, /* trace, bitmap */ \
- { [0 ... IO_BITMAP_SIZE] = ~0UL }, /* ioperm */ \
-}
-
-struct mm_struct {
- /*
- * Every domain has a L1 pagetable of its own. Per-domain mappings
- * are put in this table (eg. the current GDT is mapped here).
- */
- l1_pgentry_t *perdomain_pt;
- pagetable_t pagetable;
-
- /* shadow mode status and controls */
- unsigned int shadow_mode; /* flags to control shadow table operation */
- pagetable_t shadow_table;
- spinlock_t shadow_lock;
- unsigned int shadow_max_page_count; /* currently unused */
-
- /* shadow hashtable */
- struct shadow_status *shadow_ht;
- struct shadow_status *shadow_ht_free;
- struct shadow_status *shadow_ht_extras; /* extra allocation units */
- unsigned int shadow_extras_count;
-
- /* shadow dirty bitmap */
- unsigned long *shadow_dirty_bitmap;
- unsigned int shadow_dirty_bitmap_size; /* in pages, bit per page */
-
- /* shadow mode stats */
- unsigned int shadow_page_count;
- unsigned int shadow_fault_count;
- unsigned int shadow_dirty_count;
-
-
- /* Current LDT details. */
- unsigned long ldt_base, ldt_ents, shadow_ldt_mapcnt;
- /* Next entry is passed to LGDT on domain switch. */
- char gdt[6];
-};
-
-static inline void write_ptbase(struct mm_struct *mm)
-{
- unsigned long pa;
-
- if ( unlikely(mm->shadow_mode) )
- pa = pagetable_val(mm->shadow_table);
- else
- pa = pagetable_val(mm->pagetable);
-
- __asm__ __volatile__ ( "movl %0, %%cr3" : : "r" (pa) : "memory" );
-}
-
-#define IDLE0_MM \
-{ \
- perdomain_pt: 0, \
- pagetable: mk_pagetable(__pa(idle_pg_table)) \
-}
-
-/* Convenient accessor for mm.gdt. */
-#define SET_GDT_ENTRIES(_p, _e) ((*(u16 *)((_p)->mm.gdt + 0)) = (_e))
-#define SET_GDT_ADDRESS(_p, _a) ((*(u32 *)((_p)->mm.gdt + 2)) = (_a))
-#define GET_GDT_ENTRIES(_p) ((*(u16 *)((_p)->mm.gdt + 0)))
-#define GET_GDT_ADDRESS(_p) ((*(u32 *)((_p)->mm.gdt + 2)))
-
-long set_gdt(struct task_struct *p,
- unsigned long *frames,
- unsigned int entries);
-
-long set_debugreg(struct task_struct *p, int reg, unsigned long value);
-
-struct microcode {
- unsigned int hdrver;
- unsigned int rev;
- unsigned int date;
- unsigned int sig;
- unsigned int cksum;
- unsigned int ldrver;
- unsigned int pf;
- unsigned int reserved[5];
- unsigned int bits[500];
-};
-
-/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */
-#define MICROCODE_IOCFREE _IO('6',0)
-
-/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static inline void rep_nop(void)
-{
- __asm__ __volatile__("rep;nop");
-}
-
-#define cpu_relax() rep_nop()
-
-/* Prefetch instructions for Pentium III and AMD Athlon */
-#ifdef CONFIG_MPENTIUMIII
-
-#define ARCH_HAS_PREFETCH
-extern inline void prefetch(const void *x)
-{
- __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x));
-}
-
-#elif CONFIG_X86_USE_3DNOW
-
-#define ARCH_HAS_PREFETCH
-#define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
-
-extern inline void prefetch(const void *x)
-{
- __asm__ __volatile__ ("prefetch (%0)" : : "r"(x));
-}
-
-extern inline void prefetchw(const void *x)
-{
- __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x));
-}
-#define spin_lock_prefetch(x) prefetchw(x)
-
-#endif
-
-#endif /* __ASM_I386_PROCESSOR_H */
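/*
 * Illustrative sketch (not in the original header): how the prefetch()
 * helper above is typically used, hinting the next node of a linked
 * list into cache one step ahead of the traversal.  The one-node
 * lookahead distance is an arbitrary choice for the example.
 */
struct pf_node { struct pf_node *next; int payload; };

static int pf_sum_list(struct pf_node *n)
{
    int sum = 0;
    while (n != NULL) {
        prefetch(n->next);      /* harmless even when next is NULL */
        sum += n->payload;
        n = n->next;
    }
    return sum;
}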
+++ /dev/null
-#ifndef _I386_PTRACE_H
-#define _I386_PTRACE_H
-
-struct pt_regs {
- long ebx;
- long ecx;
- long edx;
- long esi;
- long edi;
- long ebp;
- long eax;
- int xds;
- int xes;
- int xfs;
- int xgs;
- long orig_eax;
- long eip;
- int xcs;
- long eflags;
- long esp;
- int xss;
-};
-
-enum EFLAGS {
- EF_CF = 0x00000001,
- EF_PF = 0x00000004,
- EF_AF = 0x00000010,
- EF_ZF = 0x00000040,
- EF_SF = 0x00000080,
- EF_TF = 0x00000100,
- EF_IE = 0x00000200,
- EF_DF = 0x00000400,
- EF_OF = 0x00000800,
- EF_IOPL = 0x00003000,
- EF_IOPL_RING0 = 0x00000000,
- EF_IOPL_RING1 = 0x00001000,
- EF_IOPL_RING2 = 0x00002000,
- EF_NT = 0x00004000, /* nested task */
- EF_RF = 0x00010000, /* resume */
- EF_VM = 0x00020000, /* virtual mode */
- EF_AC = 0x00040000, /* alignment */
- EF_VIF = 0x00080000, /* virtual interrupt */
- EF_VIP = 0x00100000, /* virtual interrupt pending */
- EF_ID = 0x00200000, /* id */
-};
-
-#ifdef __KERNEL__
-#define user_mode(regs) ((3 & (regs)->xcs))
-#endif
-
-#endif
+++ /dev/null
-/* include/asm-i386/rwlock.h
- *
- * Helpers used by both rw spinlocks and rw semaphores.
- *
- * Based in part on code from semaphore.h and
- * spinlock.h Copyright 1996 Linus Torvalds.
- *
- * Copyright 1999 Red Hat, Inc.
- *
- * Written by Benjamin LaHaise.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#ifndef _ASM_I386_RWLOCK_H
-#define _ASM_I386_RWLOCK_H
-
-#define RW_LOCK_BIAS 0x01000000
-#define RW_LOCK_BIAS_STR "0x01000000"
-
-#define __build_read_lock_ptr(rw, helper) \
- asm volatile(LOCK "subl $1,(%0)\n\t" \
- "js 2f\n" \
- "1:\n" \
- ".section .text.lock,\"ax\"\n" \
- "2:\tcall " helper "\n\t" \
- "jmp 1b\n" \
- ".previous" \
- ::"a" (rw) : "memory")
-
-#define __build_read_lock_const(rw, helper) \
- asm volatile(LOCK "subl $1,%0\n\t" \
- "js 2f\n" \
- "1:\n" \
- ".section .text.lock,\"ax\"\n" \
- "2:\tpushl %%eax\n\t" \
- "leal %0,%%eax\n\t" \
- "call " helper "\n\t" \
- "popl %%eax\n\t" \
- "jmp 1b\n" \
- ".previous" \
- :"=m" (*(volatile int *)rw) : : "memory")
-
-#define __build_read_lock(rw, helper) do { \
- if (__builtin_constant_p(rw)) \
- __build_read_lock_const(rw, helper); \
- else \
- __build_read_lock_ptr(rw, helper); \
- } while (0)
-
-#define __build_write_lock_ptr(rw, helper) \
- asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
- "jnz 2f\n" \
- "1:\n" \
- ".section .text.lock,\"ax\"\n" \
- "2:\tcall " helper "\n\t" \
- "jmp 1b\n" \
- ".previous" \
- ::"a" (rw) : "memory")
-
-#define __build_write_lock_const(rw, helper) \
- asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
- "jnz 2f\n" \
- "1:\n" \
- ".section .text.lock,\"ax\"\n" \
- "2:\tpushl %%eax\n\t" \
- "leal %0,%%eax\n\t" \
- "call " helper "\n\t" \
- "popl %%eax\n\t" \
- "jmp 1b\n" \
- ".previous" \
- :"=m" (*(volatile int *)rw) : : "memory")
-
-#define __build_write_lock(rw, helper) do { \
- if (__builtin_constant_p(rw)) \
- __build_write_lock_const(rw, helper); \
- else \
- __build_write_lock_ptr(rw, helper); \
- } while (0)
-
-#endif
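/*
 * Plain-C sketch of the bias technique above (illustrative only;
 * assumes GCC __sync builtins rather than the hand-written asm).
 * Readers subtract 1 and writers subtract the whole bias, so the
 * count goes negative exactly when a writer holds or wants the lock
 * and reaches zero only when a writer owns it outright.
 */
typedef struct { volatile int count; } demo_rwlock_t;
#define DEMO_RW_BIAS 0x01000000   /* lock starts at this value */

static void demo_read_lock(demo_rwlock_t *rw)
{
    while (__sync_sub_and_fetch(&rw->count, 1) < 0) {
        __sync_add_and_fetch(&rw->count, 1);   /* undo and wait */
        while (rw->count <= 0)
            ;                                  /* writer active */
    }
}

static void demo_write_lock(demo_rwlock_t *rw)
{
    while (__sync_sub_and_fetch(&rw->count, DEMO_RW_BIAS) != 0) {
        __sync_add_and_fetch(&rw->count, DEMO_RW_BIAS);   /* undo */
        while (rw->count != DEMO_RW_BIAS)
            ;                                  /* lock not free */
    }
}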
+++ /dev/null
-#ifndef _I386_SCATTERLIST_H
-#define _I386_SCATTERLIST_H
-
-struct scatterlist {
- char * address; /* Location data is to be transferred to, NULL for
- * highmem page */
- struct pfn_info * page; /* Location for highmem page, if any */
- unsigned int offset;/* for highmem, page offset */
-
- dma_addr_t dma_address;
- unsigned int length;
-};
-
-#define ISA_DMA_THRESHOLD (0x00ffffff)
-
-#endif /* !(_I386_SCATTERLIST_H) */
+++ /dev/null
-#ifndef __ASM_SMP_H
-#define __ASM_SMP_H
-
-/*
- * We need the APIC definitions automatically as part of 'smp.h'
- */
-#ifndef __ASSEMBLY__
-#include <xen/config.h>
-/*#include <xen/threads.h>*/
-#include <asm/ptrace.h>
-#endif
-
-#ifdef CONFIG_X86_LOCAL_APIC
-#ifndef __ASSEMBLY__
-#include <asm/fixmap.h>
-#include <asm/bitops.h>
-#include <asm/mpspec.h>
-#ifdef CONFIG_X86_IO_APIC
-#include <asm/io_apic.h>
-#endif
-#include <asm/apic.h>
-#endif
-#endif
-
-#ifdef CONFIG_SMP
-#ifndef __ASSEMBLY__
-
-/*
- * Private routines/data
- */
-
-extern void smp_alloc_memory(void);
-extern unsigned long phys_cpu_present_map;
-extern unsigned long cpu_online_map;
-extern volatile unsigned long smp_invalidate_needed;
-extern int pic_mode;
-extern int smp_num_siblings;
-extern int cpu_sibling_map[];
-
-extern void smp_flush_tlb(void);
-extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
-extern void smp_send_reschedule(int cpu);
-extern void smp_invalidate_rcv(void); /* Process an NMI */
-extern void (*mtrr_hook) (void);
-extern void zap_low_mappings (void);
-
-/*
- * On x86 all CPUs are mapped 1:1 to the APIC space.
- * This simplifies scheduling and IPI sending and
- * compresses data structures.
- */
-static inline int cpu_logical_map(int cpu)
-{
- return cpu;
-}
-static inline int cpu_number_map(int cpu)
-{
- return cpu;
-}
-
-/*
- * Some lowlevel functions might want to know about
- * the real APIC ID <-> CPU # mapping.
- */
-#define MAX_APICID 256
-extern volatile int cpu_to_physical_apicid[NR_CPUS];
-extern volatile int physical_apicid_to_cpu[MAX_APICID];
-extern volatile int cpu_to_logical_apicid[NR_CPUS];
-extern volatile int logical_apicid_to_cpu[MAX_APICID];
-
-/*
- * General functions that each host system must provide.
- */
-
-/*extern void smp_boot_cpus(void);*/
-extern void smp_store_cpu_info(int id); /* Store per CPU info (like the initial udelay numbers) */
-
-/*
- * This function is needed by all SMP systems. It must _always_ be valid
- * from the initial startup. We map APIC_BASE very early in page_setup(),
- * so this is correct in the x86 case.
- */
-
-#define smp_processor_id() (current->processor)
-
-static __inline int hard_smp_processor_id(void)
-{
- /* we don't want to mark this access volatile - bad code generation */
- return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
-}
-
-static __inline int logical_smp_processor_id(void)
-{
- /* we don't want to mark this access volatile - bad code generation */
- return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
-}
-
-#endif /* !__ASSEMBLY__ */
-
-#define NO_PROC_ID 0xFF /* No processor magic marker */
-
-/*
- * This magic constant controls our willingness to transfer
- * a process across CPUs. Such a transfer incurs misses on the L1
- * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My
- * gut feeling is this will vary by board in value. For a board
- * with separate L2 cache it probably depends also on the RSS, and
- * for a board with shared L2 cache it ought to decay fast as other
- * processes are run.
- */
-
-#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */
-
-#endif
-#endif
+++ /dev/null
-#ifndef __ASM_SMPBOOT_H
-#define __ASM_SMPBOOT_H
-
-/* enum for clustered_apic_mode values */
-enum {
- CLUSTERED_APIC_NONE = 0,
- CLUSTERED_APIC_XAPIC,
- CLUSTERED_APIC_NUMAQ
-};
-
-#ifdef CONFIG_X86_CLUSTERED_APIC
-extern unsigned int apic_broadcast_id;
-extern unsigned char clustered_apic_mode;
-extern unsigned char esr_disable;
-extern unsigned char int_delivery_mode;
-extern unsigned int int_dest_addr_mode;
-extern int cyclone_setup(char*);
-
-static inline void detect_clustered_apic(char* oem, char* prod)
-{
- /*
- * Can't recognize Summit xAPICs at present, so use the OEM ID.
- */
- if (!strncmp(oem, "IBM ENSW", 8) && !strncmp(prod, "VIGIL SMP", 9)){
- clustered_apic_mode = CLUSTERED_APIC_XAPIC;
- apic_broadcast_id = APIC_BROADCAST_ID_XAPIC;
- int_dest_addr_mode = APIC_DEST_PHYSICAL;
- int_delivery_mode = dest_Fixed;
- esr_disable = 1;
- /*Start cyclone clock*/
- cyclone_setup(0);
- /* check for ACPI tables */
- } else if (!strncmp(oem, "IBM", 3) &&
- (!strncmp(prod, "SERVIGIL", 8) ||
- !strncmp(prod, "EXA", 3) ||
- !strncmp(prod, "RUTHLESS", 8))){
- clustered_apic_mode = CLUSTERED_APIC_XAPIC;
- apic_broadcast_id = APIC_BROADCAST_ID_XAPIC;
- int_dest_addr_mode = APIC_DEST_PHYSICAL;
- int_delivery_mode = dest_Fixed;
- esr_disable = 1;
- /*Start cyclone clock*/
- cyclone_setup(0);
- } else if (!strncmp(oem, "IBM NUMA", 8)){
- clustered_apic_mode = CLUSTERED_APIC_NUMAQ;
- apic_broadcast_id = APIC_BROADCAST_ID_APIC;
- int_dest_addr_mode = APIC_DEST_LOGICAL;
- int_delivery_mode = dest_LowestPrio;
- esr_disable = 1;
- }
-}
-#define INT_DEST_ADDR_MODE (int_dest_addr_mode)
-#define INT_DELIVERY_MODE (int_delivery_mode)
-#else /* CONFIG_X86_CLUSTERED_APIC */
-#define apic_broadcast_id (APIC_BROADCAST_ID_APIC)
-#define clustered_apic_mode (CLUSTERED_APIC_NONE)
-#define esr_disable (0)
-#define detect_clustered_apic(x,y)
-#define INT_DEST_ADDR_MODE (APIC_DEST_LOGICAL) /* logical delivery */
-#define INT_DELIVERY_MODE (dest_LowestPrio)
-#endif /* CONFIG_X86_CLUSTERED_APIC */
-#define BAD_APICID 0xFFu
-
-#define TRAMPOLINE_LOW phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0x8:0x467)
-#define TRAMPOLINE_HIGH phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0xa:0x469)
-
-#define boot_cpu_apicid ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?boot_cpu_logical_apicid:boot_cpu_physical_apicid)
-
-extern unsigned char raw_phys_apicid[NR_CPUS];
-
-/*
- * How to map from the cpu_present_map
- */
-static inline int cpu_present_to_apicid(int mps_cpu)
-{
- if (clustered_apic_mode == CLUSTERED_APIC_XAPIC)
- return raw_phys_apicid[mps_cpu];
- if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
- return (mps_cpu/4)*16 + (1<<(mps_cpu%4));
- return mps_cpu;
-}
-
-static inline unsigned long apicid_to_phys_cpu_present(int apicid)
-{
- if(clustered_apic_mode)
- return 1UL << (((apicid >> 4) << 2) + (apicid & 0x3));
- return 1UL << apicid;
-}
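/*
 * Worked example of the NUMAQ encoding above (illustrative): logical
 * CPU 5 sits on quad 5/4 = 1 as local CPU 5%4 = 1, giving APIC ID
 * 1*16 + (1<<1) = 0x12: quad number in the high nibble, a one-hot
 * CPU bit in the low nibble.
 */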
-
-#define physical_to_logical_apicid(phys_apic) ( (1ul << (phys_apic & 0x3)) | (phys_apic & 0xF0u) )
-
-/*
- * Mappings between logical cpu number and logical / physical apicid
- * The first four macros are trivial, but it keeps the abstraction consistent
- */
-extern volatile int logical_apicid_2_cpu[];
-extern volatile int cpu_2_logical_apicid[];
-extern volatile int physical_apicid_2_cpu[];
-extern volatile int cpu_2_physical_apicid[];
-
-#define logical_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid]
-#define cpu_to_logical_apicid(cpu) cpu_2_logical_apicid[cpu]
-#define physical_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid]
-#define cpu_to_physical_apicid(cpu) cpu_2_physical_apicid[cpu]
-#ifdef CONFIG_MULTIQUAD /* use logical IDs to bootstrap */
-#define boot_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid]
-#define cpu_to_boot_apicid(cpu) cpu_2_logical_apicid[cpu]
-#else /* !CONFIG_MULTIQUAD */ /* use physical IDs to bootstrap */
-#define boot_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid]
-#define cpu_to_boot_apicid(cpu) cpu_2_physical_apicid[cpu]
-#endif /* CONFIG_MULTIQUAD */
-
-#ifdef CONFIG_X86_CLUSTERED_APIC
-static inline int target_cpus(void)
-{
- static int cpu;
- switch(clustered_apic_mode){
- case CLUSTERED_APIC_NUMAQ:
- /* Broadcast intrs to local quad only. */
- return APIC_BROADCAST_ID_APIC;
- case CLUSTERED_APIC_XAPIC:
- /*round robin the interrupts*/
- cpu = (cpu+1)%smp_num_cpus;
- return cpu_to_physical_apicid(cpu);
- default:
- break;
- }
- return cpu_online_map;
-}
-#else
-#define target_cpus() (cpu_online_map)
-#endif
-#endif
+++ /dev/null
-#ifndef __ASM_SOFTIRQ_H
-#define __ASM_SOFTIRQ_H
-
-#include <asm/atomic.h>
-#include <asm/hardirq.h>
-
-#define cpu_bh_enable(cpu) \
- do { barrier(); local_bh_count(cpu)--; } while (0)
-#define cpu_bh_disable(cpu) \
- do { local_bh_count(cpu)++; barrier(); } while (0)
-
-#define local_bh_disable() cpu_bh_disable(smp_processor_id())
-#define local_bh_enable() cpu_bh_enable(smp_processor_id())
-
-#define in_softirq() (local_bh_count(smp_processor_id()) != 0)
-
-#endif /* __ASM_SOFTIRQ_H */
+++ /dev/null
-#ifndef __ASM_SPINLOCK_H
-#define __ASM_SPINLOCK_H
-
-#include <xen/config.h>
-#include <xen/lib.h>
-#include <asm/atomic.h>
-#include <asm/rwlock.h>
-
-#if 0
-#define SPINLOCK_DEBUG 1
-#else
-#define SPINLOCK_DEBUG 0
-#endif
-
-/*
- * Your basic SMP spinlocks, allowing only a single CPU anywhere
- */
-
-typedef struct {
- volatile unsigned int lock;
-#if SPINLOCK_DEBUG
- unsigned magic;
-#endif
-} spinlock_t;
-
-#define SPINLOCK_MAGIC 0xdead4ead
-
-#if SPINLOCK_DEBUG
-#define SPINLOCK_MAGIC_INIT , SPINLOCK_MAGIC
-#else
-#define SPINLOCK_MAGIC_INIT /* */
-#endif
-
-#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT }
-
-#define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
-
-/*
- * Simple spin lock operations. There are two variants, one clears IRQ's
- * on the local processor, one does not.
- *
- * We make no fairness assumptions. They have a cost.
- */
-
-#define spin_is_locked(x) (*(volatile char *)(&(x)->lock) <= 0)
-#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x))
-
-#define spin_lock_string \
- "\n1:\t" \
- "lock ; decb %0\n\t" \
- "js 2f\n" \
- ".section .text.lock,\"ax\"\n" \
- "2:\t" \
- "cmpb $0,%0\n\t" \
- "rep;nop\n\t" \
- "jle 2b\n\t" \
- "jmp 1b\n" \
- ".previous"
-
-/*
- * This works. Despite all the confusion.
- * (except on PPro SMP or if we are using OOSTORE)
- * (PPro errata 66, 92)
- */
-
-#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
-
-#define spin_unlock_string \
- "movb $1,%0" \
- :"=m" (lock->lock) : : "memory"
-
-
-static inline void spin_unlock(spinlock_t *lock)
-{
-#if SPINLOCK_DEBUG
- if (lock->magic != SPINLOCK_MAGIC)
- BUG();
- if (!spin_is_locked(lock))
- BUG();
-#endif
- __asm__ __volatile__(
- spin_unlock_string
- );
-}
-
-#else
-
-#define spin_unlock_string \
- "xchgb %b0, %1" \
- :"=q" (oldval), "=m" (lock->lock) \
- :"0" (oldval) : "memory"
-
-static inline void spin_unlock(spinlock_t *lock)
-{
- char oldval = 1;
-#if SPINLOCK_DEBUG
- if (lock->magic != SPINLOCK_MAGIC)
- BUG();
- if (!spin_is_locked(lock))
- BUG();
-#endif
- __asm__ __volatile__(
- spin_unlock_string
- );
-}
-
-#endif
-
-static inline int spin_trylock(spinlock_t *lock)
-{
- char oldval;
- __asm__ __volatile__(
- "xchgb %b0,%1"
- :"=q" (oldval), "=m" (lock->lock)
- :"0" (0) : "memory");
- return oldval > 0;
-}
-
-static inline void spin_lock(spinlock_t *lock)
-{
-#if SPINLOCK_DEBUG
- __label__ here;
-here:
- if (lock->magic != SPINLOCK_MAGIC) {
- printk("eip: %p\n", &&here);
- BUG();
- }
-#endif
- __asm__ __volatile__(
- spin_lock_string
- :"=m" (lock->lock) : : "memory");
-}
-
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- */
-typedef struct {
- volatile unsigned int lock;
-#if SPINLOCK_DEBUG
- unsigned magic;
-#endif
-} rwlock_t;
-
-#define RWLOCK_MAGIC 0xdeaf1eed
-
-#if SPINLOCK_DEBUG
-#define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC
-#else
-#define RWLOCK_MAGIC_INIT /* */
-#endif
-
-#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT }
-
-#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
-
-/*
- * On x86, we implement read-write locks as a 32-bit counter
- * with the high bit (sign) being the "contended" bit.
- *
- * The inline assembly is non-obvious. Think about it.
- *
- * Changed to use the same technique as rw semaphores. See
- * semaphore.h for details. -ben
- */
-/* the spinlock helpers are in arch/i386/kernel/semaphore.c */
-
-static inline void read_lock(rwlock_t *rw)
-{
-#if SPINLOCK_DEBUG
- if (rw->magic != RWLOCK_MAGIC)
- BUG();
-#endif
- __build_read_lock(rw, "__read_lock_failed");
-}
-
-static inline void write_lock(rwlock_t *rw)
-{
-#if SPINLOCK_DEBUG
- if (rw->magic != RWLOCK_MAGIC)
- BUG();
-#endif
- __build_write_lock(rw, "__write_lock_failed");
-}
-
-#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
-#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
-
-static inline int write_trylock(rwlock_t *lock)
-{
- atomic_t *count = (atomic_t *)lock;
- if (atomic_sub_and_test(RW_LOCK_BIAS, count))
- return 1;
- atomic_add(RW_LOCK_BIAS, count);
- return 0;
-}
-
-#endif /* __ASM_SPINLOCK_H */
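/*
 * Usage sketch (illustrative, not in the original header): a critical
 * section under the spinlock API declared above.
 */
static spinlock_t demo_lock = SPIN_LOCK_UNLOCKED;
static int shared_counter;

static void bump_counter(void)
{
    spin_lock(&demo_lock);
    shared_counter++;        /* at most one CPU in here at a time */
    spin_unlock(&demo_lock);
}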
+++ /dev/null
-#ifndef _I386_STRING_H_
-#define _I386_STRING_H_
-
-#ifdef __KERNEL__
-#include <xen/config.h>
-/*
- * On a 486 or Pentium, we are better off not using the
- * byte string operations. But on a 386 or a PPro the
- * byte string ops are faster than doing it by hand
- * (MUCH faster on a Pentium).
- *
- * Also, the byte strings actually work correctly. Forget
- * the i486 routines for now as they may be broken..
- */
-#if FIXED_486_STRING && defined(CONFIG_X86_USE_STRING_486)
-#include <asm/string-486.h>
-#else
-
-/*
- * This string-include defines all string functions as inline
- * functions. Use gcc. It also assumes ds=es=data space, which should be
- * normal. Most of the string functions are rather heavily hand-optimized;
- * see especially strtok,strstr,str[c]spn. They should work, but are not
- * very easy to understand. Everything is done entirely within the register
- * set, making the functions fast and clean. String instructions have been
- * used through-out, making for "slightly" unclear code :-)
- *
- * NO Copyright (C) 1991, 1992 Linus Torvalds,
- * consider these trivial functions to be PD.
- */
-
-
-#define __HAVE_ARCH_STRCPY
-static inline char * strcpy(char * dest,const char *src)
-{
-int d0, d1, d2;
-__asm__ __volatile__(
- "1:\tlodsb\n\t"
- "stosb\n\t"
- "testb %%al,%%al\n\t"
- "jne 1b"
- : "=&S" (d0), "=&D" (d1), "=&a" (d2)
- :"0" (src),"1" (dest) : "memory");
-return dest;
-}
-
-#define __HAVE_ARCH_STRNCPY
-static inline char * strncpy(char * dest,const char *src,size_t count)
-{
-int d0, d1, d2, d3;
-__asm__ __volatile__(
- "1:\tdecl %2\n\t"
- "js 2f\n\t"
- "lodsb\n\t"
- "stosb\n\t"
- "testb %%al,%%al\n\t"
- "jne 1b\n\t"
- "rep\n\t"
- "stosb\n"
- "2:"
- : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
- :"0" (src),"1" (dest),"2" (count) : "memory");
-return dest;
-}
-
-#define __HAVE_ARCH_STRCAT
-static inline char * strcat(char * dest,const char * src)
-{
-int d0, d1, d2, d3;
-__asm__ __volatile__(
- "repne\n\t"
- "scasb\n\t"
- "decl %1\n"
- "1:\tlodsb\n\t"
- "stosb\n\t"
- "testb %%al,%%al\n\t"
- "jne 1b"
- : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
- : "0" (src), "1" (dest), "2" (0), "3" (0xffffffff):"memory");
-return dest;
-}
-
-#define __HAVE_ARCH_STRNCAT
-static inline char * strncat(char * dest,const char * src,size_t count)
-{
-int d0, d1, d2, d3;
-__asm__ __volatile__(
- "repne\n\t"
- "scasb\n\t"
- "decl %1\n\t"
- "movl %8,%3\n"
- "1:\tdecl %3\n\t"
- "js 2f\n\t"
- "lodsb\n\t"
- "stosb\n\t"
- "testb %%al,%%al\n\t"
- "jne 1b\n"
- "2:\txorl %2,%2\n\t"
- "stosb"
- : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
- : "0" (src),"1" (dest),"2" (0),"3" (0xffffffff), "g" (count)
- : "memory");
-return dest;
-}
-
-#define __HAVE_ARCH_STRCMP
-static inline int strcmp(const char * cs,const char * ct)
-{
-int d0, d1;
-register int __res;
-__asm__ __volatile__(
- "1:\tlodsb\n\t"
- "scasb\n\t"
- "jne 2f\n\t"
- "testb %%al,%%al\n\t"
- "jne 1b\n\t"
- "xorl %%eax,%%eax\n\t"
- "jmp 3f\n"
- "2:\tsbbl %%eax,%%eax\n\t"
- "orb $1,%%al\n"
- "3:"
- :"=a" (__res), "=&S" (d0), "=&D" (d1)
- :"1" (cs),"2" (ct));
-return __res;
-}
-
-#define __HAVE_ARCH_STRNCMP
-static inline int strncmp(const char * cs,const char * ct,size_t count)
-{
-register int __res;
-int d0, d1, d2;
-__asm__ __volatile__(
- "1:\tdecl %3\n\t"
- "js 2f\n\t"
- "lodsb\n\t"
- "scasb\n\t"
- "jne 3f\n\t"
- "testb %%al,%%al\n\t"
- "jne 1b\n"
- "2:\txorl %%eax,%%eax\n\t"
- "jmp 4f\n"
- "3:\tsbbl %%eax,%%eax\n\t"
- "orb $1,%%al\n"
- "4:"
- :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
- :"1" (cs),"2" (ct),"3" (count));
-return __res;
-}
-
-#define __HAVE_ARCH_STRCHR
-static inline char * strchr(const char * s, int c)
-{
-int d0;
-register char * __res;
-__asm__ __volatile__(
- "movb %%al,%%ah\n"
- "1:\tlodsb\n\t"
- "cmpb %%ah,%%al\n\t"
- "je 2f\n\t"
- "testb %%al,%%al\n\t"
- "jne 1b\n\t"
- "movl $1,%1\n"
- "2:\tmovl %1,%0\n\t"
- "decl %0"
- :"=a" (__res), "=&S" (d0) : "1" (s),"0" (c));
-return __res;
-}
-
-#define __HAVE_ARCH_STRRCHR
-static inline char * strrchr(const char * s, int c)
-{
-int d0, d1;
-register char * __res;
-__asm__ __volatile__(
- "movb %%al,%%ah\n"
- "1:\tlodsb\n\t"
- "cmpb %%ah,%%al\n\t"
- "jne 2f\n\t"
- "leal -1(%%esi),%0\n"
- "2:\ttestb %%al,%%al\n\t"
- "jne 1b"
- :"=g" (__res), "=&S" (d0), "=&a" (d1) :"0" (0),"1" (s),"2" (c));
-return __res;
-}
-
-#define __HAVE_ARCH_STRLEN
-static inline size_t strlen(const char * s)
-{
-int d0;
-register int __res;
-__asm__ __volatile__(
- "repne\n\t"
- "scasb\n\t"
- "notl %0\n\t"
- "decl %0"
- :"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffff));
-return __res;
-}
-
-static inline void * __memcpy(void * to, const void * from, size_t n)
-{
-int d0, d1, d2;
-__asm__ __volatile__(
- "rep ; movsl\n\t"
- "testb $2,%b4\n\t"
- "je 1f\n\t"
- "movsw\n"
- "1:\ttestb $1,%b4\n\t"
- "je 2f\n\t"
- "movsb\n"
- "2:"
- : "=&c" (d0), "=&D" (d1), "=&S" (d2)
- :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
- : "memory");
-return (to);
-}
-
-/*
- * This looks horribly ugly, but the compiler can optimize it totally,
- * as the count is constant.
- */
-static inline void * __constant_memcpy(void * to, const void * from, size_t n)
-{
- switch (n) {
- case 0:
- return to;
- case 1:
- *(unsigned char *)to = *(const unsigned char *)from;
- return to;
- case 2:
- *(unsigned short *)to = *(const unsigned short *)from;
- return to;
- case 3:
- *(unsigned short *)to = *(const unsigned short *)from;
- *(2+(unsigned char *)to) = *(2+(const unsigned char *)from);
- return to;
- case 4:
- *(unsigned long *)to = *(const unsigned long *)from;
- return to;
- case 6: /* for Ethernet addresses */
- *(unsigned long *)to = *(const unsigned long *)from;
- *(2+(unsigned short *)to) = *(2+(const unsigned short *)from);
- return to;
- case 8:
- *(unsigned long *)to = *(const unsigned long *)from;
- *(1+(unsigned long *)to) = *(1+(const unsigned long *)from);
- return to;
- case 12:
- *(unsigned long *)to = *(const unsigned long *)from;
- *(1+(unsigned long *)to) = *(1+(const unsigned long *)from);
- *(2+(unsigned long *)to) = *(2+(const unsigned long *)from);
- return to;
- case 16:
- *(unsigned long *)to = *(const unsigned long *)from;
- *(1+(unsigned long *)to) = *(1+(const unsigned long *)from);
- *(2+(unsigned long *)to) = *(2+(const unsigned long *)from);
- *(3+(unsigned long *)to) = *(3+(const unsigned long *)from);
- return to;
- case 20:
- *(unsigned long *)to = *(const unsigned long *)from;
- *(1+(unsigned long *)to) = *(1+(const unsigned long *)from);
- *(2+(unsigned long *)to) = *(2+(const unsigned long *)from);
- *(3+(unsigned long *)to) = *(3+(const unsigned long *)from);
- *(4+(unsigned long *)to) = *(4+(const unsigned long *)from);
- return to;
- }
-#define COMMON(x) \
-__asm__ __volatile__( \
- "rep ; movsl" \
- x \
- : "=&c" (d0), "=&D" (d1), "=&S" (d2) \
- : "0" (n/4),"1" ((long) to),"2" ((long) from) \
- : "memory");
-{
- int d0, d1, d2;
- switch (n % 4) {
- case 0: COMMON(""); return to;
- case 1: COMMON("\n\tmovsb"); return to;
- case 2: COMMON("\n\tmovsw"); return to;
- default: COMMON("\n\tmovsw\n\tmovsb"); return to;
- }
-}
-
-#undef COMMON
-}
-
-#define __HAVE_ARCH_MEMCPY
-
-#define memcpy(t, f, n) \
-(__builtin_constant_p(n) ? \
- __constant_memcpy((t),(f),(n)) : \
- __memcpy((t),(f),(n)))
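/*
 * Illustrative effect of the dispatch above: with a compile-time
 * constant size the call resolves to __constant_memcpy() and collapses
 * to direct moves, so the copy below costs two movl instructions and
 * no function call.
 */
struct demo_pair { unsigned long a, b; };

static void copy_pair(struct demo_pair *d, const struct demo_pair *s)
{
    memcpy(d, s, sizeof(*d));   /* constant 8: hits "case 8" above */
}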
-
-
-/*
- * struct_cpy(x,y), copy structure *x into (matching structure) *y.
- *
- * We get link-time errors if the structure sizes do not match.
- * There is no runtime overhead, it's all optimized away at
- * compile time.
- */
-//extern void __struct_cpy_bug (void);
-
-/*
-#define struct_cpy(x,y) \
-({ \
- if (sizeof(*(x)) != sizeof(*(y))) \
- __struct_cpy_bug; \
- memcpy(x, y, sizeof(*(x))); \
-})
-*/
-
-#define __HAVE_ARCH_MEMMOVE
-static inline void * memmove(void * dest,const void * src, size_t n)
-{
-int d0, d1, d2;
-if (dest<src)
-__asm__ __volatile__(
- "rep\n\t"
- "movsb"
- : "=&c" (d0), "=&S" (d1), "=&D" (d2)
- :"0" (n),"1" (src),"2" (dest)
- : "memory");
-else
-__asm__ __volatile__(
- "std\n\t"
- "rep\n\t"
- "movsb\n\t"
- "cld"
- : "=&c" (d0), "=&S" (d1), "=&D" (d2)
- :"0" (n),
- "1" (n-1+(const char *)src),
- "2" (n-1+(char *)dest)
- :"memory");
-return dest;
-}
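/*
 * Why the direction test above matters (illustrative): with src = buf,
 * dest = buf+1 and n = 4, a forward byte copy would smear buf[0] over
 * the whole destination; copying backwards from the top, as the
 * std/movsb path does, preserves every original byte.
 */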
-
-#define memcmp __builtin_memcmp
-
-#define __HAVE_ARCH_MEMCHR
-static inline void * memchr(const void * cs,int c,size_t count)
-{
-int d0;
-register void * __res;
-if (!count)
- return NULL;
-__asm__ __volatile__(
- "repne\n\t"
- "scasb\n\t"
- "je 1f\n\t"
- "movl $1,%0\n"
- "1:\tdecl %0"
- :"=D" (__res), "=&c" (d0) : "a" (c),"0" (cs),"1" (count));
-return __res;
-}
-
-static inline void * __memset_generic(void * s, char c,size_t count)
-{
-int d0, d1;
-__asm__ __volatile__(
- "rep\n\t"
- "stosb"
- : "=&c" (d0), "=&D" (d1)
- :"a" (c),"1" (s),"0" (count)
- :"memory");
-return s;
-}
-
-/* we might want to write optimized versions of these later */
-#define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count))
-
-/*
- * memset(x,0,y) is a reasonably common thing to do, so we want to fill
- * things 32 bits at a time even when we don't know the size of the
- * area at compile-time..
- */
-static inline void * __constant_c_memset(void * s, unsigned long c, size_t count)
-{
-int d0, d1;
-__asm__ __volatile__(
- "rep ; stosl\n\t"
- "testb $2,%b3\n\t"
- "je 1f\n\t"
- "stosw\n"
- "1:\ttestb $1,%b3\n\t"
- "je 2f\n\t"
- "stosb\n"
- "2:"
- : "=&c" (d0), "=&D" (d1)
- :"a" (c), "q" (count), "0" (count/4), "1" ((long) s)
- :"memory");
-return (s);
-}
-
-/* Added by Gertjan van Wingerde to make minix and sysv modules work */
-#define __HAVE_ARCH_STRNLEN
-static inline size_t strnlen(const char * s, size_t count)
-{
-int d0;
-register int __res;
-__asm__ __volatile__(
- "movl %2,%0\n\t"
- "jmp 2f\n"
- "1:\tcmpb $0,(%0)\n\t"
- "je 3f\n\t"
- "incl %0\n"
- "2:\tdecl %1\n\t"
- "cmpl $-1,%1\n\t"
- "jne 1b\n"
- "3:\tsubl %2,%0"
- :"=a" (__res), "=&d" (d0)
- :"c" (s),"1" (count));
-return __res;
-}
-/* end of additional stuff */
-
-//#define __HAVE_ARCH_STRSTR
-
-//extern char *strstr(const char *cs, const char *ct);
-
-/*
- * This looks horribly ugly, but the compiler can optimize it totally,
- * as by now we know that both pattern and count are constant.
- */
-static inline void * __constant_c_and_count_memset(void * s, unsigned long pattern, size_t count)
-{
- switch (count) {
- case 0:
- return s;
- case 1:
- *(unsigned char *)s = pattern;
- return s;
- case 2:
- *(unsigned short *)s = pattern;
- return s;
- case 3:
- *(unsigned short *)s = pattern;
- *(2+(unsigned char *)s) = pattern;
- return s;
- case 4:
- *(unsigned long *)s = pattern;
- return s;
- }
-#define COMMON(x) \
-__asm__ __volatile__( \
- "rep ; stosl" \
- x \
- : "=&c" (d0), "=&D" (d1) \
- : "a" (pattern),"0" (count/4),"1" ((long) s) \
- : "memory")
-{
- int d0, d1;
- switch (count % 4) {
- case 0: COMMON(""); return s;
- case 1: COMMON("\n\tstosb"); return s;
- case 2: COMMON("\n\tstosw"); return s;
- default: COMMON("\n\tstosw\n\tstosb"); return s;
- }
-}
-
-#undef COMMON
-}
-
-#define __constant_c_x_memset(s, c, count) \
-(__builtin_constant_p(count) ? \
- __constant_c_and_count_memset((s),(c),(count)) : \
- __constant_c_memset((s),(c),(count)))
-
-#define __memset(s, c, count) \
-(__builtin_constant_p(count) ? \
- __constant_count_memset((s),(c),(count)) : \
- __memset_generic((s),(c),(count)))
-
-#define __HAVE_ARCH_MEMSET
-#define memset(s, c, count) \
-(__builtin_constant_p(c) ? \
- __constant_c_x_memset((s),(0x01010101UL*(unsigned char)(c)),(count)) : \
- __memset((s),(c),(count)))
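/*
 * Worked example of the 0x01010101 trick above (illustrative): for
 * c = 0xAB the multiply yields the pattern 0xABABABAB, letting the
 * "rep stosl" paths fill four bytes per store instead of one.
 */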
-
-/*
- * find the first occurrence of byte 'c', or 1 past the area if none
- */
-#define __HAVE_ARCH_MEMSCAN
-static inline void * memscan(void * addr, int c, size_t size)
-{
- if (!size)
- return addr;
- __asm__("repnz; scasb\n\t"
- "jnz 1f\n\t"
- "dec %%edi\n"
- "1:"
- : "=D" (addr), "=c" (size)
- : "0" (addr), "1" (size), "a" (c));
- return addr;
-}
-
-#endif /* CONFIG_X86_USE_STRING_486 */
-#endif /* __KERNEL__ */
-
-#endif
+++ /dev/null
-#ifndef __ASM_SYSTEM_H
-#define __ASM_SYSTEM_H
-
-#include <xen/config.h>
-#include <asm/bitops.h>
-
-/* Clear and set 'TS' bit respectively */
-#define clts() __asm__ __volatile__ ("clts")
-#define stts() write_cr0(X86_CR0_TS|read_cr0())
-
-#define wbinvd() \
- __asm__ __volatile__ ("wbinvd": : :"memory");
-
-static inline unsigned long get_limit(unsigned long segment)
-{
- unsigned long __limit;
- __asm__("lsll %1,%0"
- :"=r" (__limit):"r" (segment));
- return __limit+1;
-}
-
-#define nop() __asm__ __volatile__ ("nop")
-
-#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
-
-struct __xchg_dummy { unsigned long a[100]; };
-#define __xg(x) ((struct __xchg_dummy *)(x))
-
-
-/*
- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
- * Note 2: xchg has side effect, so that attribute volatile is necessary,
- * but generally the primitive is invalid, *ptr is output argument. --ANK
- */
-static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
-{
- switch (size) {
- case 1:
- __asm__ __volatile__("xchgb %b0,%1"
- :"=q" (x)
- :"m" (*__xg(ptr)), "0" (x)
- :"memory");
- break;
- case 2:
- __asm__ __volatile__("xchgw %w0,%1"
- :"=r" (x)
- :"m" (*__xg(ptr)), "0" (x)
- :"memory");
- break;
- case 4:
- __asm__ __volatile__("xchgl %0,%1"
- :"=r" (x)
- :"m" (*__xg(ptr)), "0" (x)
- :"memory");
- break;
- }
- return x;
-}
-
-/*
- * Atomic compare and exchange. Compare OLD with MEM, if identical,
- * store NEW in MEM. Return the initial value in MEM. Success is
- * indicated by comparing RETURN with OLD.
- */
-
-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
- unsigned long new, int size)
-{
- unsigned long prev;
- switch (size) {
- case 1:
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
- : "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 2:
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
- : "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 4:
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
- : "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- }
- return old;
-}
-
-#define cmpxchg(ptr,o,n)\
- ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
- (unsigned long)(n),sizeof(*(ptr))))
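/*
 * Usage sketch (illustrative): a lock-free saturating counter built
 * on the cmpxchg() macro above.  The loop retries whenever another
 * CPU updated the word between our read and our compare-exchange.
 */
static void saturating_inc(volatile unsigned long *ctr, unsigned long max)
{
    unsigned long old, new;
    do {
        old = *ctr;
        new = (old < max) ? old + 1 : max;
    } while (cmpxchg(ctr, old, new) != old);
}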
-
-
-/*
- * This function causes longword _o to be changed to _n at location _p.
- * If this access causes a fault then we return 1, otherwise we return 0.
- * If no fault occurs then _o is updated to the value we saw at _p. If this
- * is the same as the initial value of _o then _n is written to location _p.
- */
-#define cmpxchg_user(_p,_o,_n) \
-({ \
- int _rc; \
- __asm__ __volatile__ ( \
- "1: " LOCK_PREFIX "cmpxchgl %2,%3\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl $1,%1\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 1b,3b\n" \
- ".previous" \
- : "=a" (_o), "=r" (_rc) \
- : "q" (_n), "m" (*__xg((volatile void *)_p)), "0" (_o), "1" (0) \
- : "memory"); \
- _rc; \
-})
-
-/*
- * Force strict CPU ordering.
- * And yes, this is required on UP too when we're talking
- * to devices.
- *
- * For now, "wmb()" doesn't actually do anything, as all
- * Intel CPU's follow what Intel calls a *Processor Order*,
- * in which all writes are seen in the program order even
- * outside the CPU.
- *
- * I expect future Intel CPU's to have a weaker ordering,
- * but I'd also expect them to finally get their act together
- * and add some real memory barriers if so.
- *
- * Some non intel clones support out of order store. wmb() ceases to be a
- * nop for these.
- */
-
-#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
-#define rmb() mb()
-
-#ifdef CONFIG_X86_OOSTORE
-#define wmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
-#else
-#define wmb() __asm__ __volatile__ ("": : :"memory")
-#endif
-
-#ifdef CONFIG_SMP
-#define smp_mb() mb()
-#define smp_rmb() rmb()
-#define smp_wmb() wmb()
-#else
-#define smp_mb() barrier()
-#define smp_rmb() barrier()
-#define smp_wmb() barrier()
-#endif
-
-#define set_mb(var, value) do { xchg(&var, value); } while (0)
-#define set_wmb(var, value) do { var = value; wmb(); } while (0)
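/*
 * Usage sketch (illustrative): the message-passing pattern the
 * barriers above exist for.  The producer orders its payload store
 * before the flag store; the consumer orders the flag load before
 * the payload load, so it can never see the flag without the data.
 */
static int demo_payload, demo_ready;

static void demo_produce(void)
{
    demo_payload = 42;
    smp_wmb();               /* payload visible before flag */
    demo_ready = 1;
}

static int demo_consume(void)
{
    while (!demo_ready)
        ;                    /* busy wait; real code would cpu_relax() */
    smp_rmb();               /* flag seen, now read payload */
    return demo_payload;
}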
-
-/* interrupt control.. */
-#define __save_flags(x) __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */)
-#define __restore_flags(x) __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc")
-#define __cli() __asm__ __volatile__("cli": : :"memory")
-#define __sti() __asm__ __volatile__("sti": : :"memory")
-/* used in the idle loop; sti takes one instruction cycle to complete */
-#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory")
-
-/* For spinlocks etc */
-#define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
-#define local_irq_restore(x) __restore_flags(x)
-#define local_irq_disable() __cli()
-#define local_irq_enable() __sti()
-
-static inline int local_irq_is_enabled(void)
-{
- unsigned long flags;
- __save_flags(flags);
- return !!(flags & (1<<9)); /* EFLAGS_IF */
-}
-
-#ifdef CONFIG_SMP
-
-extern void __global_cli(void);
-extern void __global_sti(void);
-extern unsigned long __global_save_flags(void);
-extern void __global_restore_flags(unsigned long);
-#define cli() __global_cli()
-#define sti() __global_sti()
-#define save_flags(x) ((x)=__global_save_flags())
-#define restore_flags(x) __global_restore_flags(x)
-
-#else
-
-#define cli() __cli()
-#define sti() __sti()
-#define save_flags(x) __save_flags(x)
-#define restore_flags(x) __restore_flags(x)
-
-#endif
-
-/*
- * disable hlt during certain critical i/o operations
- */
-#define HAVE_DISABLE_HLT
-void disable_hlt(void);
-void enable_hlt(void);
-
-#define BROKEN_ACPI_Sx 0x0001
-#define BROKEN_INIT_AFTER_S1 0x0002
-
-#endif
+++ /dev/null
-/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
- ****************************************************************************
- * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
- ****************************************************************************
- *
- * File: time.h
- * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
- *
- * Environment: Xen Hypervisor
- * Description: Architecture dependent definition of time variables
- */
-
-#ifndef _ASM_TIME_H_
-#define _ASM_TIME_H_
-
-#include <asm/types.h>
-#include <asm/msr.h>
-
-typedef s64 s_time_t; /* system time */
-
-#endif /* _ASM_TIME_H_ */
+++ /dev/null
-/*
- * linux/include/asm-i386/timex.h
- *
- * i386 architecture timex specifications
- */
-#ifndef _ASMi386_TIMEX_H
-#define _ASMi386_TIMEX_H
-
-#include <xen/config.h>
-#include <asm/msr.h>
-
-#ifdef CONFIG_MELAN
-# define CLOCK_TICK_RATE 1189200 /* AMD Elan has different frequency! */
-#else
-# define CLOCK_TICK_RATE 1193180 /* Underlying HZ */
-#endif
-
-#define CLOCK_TICK_FACTOR 20 /* Factor of both 1000000 and CLOCK_TICK_RATE */
-#define FINETUNE ((((((long)LATCH * HZ - CLOCK_TICK_RATE) << SHIFT_HZ) * \
- (1000000/CLOCK_TICK_FACTOR) / (CLOCK_TICK_RATE/CLOCK_TICK_FACTOR)) \
- << (SHIFT_SCALE-SHIFT_HZ)) / HZ)
-
-/*
- * Standard way to access the cycle counter on i586+ CPUs.
- * Currently only used on SMP.
- *
- * If you really have a SMP machine with i486 chips or older,
- * compile for that, and this will just always return zero.
- * That's ok, it just means that the nicer scheduling heuristics
- * won't work for you.
- *
- * We only use the low 32 bits, and we'd simply better make sure
- * that we reschedule before that wraps. Scheduling at least every
- * four billion cycles just basically sounds like a good idea,
- * regardless of how fast the machine is.
- */
-typedef unsigned long long cycles_t;
-
-extern cycles_t cacheflush_time;
-
-static inline cycles_t get_cycles (void)
-{
-#ifndef CONFIG_X86_TSC
- return 0;
-#else
- unsigned long long ret;
-
- rdtscll(ret);
- return ret;
-#endif
-}
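/*
 * Usage sketch (illustrative): timing a region with the get_cycles()
 * helper above.  cpu_khz is the clock rate in kHz, i.e. cycles per
 * millisecond, hence the division.  Only meaningful when CONFIG_X86_TSC
 * is set; the 64-bit division is fine for a sketch, but hot paths
 * would avoid it.
 */
static unsigned long demo_time_ms(void (*fn)(void))
{
    cycles_t start = get_cycles();
    fn();
    return (unsigned long)((get_cycles() - start) / cpu_khz);
}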
-
-extern unsigned long cpu_khz;
-
-#define vxtime_lock() do {} while (0)
-#define vxtime_unlock() do {} while (0)
-
-#endif
+++ /dev/null
-#ifndef _I386_TYPES_H
-#define _I386_TYPES_H
-
-typedef unsigned short umode_t;
-
-typedef unsigned int size_t;
-
-/*
- * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
- * header files exported to user space
- */
-
-typedef __signed__ char __s8;
-typedef unsigned char __u8;
-
-typedef __signed__ short __s16;
-typedef unsigned short __u16;
-
-typedef __signed__ int __s32;
-typedef unsigned int __u32;
-
-#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
-typedef __signed__ long long __s64;
-typedef unsigned long long __u64;
-#endif
-
-#include <xen/config.h>
-
-typedef signed char s8;
-typedef unsigned char u8;
-
-typedef signed short s16;
-typedef unsigned short u16;
-
-typedef signed int s32;
-typedef unsigned int u32;
-
-typedef signed long long s64;
-typedef unsigned long long u64;
-
-#define BITS_PER_LONG 32
-
-/* DMA addresses come in generic and 64-bit flavours. */
-
-#ifdef CONFIG_HIGHMEM
-typedef u64 dma_addr_t;
-#else
-typedef u32 dma_addr_t;
-#endif
-typedef u64 dma64_addr_t;
-
-#endif
+++ /dev/null
-#ifndef __i386_UACCESS_H
-#define __i386_UACCESS_H
-
-/*
- * User space memory access functions
- */
-#include <xen/config.h>
-#include <xen/errno.h>
-#include <xen/sched.h>
-#include <xen/prefetch.h>
-#include <asm/page.h>
-
-#define VERIFY_READ 0
-#define VERIFY_WRITE 1
-
-/*
- * The fs value determines whether argument validity checking should be
- * performed or not. If get_fs() == USER_DS, checking is performed; with
- * get_fs() == KERNEL_DS, checking is bypassed.
- *
- * For historical reasons, these macros are grossly misnamed.
- */
-
-#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
-
-
-#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF)
-#define USER_DS MAKE_MM_SEG(PAGE_OFFSET)
-
-#define get_ds() (KERNEL_DS)
-#define get_fs() (current->addr_limit)
-#define set_fs(x) (current->addr_limit = (x))
-
-#define segment_eq(a,b) ((a).seg == (b).seg)
-
-extern int __verify_write(const void *, unsigned long);
-
-#define __addr_ok(addr) ((unsigned long)(addr) < (current->addr_limit.seg))
-
-/*
- * Uhhuh, this needs 33-bit arithmetic. We have a carry..
- */
-#define __range_ok(addr,size) ({ \
- unsigned long flag,sum; \
- asm("addl %3,%1 ; sbbl %0,%0; cmpl %1,%4; sbbl $0,%0" \
- :"=&r" (flag), "=r" (sum) \
- :"1" (addr),"g" ((int)(size)),"g" (current->addr_limit.seg)); \
- flag; })
-
-#define access_ok(type,addr,size) (__range_ok(addr,size) == 0)
-
-static inline int verify_area(int type, const void * addr, unsigned long size)
-{
- return access_ok(type,addr,size) ? 0 : -EFAULT;
-}
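/*
 * Illustrative portable equivalent of the 33-bit trick in __range_ok()
 * above: the asm folds the wraparound test and the limit test into one
 * carry chain.  Note the inverted sense: this helper returns nonzero
 * for a valid range, while __range_ok() yields 0 in that case.
 */
static inline int demo_range_ok(unsigned long addr, unsigned long size,
                                unsigned long limit)
{
    return (addr + size >= addr) &&   /* no 32-bit wraparound */
           (addr + size <= limit);    /* end stays within the limit */
}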
-
-
-/*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue. No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
- *
- * All the routines below use bits of fixup code that are out of line
- * with the main instruction path. This means when everything is well,
- * we don't even have to jump over them. Further, they do not intrude
- * on our cache or tlb entries.
- */
-
-struct exception_table_entry
-{
- unsigned long insn, fixup;
-};
-
-/* Returns 0 if exception not found and fixup otherwise. */
-extern unsigned long search_exception_table(unsigned long);
-
-
-/*
- * These are the main single-value transfer routines. They automatically
- * use the right size if we just have the right pointer type.
- *
- * This gets kind of ugly. We want to return _two_ values in "get_user()"
- * and yet we don't want to do any pointers, because that is too much
- * of a performance impact. Thus we have a few rather ugly macros here,
- * and hide all the ugliness from the user.
- *
- * The "__xxx" versions of the user access functions are versions that
- * do not verify the address space, that must have been done previously
- * with a separate "access_ok()" call (this is used when we do multiple
- * accesses to the same area of user memory).
- */
-
-extern void __get_user_1(void);
-extern void __get_user_2(void);
-extern void __get_user_4(void);
-
-#define __get_user_x(size,ret,x,ptr) \
- __asm__ __volatile__("call __get_user_" #size \
- :"=a" (ret),"=d" (x) \
- :"0" (ptr))
-
-/* Careful: we have to cast the result to the type of the pointer for sign reasons */
-#define get_user(x,ptr) \
-({ int __ret_gu=1,__val_gu; \
- switch(sizeof (*(ptr))) { \
- case 1: __ret_gu=copy_from_user(&__val_gu,ptr,1); break; \
- case 2: __ret_gu=copy_from_user(&__val_gu,ptr,2); break; \
- case 4: __ret_gu=copy_from_user(&__val_gu,ptr,4); break; \
- default: __ret_gu=copy_from_user(&__val_gu,ptr,8); break; \
- /*case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break;*/ \
- /*case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break;*/ \
- /*case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break;*/ \
- /*default: __get_user_x(X,__ret_gu,__val_gu,ptr); break;*/ \
- } \
- (x) = (__typeof__(*(ptr)))__val_gu; \
- __ret_gu; \
-})
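/*
 * Usage sketch (illustrative): the typical calling pattern for the
 * macro above.  get_user() returns 0 on success and nonzero when the
 * guest pointer faults, so callers branch on the return value.
 */
static int demo_fetch_arg(unsigned long *uptr, unsigned long *out)
{
    unsigned long v;
    if (get_user(v, uptr))
        return -EFAULT;
    *out = v;
    return 0;
}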
-
-extern void __put_user_1(void);
-extern void __put_user_2(void);
-extern void __put_user_4(void);
-extern void __put_user_8(void);
-
-extern void __put_user_bad(void);
-
-#define put_user(x,ptr) \
- __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
-#define __get_user(x,ptr) \
- __get_user_nocheck((x),(ptr),sizeof(*(ptr)))
-#define __put_user(x,ptr) \
- __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
-#define __put_user_nocheck(x,ptr,size) \
-({ \
- long __pu_err; \
- __put_user_size((x),(ptr),(size),__pu_err); \
- __pu_err; \
-})
-
-
-#define __put_user_check(x,ptr,size) \
-({ \
- long __pu_err = -EFAULT; \
- __typeof__(*(ptr)) *__pu_addr = (ptr); \
- if (access_ok(VERIFY_WRITE,__pu_addr,size)) \
- __put_user_size((x),__pu_addr,(size),__pu_err); \
- __pu_err; \
-})
-
-#define __put_user_u64(x, addr, err) \
- __asm__ __volatile__( \
- "1: movl %%eax,0(%2)\n" \
- "2: movl %%edx,4(%2)\n" \
- "3:\n" \
- ".section .fixup,\"ax\"\n" \
- "4: movl %3,%0\n" \
- " jmp 3b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 1b,4b\n" \
- " .long 2b,4b\n" \
- ".previous" \
- : "=r"(err) \
- : "A" (x), "r" (addr), "i"(-EFAULT), "0"(err))
-
-#define __put_user_size(x,ptr,size,retval) \
-do { \
- retval = 0; \
- switch (size) { \
- case 1: __put_user_asm(x,ptr,retval,"b","b","iq"); break; \
- case 2: __put_user_asm(x,ptr,retval,"w","w","ir"); break; \
- case 4: __put_user_asm(x,ptr,retval,"l","","ir"); break; \
- case 8: __put_user_u64(x,ptr,retval); break; \
- default: __put_user_bad(); \
- } \
-} while (0)
-
-struct __large_struct { unsigned long buf[100]; };
-#define __m(x) (*(struct __large_struct *)(x))
-
-/*
- * Tell gcc we read from memory instead of writing: this is because
- * we do not write to any memory gcc knows about, so there are no
- * aliasing issues.
- */
-#define __put_user_asm(x, addr, err, itype, rtype, ltype) \
- __asm__ __volatile__( \
- "1: mov"itype" %"rtype"1,%2\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl %3,%0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 1b,3b\n" \
- ".previous" \
- : "=r"(err) \
- : ltype (x), "m"(__m(addr)), "i"(-EFAULT), "0"(err))
-
-
-#define __get_user_nocheck(x,ptr,size) \
-({ \
- long __gu_err, __gu_val; \
- __get_user_size(__gu_val,(ptr),(size),__gu_err); \
- (x) = (__typeof__(*(ptr)))__gu_val; \
- __gu_err; \
-})
-
-extern long __get_user_bad(void);
-
-#define __get_user_size(x,ptr,size,retval) \
-do { \
- retval = 0; \
- switch (size) { \
- case 1: __get_user_asm(x,ptr,retval,"b","b","=q"); break; \
- case 2: __get_user_asm(x,ptr,retval,"w","w","=r"); break; \
- case 4: __get_user_asm(x,ptr,retval,"l","","=r"); break; \
- default: (x) = __get_user_bad(); \
- } \
-} while (0)
-
-#define __get_user_asm(x, addr, err, itype, rtype, ltype) \
- __asm__ __volatile__( \
- "1: mov"itype" %2,%"rtype"1\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl %3,%0\n" \
- " xor"itype" %"rtype"1,%"rtype"1\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 1b,3b\n" \
- ".previous" \
- : "=r"(err), ltype (x) \
- : "m"(__m(addr)), "i"(-EFAULT), "0"(err))
-
-
-/*
- * Copy To/From Userspace
- */
-
-/* Generic arbitrary sized copy. */
-#define __copy_user(to,from,size) \
-do { \
- int __d0, __d1; \
- __asm__ __volatile__( \
- "0: rep; movsl\n" \
- " movl %3,%0\n" \
- "1: rep; movsb\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: lea 0(%3,%0,4),%0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 0b,3b\n" \
- " .long 1b,2b\n" \
- ".previous" \
- : "=&c"(size), "=&D" (__d0), "=&S" (__d1) \
- : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \
- : "memory"); \
-} while (0)
-
-#define __copy_user_zeroing(to,from,size) \
-do { \
- int __d0, __d1; \
- __asm__ __volatile__( \
- "0: rep; movsl\n" \
- " movl %3,%0\n" \
- "1: rep; movsb\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: lea 0(%3,%0,4),%0\n" \
- "4: pushl %0\n" \
- " pushl %%eax\n" \
- " xorl %%eax,%%eax\n" \
- " rep; stosb\n" \
- " popl %%eax\n" \
- " popl %0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 0b,3b\n" \
- " .long 1b,4b\n" \
- ".previous" \
- : "=&c"(size), "=&D" (__d0), "=&S" (__d1) \
- : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \
- : "memory"); \
-} while (0)
-
-/* We let the __ versions of copy_from/to_user inline, because they're often
- * used in fast paths and have only a small space overhead.
- */
-static inline unsigned long
-__generic_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
-{
- __copy_user_zeroing(to,from,n);
- return n;
-}
-
-static inline unsigned long
-__generic_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
-{
- __copy_user(to,from,n);
- return n;
-}
-
-
-/* Optimize just a little bit when we know the size of the move. */
-#define __constant_copy_user(to, from, size) \
-do { \
- int __d0, __d1; \
- switch (size & 3) { \
- default: \
- __asm__ __volatile__( \
- "0: rep; movsl\n" \
- "1:\n" \
- ".section .fixup,\"ax\"\n" \
- "2: shl $2,%0\n" \
- " jmp 1b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 0b,2b\n" \
- ".previous" \
- : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
- : "1"(from), "2"(to), "0"(size/4) \
- : "memory"); \
- break; \
- case 1: \
- __asm__ __volatile__( \
- "0: rep; movsl\n" \
- "1: movsb\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: shl $2,%0\n" \
- "4: incl %0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 0b,3b\n" \
- " .long 1b,4b\n" \
- ".previous" \
- : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
- : "1"(from), "2"(to), "0"(size/4) \
- : "memory"); \
- break; \
- case 2: \
- __asm__ __volatile__( \
- "0: rep; movsl\n" \
- "1: movsw\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: shl $2,%0\n" \
- "4: addl $2,%0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 0b,3b\n" \
- " .long 1b,4b\n" \
- ".previous" \
- : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
- : "1"(from), "2"(to), "0"(size/4) \
- : "memory"); \
- break; \
- case 3: \
- __asm__ __volatile__( \
- "0: rep; movsl\n" \
- "1: movsw\n" \
- "2: movsb\n" \
- "3:\n" \
- ".section .fixup,\"ax\"\n" \
- "4: shl $2,%0\n" \
- "5: addl $2,%0\n" \
- "6: incl %0\n" \
- " jmp 3b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 0b,4b\n" \
- " .long 1b,5b\n" \
- " .long 2b,6b\n" \
- ".previous" \
- : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
- : "1"(from), "2"(to), "0"(size/4) \
- : "memory"); \
- break; \
- } \
-} while (0)
-
-/* Optimize just a little bit when we know the size of the move. */
-#define __constant_copy_user_zeroing(to, from, size) \
-do { \
- int __d0, __d1; \
- switch (size & 3) { \
- default: \
- __asm__ __volatile__( \
- "0: rep; movsl\n" \
- "1:\n" \
- ".section .fixup,\"ax\"\n" \
- "2: pushl %0\n" \
- " pushl %%eax\n" \
- " xorl %%eax,%%eax\n" \
- " rep; stosl\n" \
- " popl %%eax\n" \
- " popl %0\n" \
- " shl $2,%0\n" \
- " jmp 1b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 0b,2b\n" \
- ".previous" \
- : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
- : "1"(from), "2"(to), "0"(size/4) \
- : "memory"); \
- break; \
- case 1: \
- __asm__ __volatile__( \
- "0: rep; movsl\n" \
- "1: movsb\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: pushl %0\n" \
- " pushl %%eax\n" \
- " xorl %%eax,%%eax\n" \
- " rep; stosl\n" \
- " stosb\n" \
- " popl %%eax\n" \
- " popl %0\n" \
- " shl $2,%0\n" \
- " incl %0\n" \
- " jmp 2b\n" \
- "4: pushl %%eax\n" \
- " xorl %%eax,%%eax\n" \
- " stosb\n" \
- " popl %%eax\n" \
- " incl %0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 0b,3b\n" \
- " .long 1b,4b\n" \
- ".previous" \
- : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
- : "1"(from), "2"(to), "0"(size/4) \
- : "memory"); \
- break; \
- case 2: \
- __asm__ __volatile__( \
- "0: rep; movsl\n" \
- "1: movsw\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: pushl %0\n" \
- " pushl %%eax\n" \
- " xorl %%eax,%%eax\n" \
- " rep; stosl\n" \
- " stosw\n" \
- " popl %%eax\n" \
- " popl %0\n" \
- " shl $2,%0\n" \
- " addl $2,%0\n" \
- " jmp 2b\n" \
- "4: pushl %%eax\n" \
- " xorl %%eax,%%eax\n" \
- " stosw\n" \
- " popl %%eax\n" \
- " addl $2,%0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 0b,3b\n" \
- " .long 1b,4b\n" \
- ".previous" \
- : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
- : "1"(from), "2"(to), "0"(size/4) \
- : "memory"); \
- break; \
- case 3: \
- __asm__ __volatile__( \
- "0: rep; movsl\n" \
- "1: movsw\n" \
- "2: movsb\n" \
- "3:\n" \
- ".section .fixup,\"ax\"\n" \
- "4: pushl %0\n" \
- " pushl %%eax\n" \
- " xorl %%eax,%%eax\n" \
- " rep; stosl\n" \
- " stosw\n" \
- " stosb\n" \
- " popl %%eax\n" \
- " popl %0\n" \
- " shl $2,%0\n" \
- " addl $3,%0\n" \
- " jmp 2b\n" \
- "5: pushl %%eax\n" \
- " xorl %%eax,%%eax\n" \
- " stosw\n" \
- " stosb\n" \
- " popl %%eax\n" \
- " addl $3,%0\n" \
- " jmp 2b\n" \
- "6: pushl %%eax\n" \
- " xorl %%eax,%%eax\n" \
- " stosb\n" \
- " popl %%eax\n" \
- " incl %0\n" \
- " jmp 3b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 0b,4b\n" \
- " .long 1b,5b\n" \
- " .long 2b,6b\n" \
- ".previous" \
- : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
- : "1"(from), "2"(to), "0"(size/4) \
- : "memory"); \
- break; \
- } \
-} while (0)
-
-unsigned long __generic_copy_to_user(void *, const void *, unsigned long);
-unsigned long __generic_copy_from_user(void *, const void *, unsigned long);
-
-static inline unsigned long
-__constant_copy_to_user(void *to, const void *from, unsigned long n)
-{
- prefetch(from);
- if (access_ok(VERIFY_WRITE, to, n))
- __constant_copy_user(to,from,n);
- return n;
-}
-
-static inline unsigned long
-__constant_copy_from_user(void *to, const void *from, unsigned long n)
-{
- if (access_ok(VERIFY_READ, from, n))
- __constant_copy_user_zeroing(to,from,n);
- else
- memset(to, 0, n);
- return n;
-}
-
-static inline unsigned long
-__constant_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
-{
- __constant_copy_user(to,from,n);
- return n;
-}
-
-static inline unsigned long
-__constant_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
-{
- __constant_copy_user_zeroing(to,from,n);
- return n;
-}
-
-#define copy_to_user(to,from,n) \
- (__builtin_constant_p(n) ? \
- __constant_copy_to_user((to),(from),(n)) : \
- __generic_copy_to_user((to),(from),(n)))
-
-#define copy_from_user(to,from,n) \
- (__builtin_constant_p(n) ? \
- __constant_copy_from_user((to),(from),(n)) : \
- __generic_copy_from_user((to),(from),(n)))
-
-#define __copy_to_user(to,from,n) \
- (__builtin_constant_p(n) ? \
- __constant_copy_to_user_nocheck((to),(from),(n)) : \
- __generic_copy_to_user_nocheck((to),(from),(n)))
-
-#define __copy_from_user(to,from,n) \
- (__builtin_constant_p(n) ? \
- __constant_copy_from_user_nocheck((to),(from),(n)) : \
- __generic_copy_from_user_nocheck((to),(from),(n)))
-
-long strncpy_from_user(char *dst, const char *src, long count);
-long __strncpy_from_user(char *dst, const char *src, long count);
-#define strlen_user(str) strnlen_user(str, ~0UL >> 1)
-long strnlen_user(const char *str, long n);
-unsigned long clear_user(void *mem, unsigned long len);
-unsigned long __clear_user(void *mem, unsigned long len);
-
-#endif /* __i386_UACCESS_H */
+++ /dev/null
-#ifndef __I386_UNALIGNED_H
-#define __I386_UNALIGNED_H
-
-/*
- * The i386 can do unaligned accesses itself.
- *
- * The strange macros are there to make sure these can't
- * be misused in a way that makes them not work on other
- * architectures where unaligned accesses aren't as simple.
- */
-
-/**
- * get_unaligned - get value from possibly mis-aligned location
- * @ptr: pointer to value
- *
- * This macro should be used for accessing values larger in size than
- * single bytes at locations that are expected to be improperly aligned,
- * e.g. retrieving a u16 value from a location not u16-aligned.
- *
- * Note that unaligned accesses can be very expensive on some architectures.
- */
-#define get_unaligned(ptr) (*(ptr))
-
-/**
- * put_unaligned - put value to a possibly mis-aligned location
- * @val: value to place
- * @ptr: pointer to location
- *
- * This macro should be used for placing values larger in size than
- * single bytes at locations that are expected to be improperly aligned,
- * e.g. writing a u16 value to a location not u16-aligned.
- *
- * Note that unaligned accesses can be very expensive on some architectures.
- */
-#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) ))
-
-#endif
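/*
 * Usage sketch (illustrative): extracting a 16-bit field from a
 * packed network buffer with the macro above.  On i386 this compiles
 * to a plain load; the macro exists so the same source stays correct
 * on architectures with strict alignment rules.
 */
static unsigned short demo_peek16(const unsigned char *pkt, int off)
{
    return get_unaligned((const unsigned short *)(pkt + off));
}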
--- /dev/null
+/*
+ * asm-i386/acpi.h
+ *
+ * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#ifndef _ASM_ACPI_H
+#define _ASM_ACPI_H
+
+#ifdef __KERNEL__
+
+#define COMPILER_DEPENDENT_INT64 long long
+#define COMPILER_DEPENDENT_UINT64 unsigned long long
+
+/*
+ * Calling conventions:
+ *
+ * ACPI_SYSTEM_XFACE - Interfaces to host OS (handlers, threads)
+ * ACPI_EXTERNAL_XFACE - External ACPI interfaces
+ * ACPI_INTERNAL_XFACE - Internal ACPI interfaces
+ * ACPI_INTERNAL_VAR_XFACE - Internal variable-parameter list interfaces
+ */
+#define ACPI_SYSTEM_XFACE
+#define ACPI_EXTERNAL_XFACE
+#define ACPI_INTERNAL_XFACE
+#define ACPI_INTERNAL_VAR_XFACE
+
+/* Asm macros */
+
+#define ACPI_ASM_MACROS
+#define BREAKPOINT3
+#define ACPI_DISABLE_IRQS() __cli()
+#define ACPI_ENABLE_IRQS() __sti()
+#define ACPI_FLUSH_CPU_CACHE() wbinvd()
+
+/*
+ * A brief explanation, as GNU inline assembly is a bit hairy:
+ *  %0 is the output parameter in EAX ("=a");
+ *  %1 and %2 are the input parameters in ECX ("c")
+ *  and an immediate value ("i") respectively.
+ * All actual register references are preceded with "%%" as in "%%edx".
+ * Immediate values in the assembly are preceded by "$" as in "$0x1".
+ * The final asm parameter lists the non-output registers the operation
+ * clobbers.
+ */
+#define ACPI_ACQUIRE_GLOBAL_LOCK(GLptr, Acq) \
+ do { \
+ int dummy; \
+ asm("1: movl (%1),%%eax;" \
+ "movl %%eax,%%edx;" \
+ "andl %2,%%edx;" \
+ "btsl $0x1,%%edx;" \
+ "adcl $0x0,%%edx;" \
+ "lock; cmpxchgl %%edx,(%1);" \
+ "jnz 1b;" \
+ "cmpb $0x3,%%dl;" \
+ "sbbl %%eax,%%eax" \
+ :"=a"(Acq),"=c"(dummy):"c"(GLptr),"i"(~1L):"dx"); \
+ } while(0)
+
+#define ACPI_RELEASE_GLOBAL_LOCK(GLptr, Acq) \
+ do { \
+ int dummy; \
+ asm("1: movl (%1),%%eax;" \
+ "movl %%eax,%%edx;" \
+ "andl %2,%%edx;" \
+ "lock; cmpxchgl %%edx,(%1);" \
+ "jnz 1b;" \
+ "andl $0x1,%%eax" \
+ :"=a"(Acq),"=c"(dummy):"c"(GLptr),"i"(~3L):"dx"); \
+ } while(0)
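+
+/*
+ * Usage sketch (illustrative, not code from this tree; 'gl' stands for a
+ * hypothetical pointer to the FACS global-lock dword):
+ *
+ *     u32 *gl;
+ *     int acquired, pending;
+ *
+ *     ACPI_ACQUIRE_GLOBAL_LOCK(gl, acquired);    nonzero => lock is ours
+ *     if ( acquired )
+ *     {
+ *         ... touch hardware shared with the SMM BIOS ...
+ *         ACPI_RELEASE_GLOBAL_LOCK(gl, pending); nonzero => a waiter was
+ *                                                pending; signal the BIOS
+ *     }
+ */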
+
+
+/*
+ * Math helper asm macros
+ */
+#define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \
+ asm("divl %2;" \
+ :"=a"(q32), "=d"(r32) \
+ :"r"(d32), \
+ "0"(n_lo), "1"(n_hi))
+
+
+#define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \
+ asm("shrl $1,%2;" \
+ "rcrl $1,%3;" \
+ :"=r"(n_hi), "=r"(n_lo) \
+ :"0"(n_hi), "1"(n_lo))
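+
+/*
+ * Worked example (values illustrative): ACPI_DIV_64_BY_32 with n_hi=1,
+ * n_lo=6, d32=2 divides 0x100000006 by 2, leaving q32=0x80000003 and
+ * r32=0; it requires n_hi < d32, or DIVL faults with a divide overflow.
+ * ACPI_SHIFT_RIGHT_64 with n_hi=1, n_lo=0 leaves n_hi=0, n_lo=0x80000000,
+ * the carry from the high word rotating into the top of the low word.
+ */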
+
+
+#ifdef CONFIG_ACPI_BOOT
+extern int acpi_lapic;
+extern int acpi_ioapic;
+extern int acpi_noirq;
+
+/* Fixmap pages to reserve for ACPI boot-time tables (see fixmap.h) */
+#define FIX_ACPI_PAGES 4
+
+#else /* !CONFIG_ACPI_BOOT */
+# define acpi_lapic 0
+# define acpi_ioapic 0
+
+#endif /* !CONFIG_ACPI_BOOT */
+
+#ifdef CONFIG_ACPI_PCI
+static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
+extern int acpi_irq_balance_set(char *str);
+#else
+static inline void acpi_noirq_set(void) { }
+static inline int acpi_irq_balance_set(char *str) { return 0; }
+#endif
+
+#ifdef CONFIG_ACPI_SLEEP
+
+extern unsigned long saved_eip;
+extern unsigned long saved_esp;
+extern unsigned long saved_ebp;
+extern unsigned long saved_ebx;
+extern unsigned long saved_esi;
+extern unsigned long saved_edi;
+
+static inline void acpi_save_register_state(unsigned long return_point)
+{
+ saved_eip = return_point;
+ asm volatile ("movl %%esp,(%0)" : "=m" (saved_esp));
+ asm volatile ("movl %%ebp,(%0)" : "=m" (saved_ebp));
+ asm volatile ("movl %%ebx,(%0)" : "=m" (saved_ebx));
+ asm volatile ("movl %%edi,(%0)" : "=m" (saved_edi));
+ asm volatile ("movl %%esi,(%0)" : "=m" (saved_esi));
+}
+
+#define acpi_restore_register_state() do {} while (0)
+
+
+/* routines for saving/restoring kernel state */
+extern int acpi_save_state_mem(void);
+extern int acpi_save_state_disk(void);
+extern void acpi_restore_state_mem(void);
+
+extern unsigned long acpi_wakeup_address;
+
+extern void do_suspend_lowlevel_s4bios(int resume);
+
+/* early initialization routine */
+extern void acpi_reserve_bootmem(void);
+
+#endif /*CONFIG_ACPI_SLEEP*/
+
+
+#endif /*__KERNEL__*/
+
+#endif /*_ASM_ACPI_H*/
--- /dev/null
+#ifndef __ASM_APIC_H
+#define __ASM_APIC_H
+
+#include <xen/config.h>
+#include <asm/ptrace.h>
+#include <asm/apicdef.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+#define APIC_DEBUG 0
+
+#if APIC_DEBUG
+#define Dprintk(x...) printk(x)
+#else
+#define Dprintk(x...)
+#endif
+
+/*
+ * Basic functions accessing APICs.
+ */
+
+static __inline void apic_write(unsigned long reg, u32 v)
+{
+ *((volatile u32 *)(APIC_BASE+reg)) = v;
+}
+
+static __inline void apic_write_atomic(unsigned long reg, u32 v)
+{
+ xchg((volatile u32 *)(APIC_BASE+reg), v);
+}
+
+static __inline u32 apic_read(unsigned long reg)
+{
+ return *((volatile u32 *)(APIC_BASE+reg));
+}
+
+static __inline__ void apic_wait_icr_idle(void)
+{
+ do { } while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY );
+}
+
+#ifdef CONFIG_X86_GOOD_APIC
+# define FORCE_READ_AROUND_WRITE 0
+# define apic_read_around(x)
+# define apic_write_around(x,y) apic_write((x),(y))
+#else
+# define FORCE_READ_AROUND_WRITE 1
+# define apic_read_around(x) apic_read(x)
+# define apic_write_around(x,y) apic_write_atomic((x),(y))
+#endif
+
+static inline void ack_APIC_irq(void)
+{
+ /*
+ * ack_APIC_irq() actually gets compiled as a single instruction:
+ * - a single rmw on Pentium/82489DX
+ * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC)
+ * ... yummie.
+ */
+
+ /* Docs say use 0 for future compatibility */
+ apic_write_around(APIC_EOI, 0);
+}
+
+extern int get_maxlvt(void);
+extern void clear_local_APIC(void);
+extern void connect_bsp_APIC (void);
+extern void disconnect_bsp_APIC (void);
+extern void disable_local_APIC (void);
+extern int verify_local_APIC (void);
+extern void cache_APIC_registers (void);
+extern void sync_Arb_IDs (void);
+extern void init_bsp_APIC (void);
+extern void setup_local_APIC (void);
+extern void init_apic_mappings (void);
+extern void smp_local_timer_interrupt (struct pt_regs * regs);
+extern void setup_APIC_clocks (void);
+extern void setup_apic_nmi_watchdog (void);
+extern inline void nmi_watchdog_tick (struct pt_regs * regs);
+extern int APIC_init_uniprocessor (void);
+extern void disable_APIC_timer(void);
+extern void enable_APIC_timer(void);
+
+/*extern struct pm_dev *apic_pm_register(pm_dev_t, unsigned long, pm_callback);*/
+/*extern void apic_pm_unregister(struct pm_dev*);*/
+
+extern unsigned int watchdog_on;
+
+extern unsigned int apic_timer_irqs [NR_CPUS];
+extern int check_nmi_watchdog (void);
+
+extern unsigned int nmi_watchdog;
+#define NMI_NONE 0
+#define NMI_IO_APIC 1
+#define NMI_LOCAL_APIC 2
+#define NMI_INVALID 3
+
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+#endif /* __ASM_APIC_H */
--- /dev/null
+#ifndef __ASM_APICDEF_H
+#define __ASM_APICDEF_H
+
+/*
+ * Constants for various Intel APICs. (local APIC, IOAPIC, etc.)
+ *
+ * Alan Cox <Alan.Cox@linux.org>, 1995.
+ * Ingo Molnar <mingo@redhat.com>, 1999, 2000
+ */
+
+#define APIC_DEFAULT_PHYS_BASE 0xfee00000
+
+#define APIC_ID 0x20
+#define APIC_ID_MASK (0x0F<<24)
+#define GET_APIC_ID(x) (((x)>>24)&0x0F)
+#define APIC_LVR 0x30
+#define APIC_LVR_MASK 0xFF00FF
+#define GET_APIC_VERSION(x) ((x)&0xFF)
+#define GET_APIC_MAXLVT(x) (((x)>>16)&0xFF)
+#define APIC_INTEGRATED(x) ((x)&0xF0)
+#define APIC_XAPIC_SUPPORT(x) ((x)>=0x14)
+#define APIC_TASKPRI 0x80
+#define APIC_TPRI_MASK 0xFF
+#define APIC_ARBPRI 0x90
+#define APIC_ARBPRI_MASK 0xFF
+#define APIC_PROCPRI 0xA0
+#define APIC_EOI 0xB0
+#define APIC_EIO_ACK 0x0 /* Write this to the EOI register */
+#define APIC_RRR 0xC0
+#define APIC_LDR 0xD0
+#define APIC_LDR_MASK (0xFF<<24)
+#define GET_APIC_LOGICAL_ID(x) (((x)>>24)&0xFF)
+#define SET_APIC_LOGICAL_ID(x) (((x)<<24))
+#define APIC_ALL_CPUS 0xFF
+#define APIC_DFR 0xE0
+#define APIC_DFR_CLUSTER 0x0FFFFFFFul /* Clustered */
+#define APIC_DFR_FLAT 0xFFFFFFFFul /* Flat mode */
+#define APIC_SPIV 0xF0
+#define APIC_SPIV_FOCUS_DISABLED (1<<9)
+#define APIC_SPIV_APIC_ENABLED (1<<8)
+#define APIC_ISR 0x100
+#define APIC_TMR 0x180
+#define APIC_IRR 0x200
+#define APIC_ESR 0x280
+#define APIC_ESR_SEND_CS 0x00001
+#define APIC_ESR_RECV_CS 0x00002
+#define APIC_ESR_SEND_ACC 0x00004
+#define APIC_ESR_RECV_ACC 0x00008
+#define APIC_ESR_SENDILL 0x00020
+#define APIC_ESR_RECVILL 0x00040
+#define APIC_ESR_ILLREGA 0x00080
+#define APIC_ICR 0x300
+#define APIC_DEST_SELF 0x40000
+#define APIC_DEST_ALLINC 0x80000
+#define APIC_DEST_ALLBUT 0xC0000
+#define APIC_ICR_RR_MASK 0x30000
+#define APIC_ICR_RR_INVALID 0x00000
+#define APIC_ICR_RR_INPROG 0x10000
+#define APIC_ICR_RR_VALID 0x20000
+#define APIC_INT_LEVELTRIG 0x08000
+#define APIC_INT_ASSERT 0x04000
+#define APIC_ICR_BUSY 0x01000
+#define APIC_DEST_PHYSICAL 0x00000
+#define APIC_DEST_LOGICAL 0x00800
+#define APIC_DM_FIXED 0x00000
+#define APIC_DM_LOWEST 0x00100
+#define APIC_DM_SMI 0x00200
+#define APIC_DM_REMRD 0x00300
+#define APIC_DM_NMI 0x00400
+#define APIC_DM_INIT 0x00500
+#define APIC_DM_STARTUP 0x00600
+#define APIC_DM_EXTINT 0x00700
+#define APIC_VECTOR_MASK 0x000FF
+#define APIC_ICR2 0x310
+#define GET_APIC_DEST_FIELD(x) (((x)>>24)&0xFF)
+#define SET_APIC_DEST_FIELD(x) ((x)<<24)
+#define APIC_LVTT 0x320
+#define APIC_LVTPC 0x340
+#define APIC_LVT0 0x350
+#define APIC_LVT_TIMER_BASE_MASK (0x3<<18)
+#define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3)
+#define SET_APIC_TIMER_BASE(x) (((x)<<18))
+#define APIC_TIMER_BASE_CLKIN 0x0
+#define APIC_TIMER_BASE_TMBASE 0x1
+#define APIC_TIMER_BASE_DIV 0x2
+#define APIC_LVT_TIMER_PERIODIC (1<<17)
+#define APIC_LVT_MASKED (1<<16)
+#define APIC_LVT_LEVEL_TRIGGER (1<<15)
+#define APIC_LVT_REMOTE_IRR (1<<14)
+#define APIC_INPUT_POLARITY (1<<13)
+#define APIC_SEND_PENDING (1<<12)
+#define GET_APIC_DELIVERY_MODE(x) (((x)>>8)&0x7)
+#define SET_APIC_DELIVERY_MODE(x,y) (((x)&~0x700)|((y)<<8))
+#define APIC_MODE_FIXED 0x0
+#define APIC_MODE_NMI 0x4
+#define APIC_MODE_EXINT 0x7
+#define APIC_LVT1 0x360
+#define APIC_LVTERR 0x370
+#define APIC_TMICT 0x380
+#define APIC_TMCCT 0x390
+#define APIC_TDCR 0x3E0
+#define APIC_TDR_DIV_TMBASE (1<<2)
+#define APIC_TDR_DIV_1 0xB
+#define APIC_TDR_DIV_2 0x0
+#define APIC_TDR_DIV_4 0x1
+#define APIC_TDR_DIV_8 0x2
+#define APIC_TDR_DIV_16 0x3
+#define APIC_TDR_DIV_32 0x8
+#define APIC_TDR_DIV_64 0x9
+#define APIC_TDR_DIV_128 0xA
+
+#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
+
+#ifdef CONFIG_X86_CLUSTERED_APIC
+#define MAX_IO_APICS 32
+#else
+#define MAX_IO_APICS 8
+#endif
+
+
+/*
+ * The broadcast ID is 0xF for old APICs and 0xFF for xAPICs. SAPICs
+ * don't broadcast (yet?), but if they did, they might use 0xFFFF.
+ */
+#define APIC_BROADCAST_ID_XAPIC (0xFF)
+#define APIC_BROADCAST_ID_APIC (0x0F)
+
+/*
+ * The local APIC register structure, memory mapped. Not terribly well
+ * tested, but we might eventually use it in the future - the reason we
+ * cannot use it right now is the P5 APIC: it has an erratum whereby it
+ * cannot handle 8-bit reads and writes, only 32-bit ones ...
+ */
+#define u32 unsigned int
+
+#define lapic ((volatile struct local_apic *)APIC_BASE)
+
+struct local_apic {
+
+/*000*/ struct { u32 __reserved[4]; } __reserved_01;
+
+/*010*/ struct { u32 __reserved[4]; } __reserved_02;
+
+/*020*/ struct { /* APIC ID Register */
+ u32 __reserved_1 : 24,
+ phys_apic_id : 4,
+ __reserved_2 : 4;
+ u32 __reserved[3];
+ } id;
+
+/*030*/ const
+ struct { /* APIC Version Register */
+ u32 version : 8,
+ __reserved_1 : 8,
+ max_lvt : 8,
+ __reserved_2 : 8;
+ u32 __reserved[3];
+ } version;
+
+/*040*/ struct { u32 __reserved[4]; } __reserved_03;
+
+/*050*/ struct { u32 __reserved[4]; } __reserved_04;
+
+/*060*/ struct { u32 __reserved[4]; } __reserved_05;
+
+/*070*/ struct { u32 __reserved[4]; } __reserved_06;
+
+/*080*/ struct { /* Task Priority Register */
+ u32 priority : 8,
+ __reserved_1 : 24;
+ u32 __reserved_2[3];
+ } tpr;
+
+/*090*/ const
+ struct { /* Arbitration Priority Register */
+ u32 priority : 8,
+ __reserved_1 : 24;
+ u32 __reserved_2[3];
+ } apr;
+
+/*0A0*/ const
+ struct { /* Processor Priority Register */
+ u32 priority : 8,
+ __reserved_1 : 24;
+ u32 __reserved_2[3];
+ } ppr;
+
+/*0B0*/ struct { /* End Of Interrupt Register */
+ u32 eoi;
+ u32 __reserved[3];
+ } eoi;
+
+/*0C0*/ struct { u32 __reserved[4]; } __reserved_07;
+
+/*0D0*/ struct { /* Logical Destination Register */
+ u32 __reserved_1 : 24,
+ logical_dest : 8;
+ u32 __reserved_2[3];
+ } ldr;
+
+/*0E0*/ struct { /* Destination Format Register */
+ u32 __reserved_1 : 28,
+ model : 4;
+ u32 __reserved_2[3];
+ } dfr;
+
+/*0F0*/ struct { /* Spurious Interrupt Vector Register */
+ u32 spurious_vector : 8,
+ apic_enabled : 1,
+ focus_cpu : 1,
+ __reserved_2 : 22;
+ u32 __reserved_3[3];
+ } svr;
+
+/*100*/ struct { /* In Service Register */
+/*170*/ u32 bitfield;
+ u32 __reserved[3];
+ } isr [8];
+
+/*180*/ struct { /* Trigger Mode Register */
+/*1F0*/ u32 bitfield;
+ u32 __reserved[3];
+ } tmr [8];
+
+/*200*/ struct { /* Interrupt Request Register */
+/*270*/ u32 bitfield;
+ u32 __reserved[3];
+ } irr [8];
+
+/*280*/ union { /* Error Status Register */
+ struct {
+ u32 send_cs_error : 1,
+ receive_cs_error : 1,
+ send_accept_error : 1,
+ receive_accept_error : 1,
+ __reserved_1 : 1,
+ send_illegal_vector : 1,
+ receive_illegal_vector : 1,
+ illegal_register_address : 1,
+ __reserved_2 : 24;
+ u32 __reserved_3[3];
+ } error_bits;
+ struct {
+ u32 errors;
+ u32 __reserved_3[3];
+ } all_errors;
+ } esr;
+
+/*290*/ struct { u32 __reserved[4]; } __reserved_08;
+
+/*2A0*/ struct { u32 __reserved[4]; } __reserved_09;
+
+/*2B0*/ struct { u32 __reserved[4]; } __reserved_10;
+
+/*2C0*/ struct { u32 __reserved[4]; } __reserved_11;
+
+/*2D0*/ struct { u32 __reserved[4]; } __reserved_12;
+
+/*2E0*/ struct { u32 __reserved[4]; } __reserved_13;
+
+/*2F0*/ struct { u32 __reserved[4]; } __reserved_14;
+
+/*300*/ struct { /* Interrupt Command Register 1 */
+ u32 vector : 8,
+ delivery_mode : 3,
+ destination_mode : 1,
+ delivery_status : 1,
+ __reserved_1 : 1,
+ level : 1,
+ trigger : 1,
+ __reserved_2 : 2,
+ shorthand : 2,
+ __reserved_3 : 12;
+ u32 __reserved_4[3];
+ } icr1;
+
+/*310*/ struct { /* Interrupt Command Register 2 */
+ union {
+ u32 __reserved_1 : 24,
+ phys_dest : 4,
+ __reserved_2 : 4;
+ u32 __reserved_3 : 24,
+ logical_dest : 8;
+ } dest;
+ u32 __reserved_4[3];
+ } icr2;
+
+/*320*/ struct { /* LVT - Timer */
+ u32 vector : 8,
+ __reserved_1 : 4,
+ delivery_status : 1,
+ __reserved_2 : 3,
+ mask : 1,
+ timer_mode : 1,
+ __reserved_3 : 14;
+ u32 __reserved_4[3];
+ } lvt_timer;
+
+/*330*/ struct { u32 __reserved[4]; } __reserved_15;
+
+/*340*/ struct { /* LVT - Performance Counter */
+ u32 vector : 8,
+ delivery_mode : 3,
+ __reserved_1 : 1,
+ delivery_status : 1,
+ __reserved_2 : 3,
+ mask : 1,
+ __reserved_3 : 15;
+ u32 __reserved_4[3];
+ } lvt_pc;
+
+/*350*/ struct { /* LVT - LINT0 */
+ u32 vector : 8,
+ delivery_mode : 3,
+ __reserved_1 : 1,
+ delivery_status : 1,
+ polarity : 1,
+ remote_irr : 1,
+ trigger : 1,
+ mask : 1,
+ __reserved_2 : 15;
+ u32 __reserved_3[3];
+ } lvt_lint0;
+
+/*360*/ struct { /* LVT - LINT1 */
+ u32 vector : 8,
+ delivery_mode : 3,
+ __reserved_1 : 1,
+ delivery_status : 1,
+ polarity : 1,
+ remote_irr : 1,
+ trigger : 1,
+ mask : 1,
+ __reserved_2 : 15;
+ u32 __reserved_3[3];
+ } lvt_lint1;
+
+/*370*/ struct { /* LVT - Error */
+ u32 vector : 8,
+ __reserved_1 : 4,
+ delivery_status : 1,
+ __reserved_2 : 3,
+ mask : 1,
+ __reserved_3 : 15;
+ u32 __reserved_4[3];
+ } lvt_error;
+
+/*380*/ struct { /* Timer Initial Count Register */
+ u32 initial_count;
+ u32 __reserved_2[3];
+ } timer_icr;
+
+/*390*/ const
+ struct { /* Timer Current Count Register */
+ u32 curr_count;
+ u32 __reserved_2[3];
+ } timer_ccr;
+
+/*3A0*/ struct { u32 __reserved[4]; } __reserved_16;
+
+/*3B0*/ struct { u32 __reserved[4]; } __reserved_17;
+
+/*3C0*/ struct { u32 __reserved[4]; } __reserved_18;
+
+/*3D0*/ struct { u32 __reserved[4]; } __reserved_19;
+
+/*3E0*/ struct { /* Timer Divide Configuration Register */
+ u32 divisor : 4,
+ __reserved_1 : 28;
+ u32 __reserved_2[3];
+ } timer_dcr;
+
+/*3F0*/ struct { u32 __reserved[4]; } __reserved_20;
+
+} __attribute__ ((packed));
+
+#undef u32
+
+#endif
--- /dev/null
+#ifndef __ARCH_X86_ATOMIC__
+#define __ARCH_X86_ATOMIC__
+
+#include <xen/config.h>
+
+/*
+ * Atomic operations that C can't guarantee us. Useful for
+ * resource counting etc..
+ */
+
+#ifdef CONFIG_SMP
+#define LOCK "lock ; "
+#else
+#define LOCK ""
+#endif
+
+/*
+ * Make sure gcc doesn't try to be clever and move things around
+ * on us. We need to use _exactly_ the address the user gave us,
+ * not some alias that contains the same information.
+ */
+typedef struct { volatile int counter; } atomic_t;
+
+#define ATOMIC_INIT(i) { (i) }
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically reads the value of @v. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+#define atomic_read(v) ((v)->counter)
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+#define atomic_set(v,i) (((v)->counter) = (i))
+
+/**
+ * atomic_add - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v. Note that the guaranteed useful range
+ * of an atomic_t is only 24 bits.
+ */
+static __inline__ void atomic_add(int i, atomic_t *v)
+{
+ __asm__ __volatile__(
+ LOCK "addl %1,%0"
+ :"=m" (v->counter)
+ :"ir" (i), "m" (v->counter));
+}
+
+/**
+ * atomic_sub - subtract the atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ void atomic_sub(int i, atomic_t *v)
+{
+ __asm__ __volatile__(
+ LOCK "subl %1,%0"
+ :"=m" (v->counter)
+ :"ir" (i), "m" (v->counter));
+}
+
+/**
+ * atomic_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ LOCK "subl %2,%0; sete %1"
+ :"=m" (v->counter), "=qm" (c)
+ :"ir" (i), "m" (v->counter) : "memory");
+ return c;
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ void atomic_inc(atomic_t *v)
+{
+ __asm__ __volatile__(
+ LOCK "incl %0"
+ :"=m" (v->counter)
+ :"m" (v->counter));
+}
+
+/**
+ * atomic_dec - decrement atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ void atomic_dec(atomic_t *v)
+{
+ __asm__ __volatile__(
+ LOCK "decl %0"
+ :"=m" (v->counter)
+ :"m" (v->counter));
+}
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ int atomic_dec_and_test(atomic_t *v)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ LOCK "decl %0; sete %1"
+ :"=m" (v->counter), "=qm" (c)
+ :"m" (v->counter) : "memory");
+ return c != 0;
+}
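+
+/*
+ * Typical usage sketch (illustrative; 'struct obj' and free_obj() are
+ * hypothetical): drop a reference and free the object exactly once, even
+ * when several CPUs race on the final decrement.
+ *
+ *     struct obj { atomic_t refcnt; };
+ *
+ *     void put_obj(struct obj *o)
+ *     {
+ *         if ( atomic_dec_and_test(&o->refcnt) )
+ *             free_obj(o);          only the CPU that hit zero gets here
+ *     }
+ */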
+
+/**
+ * atomic_inc_and_test - increment and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ int atomic_inc_and_test(atomic_t *v)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ LOCK "incl %0; sete %1"
+ :"=m" (v->counter), "=qm" (c)
+ :"m" (v->counter) : "memory");
+ return c != 0;
+}
+
+/**
+ * atomic_add_negative - add and test if negative
+ * @v: pointer of type atomic_t
+ * @i: integer value to add
+ *
+ * Atomically adds @i to @v and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero. Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ int atomic_add_negative(int i, atomic_t *v)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ LOCK "addl %2,%0; sets %1"
+ :"=m" (v->counter), "=qm" (c)
+ :"ir" (i), "m" (v->counter) : "memory");
+ return c;
+}
+
+/* Atomic operations are already serializing on x86 */
+#define smp_mb__before_atomic_dec() barrier()
+#define smp_mb__after_atomic_dec() barrier()
+#define smp_mb__before_atomic_inc() barrier()
+#define smp_mb__after_atomic_inc() barrier()
+
+#endif /* __ARCH_X86_ATOMIC__ */
--- /dev/null
+#ifndef _X86_BITOPS_H
+#define _X86_BITOPS_H
+
+/*
+ * Copyright 1992, Linus Torvalds.
+ */
+
+#include <xen/config.h>
+
+/*
+ * These have to be done with inline assembly: that way the bit-setting
+ * is guaranteed to be atomic. All bit operations return 0 if the bit
+ * was cleared before the operation and != 0 if it was not.
+ *
+ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
+ */
+
+#ifdef CONFIG_SMP
+#define LOCK_PREFIX "lock ; "
+#else
+#define LOCK_PREFIX ""
+#endif
+
+#define ADDR (*(volatile long *) addr)
+
+/**
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * This function is atomic and may not be reordered. See __set_bit()
+ * if you do not require the atomic guarantees.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static __inline__ void set_bit(long nr, volatile void * addr)
+{
+ __asm__ __volatile__( LOCK_PREFIX
+ "bts"__OS" %1,%0"
+ :"=m" (ADDR)
+ :"dIr" (nr));
+}
+
+/**
+ * __set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike set_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void __set_bit(long nr, volatile void * addr)
+{
+ __asm__(
+ "bts"__OS" %1,%0"
+ :"=m" (ADDR)
+ :"dIr" (nr));
+}
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and may not be reordered. However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * in order to ensure changes are visible on other processors.
+ */
+static __inline__ void clear_bit(long nr, volatile void * addr)
+{
+ __asm__ __volatile__( LOCK_PREFIX
+ "btr"__OS" %1,%0"
+ :"=m" (ADDR)
+ :"dIr" (nr));
+}
+#define smp_mb__before_clear_bit() barrier()
+#define smp_mb__after_clear_bit() barrier()
+
+/**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void __change_bit(long nr, volatile void * addr)
+{
+ __asm__ __volatile__(
+ "btc"__OS" %1,%0"
+ :"=m" (ADDR)
+ :"dIr" (nr));
+}
+
+/**
+ * change_bit - Toggle a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * change_bit() is atomic and may not be reordered.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static __inline__ void change_bit(long nr, volatile void * addr)
+{
+ __asm__ __volatile__( LOCK_PREFIX
+ "btc"__OS" %1,%0"
+ :"=m" (ADDR)
+ :"dIr" (nr));
+}
+
+/**
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static __inline__ int test_and_set_bit(long nr, volatile void * addr)
+{
+ long oldbit;
+
+ __asm__ __volatile__( LOCK_PREFIX
+ "bts"__OS" %2,%1\n\tsbb"__OS" %0,%0"
+ :"=r" (oldbit),"=m" (ADDR)
+ :"dIr" (nr) : "memory");
+ return oldbit;
+}
+
+/**
+ * __test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two instances of this operation race, one can appear to succeed
+ * but actually fail. You must protect multiple accesses with a lock.
+ */
+static __inline__ int __test_and_set_bit(long nr, volatile void * addr)
+{
+ long oldbit;
+
+ __asm__(
+ "bts"__OS" %2,%1\n\tsbb"__OS" %0,%0"
+ :"=r" (oldbit),"=m" (ADDR)
+ :"dIr" (nr));
+ return oldbit;
+}
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static __inline__ int test_and_clear_bit(long nr, volatile void * addr)
+{
+ long oldbit;
+
+ __asm__ __volatile__( LOCK_PREFIX
+ "btr"__OS" %2,%1\n\tsbb"__OS" %0,%0"
+ :"=r" (oldbit),"=m" (ADDR)
+ :"dIr" (nr) : "memory");
+ return oldbit;
+}
+
+/**
+ * __test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two instances of this operation race, one can appear to succeed
+ * but actually fail. You must protect multiple accesses with a lock.
+ */
+static __inline__ int __test_and_clear_bit(long nr, volatile void * addr)
+{
+ long oldbit;
+
+ __asm__(
+ "btr"__OS" %2,%1\n\tsbb"__OS" %0,%0"
+ :"=r" (oldbit),"=m" (ADDR)
+ :"dIr" (nr));
+ return oldbit;
+}
+
+/* WARNING: non atomic and it can be reordered! */
+static __inline__ int __test_and_change_bit(long nr, volatile void * addr)
+{
+ long oldbit;
+
+ __asm__ __volatile__(
+ "btc"__OS" %2,%1\n\tsbb"__OS" %0,%0"
+ :"=r" (oldbit),"=m" (ADDR)
+ :"dIr" (nr) : "memory");
+ return oldbit;
+}
+
+/**
+ * test_and_change_bit - Change a bit and return its new value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static __inline__ int test_and_change_bit(long nr, volatile void * addr)
+{
+ long oldbit;
+
+ __asm__ __volatile__( LOCK_PREFIX
+ "btc"__OS" %2,%1\n\tsbb"__OS" %0,%0"
+ :"=r" (oldbit),"=m" (ADDR)
+ :"dIr" (nr) : "memory");
+ return oldbit;
+}
+
+
+static __inline__ int constant_test_bit(long nr, const volatile void * addr)
+{
+ return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
+}
+
+static __inline__ int variable_test_bit(long nr, volatile void * addr)
+{
+ long oldbit;
+
+ __asm__ __volatile__(
+ "bt"__OS" %2,%1\n\tsbb"__OS" %0,%0"
+ :"=r" (oldbit)
+ :"m" (ADDR),"dIr" (nr));
+ return oldbit;
+}
+
+#define test_bit(nr,addr) \
+(__builtin_constant_p(nr) ? \
+ constant_test_bit((nr),(addr)) : \
+ variable_test_bit((nr),(addr)))
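+
+/*
+ * The dispatch above means a compile-time-constant bit number becomes a
+ * simple AND against one 32-bit word, while a variable bit number uses the
+ * BT instruction. Sketch (illustrative; 'flags' is a hypothetical bitmap):
+ *
+ *     unsigned long flags[2] = { 0, 0 };
+ *     set_bit(34, flags);           atomically sets bit 2 of flags[1]
+ *     if ( test_bit(34, flags) )    constant nr: open-coded word test
+ *         ...
+ */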
+
+/**
+ * find_first_zero_bit - find the first zero bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum bitnumber to search
+ *
+ * Returns the bit-number of the first zero bit, not the number of the byte
+ * containing a bit. If no zero bit exists, the result is >= @size.
+ */
+static __inline__ int find_first_zero_bit(void * addr, unsigned size)
+{
+ int d0, d1, d2;
+ int res;
+
+ if (!size)
+ return 0;
+ __asm__ __volatile__(
+ "movl $-1,%%eax\n\t"
+ "xorl %%edx,%%edx\n\t"
+ "repe; scasl\n\t"
+ "je 1f\n\t"
+ "xorl -4(%%"__OP"di),%%eax\n\t"
+ "sub"__OS" $4,%%"__OP"di\n\t"
+ "bsfl %%eax,%%edx\n"
+ "1:\tsub"__OS" %%"__OP"bx,%%"__OP"di\n\t"
+ "shl"__OS" $3,%%"__OP"di\n\t"
+ "add"__OS" %%"__OP"di,%%"__OP"dx"
+ :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
+ :"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory");
+ return res;
+}
+
+/**
+ * find_next_zero_bit - find the next zero bit in a memory region
+ * @addr: The address to base the search on
+ * @size: The maximum size to search
+ * @offset: The bitnumber to start searching at
+ */
+static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
+{
+ unsigned int * p = ((unsigned int *) addr) + (offset >> 5);
+ int set = 0, bit = offset & 31, res;
+
+ if (bit) {
+ /*
+ * Look for a zero in the remainder of the first word
+ */
+ __asm__("bsfl %1,%0\n\t"
+ "jne 1f\n\t"
+ "movl $32, %0\n"
+ "1:"
+ : "=r" (set)
+ : "r" (~(*p >> bit)));
+ if (set < (32 - bit))
+ return set + offset;
+ set = 32 - bit;
+ p++;
+ }
+ /*
+ * No zero yet, search remaining full words for a zero
+ */
+ res = find_first_zero_bit (p, size - 32 * (p - (unsigned int *) addr));
+ return (offset + set + res);
+}
+
+/**
+ * ffz - find first zero in word.
+ * @word: The word to search
+ *
+ * Undefined if no zero exists, so code should check against ~0UL first.
+ */
+static __inline__ unsigned long ffz(unsigned long word)
+{
+ __asm__("bsf"__OS" %1,%0"
+ :"=r" (word)
+ :"r" (~word));
+ return word;
+}
+
+/**
+ * ffs - find first bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
+ */
+static __inline__ int ffs(int x)
+{
+ int r;
+
+ __asm__("bsfl %1,%0\n\t"
+ "jnz 1f\n\t"
+ "movl $-1,%0\n"
+ "1:" : "=r" (r) : "g" (x));
+ return r+1;
+}
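+
+/*
+ * Worked examples: ffs(0x18) == 4, since bit 3 is the lowest set bit and
+ * ffs() numbers bits from 1; ffs(0) == 0. By contrast ffz(0x18) == 0, the
+ * index of the lowest *clear* bit, numbered from 0.
+ */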
+
+/**
+ * hweightN - returns the hamming weight of a N-bit word
+ * @x: the word to weigh
+ *
+ * The Hamming Weight of a number is the total number of bits set in it.
+ */
+
+#define hweight32(x) generic_hweight32(x)
+#define hweight16(x) generic_hweight16(x)
+#define hweight8(x) generic_hweight8(x)
+
+#define ext2_set_bit __test_and_set_bit
+#define ext2_clear_bit __test_and_clear_bit
+#define ext2_test_bit test_bit
+#define ext2_find_first_zero_bit find_first_zero_bit
+#define ext2_find_next_zero_bit find_next_zero_bit
+
+/* Bitmap functions for the minix filesystem. */
+#define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,addr)
+#define minix_set_bit(nr,addr) __set_bit(nr,addr)
+#define minix_test_and_clear_bit(nr,addr) __test_and_clear_bit(nr,addr)
+#define minix_test_bit(nr,addr) test_bit(nr,addr)
+#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
+
+#endif /* _X86_BITOPS_H */
--- /dev/null
+/*
+ * include/asm-x86/cache.h
+ */
+#ifndef __ARCH_X86_CACHE_H
+#define __ARCH_X86_CACHE_H
+
+#include <xen/config.h>
+
+/* L1 cache line size */
+#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
+#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
+
+#endif
--- /dev/null
+/******************************************************************************
+ * config.h
+ *
+ * A Linux-style configuration list.
+ */
+
+#ifndef __XEN_I386_CONFIG_H__
+#define __XEN_I386_CONFIG_H__
+
+#define CONFIG_X86 1
+
+#define CONFIG_SMP 1
+#define CONFIG_X86_LOCAL_APIC 1
+#define CONFIG_X86_IO_APIC 1
+#define CONFIG_X86_L1_CACHE_SHIFT 5
+
+#define CONFIG_ACPI 1
+#define CONFIG_ACPI_BOOT 1
+
+#define CONFIG_PCI 1
+#define CONFIG_PCI_BIOS 1
+#define CONFIG_PCI_DIRECT 1
+
+#define CONFIG_IDE 1
+#define CONFIG_BLK_DEV_IDE 1
+#define CONFIG_BLK_DEV_IDEDMA 1
+#define CONFIG_BLK_DEV_IDEPCI 1
+#define CONFIG_IDEDISK_MULTI_MODE 1
+#define CONFIG_IDEDISK_STROKE 1
+#define CONFIG_IDEPCI_SHARE_IRQ 1
+#define CONFIG_BLK_DEV_IDEDMA_PCI 1
+#define CONFIG_IDEDMA_PCI_AUTO 1
+#define CONFIG_IDEDMA_AUTO 1
+#define CONFIG_IDEDMA_ONLYDISK 1
+#define CONFIG_BLK_DEV_IDE_MODES 1
+#define CONFIG_BLK_DEV_PIIX 1
+
+#define CONFIG_SCSI 1
+#define CONFIG_SCSI_LOGGING 1
+#define CONFIG_BLK_DEV_SD 1
+#define CONFIG_SD_EXTRA_DEVS 40
+#define CONFIG_SCSI_MULTI_LUN 1
+
+#define CONFIG_XEN_ATTENTION_KEY 1
+
+
+#define HZ 100
+
+/*
+ * Just to keep compiler happy.
+ * NB. DO NOT CHANGE SMP_CACHE_BYTES WITHOUT FIXING arch/x86/entry.S!!!
+ * It depends on size of irq_cpustat_t, for example, being 64 bytes. :-)
+ * Mmmm... so niiiiiice....
+ */
+#define SMP_CACHE_BYTES 64
+#define NR_CPUS 16
+#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
+#define ____cacheline_aligned __cacheline_aligned
+
+/*** Hypervisor owns top 64MB of virtual address space. ***/
+#define HYPERVISOR_VIRT_START (0xFC000000UL)
+
+/*
+ * First 4MB are mapped read-only for all. It's for the machine->physical
+ * mapping table (MPT table). The following are virtual addresses.
+ */
+#define READONLY_MPT_VIRT_START (HYPERVISOR_VIRT_START)
+#define READONLY_MPT_VIRT_END (READONLY_MPT_VIRT_START + (4*1024*1024))
+/*
+ * Next 12MB is fixed monitor space, which is part of a 40MB direct-mapped
+ * memory region. The following are machine addresses.
+ */
+#define MAX_MONITOR_ADDRESS (12*1024*1024)
+#define MAX_DIRECTMAP_ADDRESS (40*1024*1024)
+/* And the virtual addresses for the direct-map region... */
+#define DIRECTMAP_VIRT_START (READONLY_MPT_VIRT_END)
+#define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + MAX_DIRECTMAP_ADDRESS)
+#define MONITOR_VIRT_START (DIRECTMAP_VIRT_START)
+#define MONITOR_VIRT_END (MONITOR_VIRT_START + MAX_MONITOR_ADDRESS)
+#define RDWR_MPT_VIRT_START (MONITOR_VIRT_END)
+#define RDWR_MPT_VIRT_END (RDWR_MPT_VIRT_START + (4*1024*1024))
+#define FRAMETABLE_VIRT_START (RDWR_MPT_VIRT_END)
+#define FRAMETABLE_VIRT_END (DIRECTMAP_VIRT_END)
+/* Next 4MB of virtual address space is used as a linear p.t. mapping. */
+#define LINEAR_PT_VIRT_START (DIRECTMAP_VIRT_END)
+#define LINEAR_PT_VIRT_END (LINEAR_PT_VIRT_START + (4*1024*1024))
+/* Next 4MB of virtual address space is used as a shadow linear p.t. map. */
+#define SH_LINEAR_PT_VIRT_START (LINEAR_PT_VIRT_END)
+#define SH_LINEAR_PT_VIRT_END (SH_LINEAR_PT_VIRT_START + (4*1024*1024))
+/* Next 4MB of virtual address space used for per-domain mappings (eg. GDT). */
+#define PERDOMAIN_VIRT_START (SH_LINEAR_PT_VIRT_END)
+#define PERDOMAIN_VIRT_END (PERDOMAIN_VIRT_START + (4*1024*1024))
+#define GDT_VIRT_START (PERDOMAIN_VIRT_START)
+#define GDT_VIRT_END (GDT_VIRT_START + (64*1024))
+#define LDT_VIRT_START (GDT_VIRT_END)
+#define LDT_VIRT_END (LDT_VIRT_START + (64*1024))
+/* Penultimate 4MB of virtual address space used for domain page mappings. */
+#define MAPCACHE_VIRT_START (PERDOMAIN_VIRT_END)
+#define MAPCACHE_VIRT_END (MAPCACHE_VIRT_START + (4*1024*1024))
+/* Final 4MB of virtual address space used for ioremap(). */
+#define IOREMAP_VIRT_START (MAPCACHE_VIRT_END)
+#define IOREMAP_VIRT_END (IOREMAP_VIRT_START + (4*1024*1024))
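+
+/*
+ * The resulting layout, written out (derived from the constants above):
+ *
+ *   FC000000-FC400000   read-only machine->physical table (MPT)
+ *   FC400000-FD000000   monitor space (start of 40MB direct-map region)
+ *   FD000000-FD400000   read-write MPT
+ *   FD400000-FEC00000   frame table (to the end of the direct-map region)
+ *   FEC00000-FF000000   linear page-table mapping
+ *   FF000000-FF400000   shadow linear page-table mapping
+ *   FF400000-FF800000   per-domain mappings (GDT, LDT)
+ *   FF800000-FFC00000   map cache
+ *   FFC00000-FFFFFFFF   ioremap() region (top 4MB of the address space)
+ */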
+
+/*
+ * Amount of slack domain memory to leave in the system, in kilobytes.
+ * Prevents a hard out-of-memory crunch for things like network receive.
+ */
+#define SLACK_DOMAIN_MEM_KILOBYTES 2048
+
+/* Linkage for x86 */
+#define FASTCALL(x) x __attribute__((regparm(3)))
+#define asmlinkage __attribute__((regparm(0)))
+#define __ALIGN .align 16,0x90
+#define __ALIGN_STR ".align 16,0x90"
+#define SYMBOL_NAME_STR(X) #X
+#define SYMBOL_NAME(X) X
+#define SYMBOL_NAME_LABEL(X) X##:
+#ifdef __ASSEMBLY__
+#define ALIGN __ALIGN
+#define ALIGN_STR __ALIGN_STR
+#define ENTRY(name) \
+ .globl SYMBOL_NAME(name); \
+ ALIGN; \
+ SYMBOL_NAME_LABEL(name)
+#endif
+
+#define PGT_base_page_table PGT_l2_page_table
+
+#define barrier() __asm__ __volatile__("": : :"memory")
+
+#define __HYPERVISOR_CS 0x0808
+#define __HYPERVISOR_DS 0x0810
+
+#define NR_syscalls 256
+
+#ifndef NDEBUG
+#define MEMORY_GUARD
+#endif
+
+#ifndef __ASSEMBLY__
+extern unsigned long _end; /* standard ELF symbol */
+extern void __out_of_line_bug(int line) __attribute__((noreturn));
+#define out_of_line_bug() __out_of_line_bug(__LINE__)
+#endif /* __ASSEMBLY__ */
+
+/* For generic assembly code: use macros to define operation/operand sizes. */
+#define __OS "l" /* Operation Suffix */
+#define __OP "e" /* Operand Prefix */
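+
+/*
+ * For example, on this 32-bit build "bts"__OS" %1,%0" assembles as
+ * "btsl %1,%0" and "%%"__OP"di" expands to "%edi"; a 64-bit build would
+ * presumably supply "q" and "r" to get "btsq" and "%rdi" instead.
+ */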
+
+#endif /* __XEN_I386_CONFIG_H__ */
--- /dev/null
+/*
+ * cpufeature.h
+ *
+ * Defines x86 CPU feature bits
+ */
+
+#ifndef __ASM_X86_CPUFEATURE_H
+#define __ASM_X86_CPUFEATURE_H
+
+/* Sample usage: CPU_FEATURE_P(cpu.x86_capability, FPU) */
+#define CPU_FEATURE_P(CAP, FEATURE) test_bit(CAP, X86_FEATURE_##FEATURE ##_BIT)
+
+#define NCAPINTS 6 /* Currently we have 6 32-bit words worth of info */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
+#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */
+#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */
+#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */
+#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN (0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */
+#define X86_FEATURE_DTES (0*32+21) /* Debug Trace Store */
+#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */
+ /* of FPU context), and CR4.OSFXSR available */
+#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */
+#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */
+#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */
+#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */
+
+/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+/* Don't duplicate feature flags which are redundant with Intel! */
+#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MP (1*32+19) /* MP Capable. */
+#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */
+#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */
+
+/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */
+
+/* Other features, Linux-defined mapping, word 3 */
+/* This range is used for feature bits which conflict or are synthesized */
+#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */
+/* cpu types for specific tunings: */
+#define X86_FEATURE_K8 (3*32+ 4) /* Opteron, Athlon64 */
+#define X86_FEATURE_K7 (3*32+ 5) /* Athlon */
+#define X86_FEATURE_P3 (3*32+ 6) /* P3 */
+#define X86_FEATURE_P4 (3*32+ 7) /* P4 */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
+#define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */
+
+/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
+#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */
+
+
+#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability)
+#define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability)
+
+#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU)
+#define cpu_has_vme boot_cpu_has(X86_FEATURE_VME)
+#define cpu_has_de boot_cpu_has(X86_FEATURE_DE)
+#define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE)
+#define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC)
+#define cpu_has_pae boot_cpu_has(X86_FEATURE_PAE)
+#define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE)
+#define cpu_has_sse2 boot_cpu_has(X86_FEATURE_XMM2)
+#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC)
+#define cpu_has_sep boot_cpu_has(X86_FEATURE_SEP)
+#define cpu_has_mtrr boot_cpu_has(X86_FEATURE_MTRR)
+#define cpu_has_mmx boot_cpu_has(X86_FEATURE_MMX)
+#define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR)
+#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM)
+#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT)
+#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP)
+#define cpu_has_k6_mtrr boot_cpu_has(X86_FEATURE_K6_MTRR)
+#define cpu_has_cyrix_arr boot_cpu_has(X86_FEATURE_CYRIX_ARR)
+#define cpu_has_centaur_mcr boot_cpu_has(X86_FEATURE_CENTAUR_MCR)
+#define cpu_has_xstore boot_cpu_has(X86_FEATURE_XSTORE)
+
+#endif /* __ASM_X86_CPUFEATURE_H */
--- /dev/null
+#ifndef _X86_CURRENT_H
+#define _X86_CURRENT_H
+
+struct task_struct;
+
+#define STACK_RESERVED \
+ (sizeof(execution_context_t) + sizeof(struct task_struct *))
+
+static inline struct task_struct * get_current(void)
+{
+ struct task_struct *current;
+ __asm__ ( "orl %%esp,%0; andl $~3,%0; movl (%0),%0"
+ : "=r" (current) : "0" (STACK_SIZE-4) );
+ return current;
+}
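+
+/*
+ * Worked example (assuming an 8KB-aligned 8KB stack, i.e. STACK_SIZE ==
+ * 0x2000, which is defined elsewhere): with %esp == 0x12340F00, OR-ing
+ * with STACK_SIZE-4 == 0x1FFC gives 0x12341FFC, and the AND with ~3 leaves
+ * it unchanged; 'current' is then loaded from that top word of the stack,
+ * which is exactly where set_current() stores it.
+ */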
+
+#define current get_current()
+
+static inline void set_current(struct task_struct *p)
+{
+ __asm__ ( "orl %%esp,%0; andl $~3,%0; movl %1,(%0)"
+ : : "r" (STACK_SIZE-4), "r" (p) );
+}
+
+static inline execution_context_t *get_execution_context(void)
+{
+ execution_context_t *execution_context;
+ __asm__ ( "andl %%esp,%0; addl %2,%0"
+ : "=r" (execution_context)
+ : "0" (~(STACK_SIZE-1)), "i" (STACK_SIZE-STACK_RESERVED) );
+ return execution_context;
+}
+
+static inline unsigned long get_stack_top(void)
+{
+ unsigned long p;
+ __asm__ ( "orl %%esp,%0; andl $~3,%0"
+ : "=r" (p) : "0" (STACK_SIZE-4) );
+ return p;
+}
+
+#define schedule_tail(_p) \
+ __asm__ __volatile__ ( \
+ "andl %%esp,%0; addl %2,%0; movl %0,%%esp; jmp *%1" \
+ : : "r" (~(STACK_SIZE-1)), \
+ "r" (unlikely(is_idle_task((_p))) ? \
+ continue_cpu_idle_loop : \
+ continue_nonidle_task), \
+ "i" (STACK_SIZE-STACK_RESERVED) )
+
+
+#endif /* _X86_CURRENT_H */
--- /dev/null
+#ifndef _X86_DEBUGREG_H
+#define _X86_DEBUGREG_H
+
+
+/* Indicate the register numbers for the specific
+ debug registers. Registers 0-3 contain the addresses we wish to trap on. */
+#define DR_FIRSTADDR 0 /* u_debugreg[DR_FIRSTADDR] */
+#define DR_LASTADDR 3 /* u_debugreg[DR_LASTADDR] */
+
+#define DR_STATUS 6 /* u_debugreg[DR_STATUS] */
+#define DR_CONTROL 7 /* u_debugreg[DR_CONTROL] */
+
+/* Define a few things for the status register. We can use this to determine
+ which debugging register was responsible for the trap. The other bits
+ are either reserved or not of interest to us. */
+
+#define DR_TRAP0 (0x1) /* db0 */
+#define DR_TRAP1 (0x2) /* db1 */
+#define DR_TRAP2 (0x4) /* db2 */
+#define DR_TRAP3 (0x8) /* db3 */
+
+#define DR_STEP (0x4000) /* single-step */
+#define DR_SWITCH (0x8000) /* task switch */
+
+/* Now define a bunch of things for manipulating the control register.
+ The top two bytes of the control register consist of 4 fields of 4
+ bits - each field corresponds to one of the four debug registers,
+ and indicates what types of access we trap on, and how large the data
+ field is that we are looking at. */
+
+#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */
+#define DR_CONTROL_SIZE 4 /* 4 control bits per register */
+
+#define DR_RW_EXECUTE (0x0) /* Settings for the access types to trap on */
+#define DR_RW_WRITE (0x1)
+#define DR_RW_READ (0x3)
+
+#define DR_LEN_1 (0x0) /* Settings for data length to trap on */
+#define DR_LEN_2 (0x4)
+#define DR_LEN_4 (0xC)
+
+/* The low byte of the control register determines which registers are
+ enabled. There are 4 fields of two bits. One bit is "local", meaning
+ that the processor will reset the bit after a task switch, and the other
+ is "global", meaning that we have to explicitly reset the bit. With Linux,
+ you can use either one, since we explicitly zero the register when we enter
+ kernel mode. */
+
+#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
+#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
+#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
+
+#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
+#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */
+
+/* The second byte of the control register has a few special things.
+ We can slow the instruction pipeline for instructions coming via the
+ gdt or the ldt if we want to. I am not sure why this is an advantage */
+
+#define DR_CONTROL_RESERVED (~0xFFFF03FFUL) /* Reserved by Intel */
+#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
+#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
+
+#endif /* _X86_DEBUGREG_H */
--- /dev/null
+#ifndef _X86_DELAY_H
+#define _X86_DELAY_H
+
+/*
+ * Copyright (C) 1993 Linus Torvalds
+ *
+ * Delay routines, calling functions in arch/x86/delay.c
+ */
+
+extern unsigned long ticks_per_usec;
+extern void __udelay(unsigned long usecs);
+#define udelay(n) __udelay(n)
+
+#endif /* defined(_X86_DELAY_H) */
--- /dev/null
+#ifndef __ARCH_DESC_H
+#define __ARCH_DESC_H
+
+#define LDT_ENTRY_SIZE 8
+
+#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY
+
+#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
+#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 1)
+
+#define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY)
+#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
+
+#define load_TR(n) __asm__ __volatile__ ("ltr %%ax" : : "a" (__TSS(n)<<3) )
+
+/*
+ * Guest OS must provide its own code selectors, or use the one we provide. The
+ * RPL must be 1, as we only create bounce frames to ring 1. Any LDT selector
+ * value is okay. Note that checking only the RPL is insufficient: if the
+ * selector is poked into an interrupt, trap or call gate then the RPL is
+ * ignored when the gate is accessed.
+ */
+#define VALID_SEL(_s) \
+ (((((_s)>>3) < FIRST_RESERVED_GDT_ENTRY) || \
+ (((_s)>>3) > LAST_RESERVED_GDT_ENTRY) || \
+ ((_s)&4)) && \
+ (((_s)&3) == 1))
+#define VALID_CODESEL(_s) ((_s) == FLAT_RING1_CS || VALID_SEL(_s))
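+
+/*
+ * Worked example (illustrative; assumes GDT entry 10 lies below
+ * FIRST_RESERVED_GDT_ENTRY, which is defined elsewhere): selector 0x51
+ * (index 10, TI=0, RPL=1) passes VALID_SEL(); selector 0x53 (same entry,
+ * RPL=3) is rejected, as bounce frames are built only for ring 1; LDT
+ * selector 0x55 (TI=1) is accepted whatever its index.
+ */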
+
+/* These are bitmasks for the first 32 bits of a descriptor table entry. */
+#define _SEGMENT_TYPE (15<< 8)
+#define _SEGMENT_S ( 1<<12) /* System descriptor (yes iff S==0) */
+#define _SEGMENT_DPL ( 3<<13) /* Descriptor Privilege Level */
+#define _SEGMENT_P ( 1<<15) /* Segment Present */
+#define _SEGMENT_G ( 1<<23) /* Granularity */
+
+#ifndef __ASSEMBLY__
+struct desc_struct {
+ unsigned long a,b;
+};
+
+extern struct desc_struct gdt_table[];
+extern struct desc_struct *idt, *gdt;
+
+struct Xgt_desc_struct {
+ unsigned short size;
+ unsigned long address __attribute__((packed));
+};
+
+#define idt_descr (*(struct Xgt_desc_struct *)((char *)&idt - 2))
+#define gdt_descr (*(struct Xgt_desc_struct *)((char *)&gdt - 2))
+
+extern void set_intr_gate(unsigned int irq, void * addr);
+extern void set_tss_desc(unsigned int n, void *addr);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
--- /dev/null
+#ifndef __I386_DIV64
+#define __I386_DIV64
+
+#define do_div(n,base) ({ \
+ unsigned long __upper, __low, __high, __mod; \
+ asm("":"=a" (__low), "=d" (__high):"A" (n)); \
+ __upper = __high; \
+ if (__high) { \
+ __upper = __high % (base); \
+ __high = __high / (base); \
+ } \
+ asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (base), "0" (__low), "1" (__upper)); \
+ asm("":"=A" (n):"a" (__low),"d" (__high)); \
+ __mod; \
+})
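+
+/*
+ * Usage sketch: do_div() divides the 64-bit variable in place and
+ * evaluates to the remainder, e.g. to split a nanosecond count into
+ * seconds (values illustrative):
+ *
+ *     unsigned long long ns = 3123456789ULL;
+ *     unsigned long rem = do_div(ns, 1000000000);
+ *                                   now ns == 3 and rem == 123456789
+ */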
+
+#endif
--- /dev/null
+/******************************************************************************
+ * domain_page.h
+ *
+ * Allow temporary mapping of domain page frames into Xen space.
+ */
+
+#ifndef __ASM_DOMAIN_PAGE_H__
+#define __ASM_DOMAIN_PAGE_H__
+
+#include <xen/config.h>
+#include <xen/sched.h>
+
+extern unsigned long *mapcache;
+#define MAPCACHE_ENTRIES 1024
+
+/*
+ * Maps a given physical address, returning corresponding virtual address.
+ * The entire page containing that VA is now accessible until a
+ * corresponding call to unmap_domain_mem().
+ */
+extern void *map_domain_mem(unsigned long pa);
+
+/*
+ * Pass a VA within a page previously mapped with map_domain_mem().
+ * That page will then be removed from the mapping lists.
+ */
+extern void unmap_domain_mem(void *va);
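+
+/*
+ * Usage sketch (illustrative; 'pfn' is a hypothetical page-frame number):
+ *
+ *     unsigned long *p = map_domain_mem(pfn << PAGE_SHIFT);
+ *     p[0] = 0;                     touch the temporarily-mapped frame
+ *     unmap_domain_mem(p);
+ */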
+
+#endif /* __ASM_DOMAIN_PAGE_H__ */
--- /dev/null
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ *
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ */
+
+#ifndef _ASM_FIXMAP_H
+#define _ASM_FIXMAP_H
+
+#include <xen/config.h>
+#include <asm/acpi.h>
+#include <asm/apicdef.h>
+#include <asm/page.h>
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process. We allocate these special addresses
+ * from the end of virtual memory (0xfffff000) backwards.
+ * Also, this lets us do fail-safe vmalloc(): we
+ * can guarantee that these special addresses and
+ * vmalloc()-ed addresses never overlap.
+ *
+ * These 'compile-time allocated' memory buffers are
+ * fixed-size 4k pages (or larger if used with an increment
+ * higher than 1). Use set_fixmap(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ * TLB entries of such buffers will not be flushed across
+ * task switches.
+ */
+
+/*
+ * On UP we currently have no trace of the fixmap mechanism -
+ * no page table allocations, etc. This might change in the
+ * future; say, framebuffers for the console driver(s) could be
+ * fix-mapped?
+ */
+enum fixed_addresses {
+#ifdef CONFIG_X86_LOCAL_APIC
+ FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
+#endif
+#ifdef CONFIG_X86_IO_APIC
+ FIX_IO_APIC_BASE_0,
+ FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
+#endif
+#ifdef CONFIG_HIGHMEM
+ FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
+ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+#endif
+#ifdef CONFIG_ACPI_BOOT
+ FIX_ACPI_BEGIN,
+ FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
+#endif
+ __end_of_fixed_addresses
+};
+
+extern void __set_fixmap (enum fixed_addresses idx,
+ l1_pgentry_t entry);
+
+#define set_fixmap(idx, phys) \
+ __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR))
+/*
+ * Some hardware wants to get fixmapped without caching.
+ */
+#define set_fixmap_nocache(idx, phys) \
+ __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR_NOCACHE))
+/*
+ * used by vmalloc.c.
+ *
+ * Leave one empty page between vmalloc'ed areas and
+ * the start of the fixmap, and leave one page empty
+ * at the top of mem..
+ */
+#define FIXADDR_TOP (0xffffe000UL)
+#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+
+#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+
+extern void __this_fixmap_does_not_exist(void);
+
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without translation, we catch the bug with a NULL-dereference
+ * kernel oops. Illegal ranges of incoming indices are caught too.
+ */
+static inline unsigned long fix_to_virt(const unsigned int idx)
+{
+ /*
+ * this branch gets completely eliminated after inlining,
+ * except when someone tries to use fixaddr indices in an
+ * illegal way. (such as mixing up address types or using
+ * out-of-range indices).
+ *
+ * If it doesn't get removed, the linker will complain
+ * loudly with a reasonably clear error message..
+ */
+ if (idx >= __end_of_fixed_addresses)
+ __this_fixmap_does_not_exist();
+
+ return __fix_to_virt(idx);
+}
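+
+/*
+ * Worked example: with FIXADDR_TOP at 0xffffe000 and FIX_APIC_BASE as the
+ * first fixmap slot, fix_to_virt(FIX_APIC_BASE) evaluates to 0xffffe000.
+ * A constant out-of-range index instead leaves the call to
+ * __this_fixmap_does_not_exist() unresolved, and the link fails.
+ */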
+
+#endif
--- /dev/null
+/******************************************************************************
+ * flushtlb.h
+ *
+ * TLB flushes are timestamped using a global virtual 'clock' which ticks
+ * on any TLB flush on any processor.
+ *
+ * Copyright (c) 2003, K A Fraser
+ */
+
+#ifndef __FLUSHTLB_H__
+#define __FLUSHTLB_H__
+
+#include <xen/config.h>
+#include <xen/smp.h>
+
+/*
+ * Every time the TLB clock passes an "epoch", every CPU's TLB is flushed.
+ * Therefore, if the current TLB time and a previously-read timestamp differ
+ * in their significant bits (i.e., ~TLBCLOCK_EPOCH_MASK), then the TLB clock
+ * has wrapped at least once and every CPU's TLB is guaranteed to have been
+ * flushed meanwhile.
+ * This allows us to deal gracefully with a bounded (a.k.a. wrapping) clock.
+ */
+#define TLBCLOCK_EPOCH_MASK ((1U<<16)-1)
+
+/*
+ * 'cpu_stamp' is the current timestamp for the CPU we are testing.
+ * 'lastuse_stamp' is a timestamp taken when the PFN we are testing was last
+ * used for a purpose that may have caused the CPU's TLB to become tainted.
+ */
+static inline int NEED_FLUSH(u32 cpu_stamp, u32 lastuse_stamp)
+{
+ /*
+ * Why does this work?
+ * 1. The XOR sets high-order bits if the stamps are from differing epochs.
+ * 2. Subtraction sets high-order bits if 'cpu_stamp > lastuse_stamp'.
+ * In either case a flush is unnecessary: we therefore OR the results from
+ * (1) and (2), mask the high-order bits, and return the inverse.
+ */
+ return !(((lastuse_stamp^cpu_stamp)|(lastuse_stamp-cpu_stamp)) &
+ ~TLBCLOCK_EPOCH_MASK);
+}
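+
+/*
+ * Worked example, with TLBCLOCK_EPOCH_MASK == 0xFFFF (stamps illustrative):
+ *  - cpu_stamp 0x00020005, lastuse_stamp 0x00010007: the XOR gives
+ *    0x00030002, high-order bits set, so the epochs differ => no flush.
+ *  - cpu_stamp 0x00020007, lastuse_stamp 0x00020005: the subtraction
+ *    underflows to 0xFFFFFFFE, high-order bits set, so this CPU flushed
+ *    after the last use => no flush.
+ *  - cpu_stamp 0x00020005, lastuse_stamp 0x00020007: both terms stay in
+ *    the low 16 bits, so NEED_FLUSH() returns 1.
+ */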
+
+extern u32 tlbflush_clock;
+extern u32 tlbflush_time[NR_CPUS];
+
+extern void tlb_clocktick(void);
+extern void new_tlbflush_clock_period(void);
+
+/*
+ * TLB flushing:
+ *
+ * - flush_tlb() flushes the current mm struct TLBs
+ * - flush_tlb_all() flushes all processes TLBs
+ * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
+ *
+ * ..but the i386 has somewhat limited tlb flushing capabilities,
+ * and page-granular flushes are available only on i486 and up.
+ */
+
+#ifndef CONFIG_SMP
+
+#define flush_tlb() __flush_tlb()
+#define flush_tlb_all() __flush_tlb()
+#define flush_tlb_all_pge() __flush_tlb_pge()
+#define local_flush_tlb() __flush_tlb()
+#define flush_tlb_cpu(_cpu) __flush_tlb()
+#define flush_tlb_mask(_mask) __flush_tlb()
+#define try_flush_tlb_mask(_mask) __flush_tlb()
+
+#else
+
+#include <xen/smp.h>
+
+extern int try_flush_tlb_mask(unsigned long mask);
+extern void flush_tlb_mask(unsigned long mask);
+extern void flush_tlb_all_pge(void);
+
+#define flush_tlb() __flush_tlb()
+#define flush_tlb_all() flush_tlb_mask((1 << smp_num_cpus) - 1)
+#define local_flush_tlb() __flush_tlb()
+#define flush_tlb_cpu(_cpu) flush_tlb_mask(1 << (_cpu))
+
+#endif
+
+#endif /* __FLUSHTLB_H__ */
--- /dev/null
+#ifndef __ASM_HARDIRQ_H
+#define __ASM_HARDIRQ_H
+
+#include <xen/config.h>
+#include <xen/irq.h>
+
+/* assembly code in softirq.h is sensitive to the offsets of these fields */
+typedef struct {
+ unsigned int __softirq_pending;
+ unsigned int __local_irq_count;
+ unsigned int __local_bh_count;
+ unsigned int __syscall_count;
+ unsigned int __nmi_count;
+ unsigned long idle_timestamp;
+} ____cacheline_aligned irq_cpustat_t;
+
+#include <xen/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
+
+/*
+ * Are we in an interrupt context? Either doing bottom half
+ * or hardware interrupt processing?
+ */
+#define in_interrupt() ({ int __cpu = smp_processor_id(); \
+ (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); })
+
+#define in_irq() (local_irq_count(smp_processor_id()) != 0)
+
+#ifndef CONFIG_SMP
+
+#define hardirq_trylock(cpu) (local_irq_count(cpu) == 0)
+#define hardirq_endlock(cpu) do { } while (0)
+
+#define irq_enter(cpu, irq) (local_irq_count(cpu)++)
+#define irq_exit(cpu, irq) (local_irq_count(cpu)--)
+
+#define synchronize_irq() barrier()
+
+#else
+
+#include <asm/atomic.h>
+#include <asm/smp.h>
+
+extern unsigned char global_irq_holder;
+extern unsigned volatile long global_irq_lock; /* long for set_bit -RR */
+
+static inline int irqs_running (void)
+{
+ int i;
+
+ for (i = 0; i < smp_num_cpus; i++)
+ if (local_irq_count(i))
+ return 1;
+ return 0;
+}
+
+static inline void release_irqlock(int cpu)
+{
+ /* if we didn't own the irq lock, just ignore.. */
+ if (global_irq_holder == (unsigned char) cpu) {
+ global_irq_holder = NO_PROC_ID;
+ clear_bit(0,&global_irq_lock);
+ }
+}
+
+static inline void irq_enter(int cpu, int irq)
+{
+ ++local_irq_count(cpu);
+
+ smp_mb();
+
+ while (test_bit(0,&global_irq_lock)) {
+ cpu_relax();
+ }
+}
+
+static inline void irq_exit(int cpu, int irq)
+{
+ --local_irq_count(cpu);
+}
+
+static inline int hardirq_trylock(int cpu)
+{
+ return !local_irq_count(cpu) && !test_bit(0,&global_irq_lock);
+}
+
+#define hardirq_endlock(cpu) do { } while (0)
+
+extern void synchronize_irq(void);
+
+#endif /* CONFIG_SMP */
+
+#endif /* __ASM_HARDIRQ_H */
--- /dev/null
+/*
+ * include/asm-x86/i387.h
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#ifndef __ASM_I386_I387_H
+#define __ASM_I386_I387_H
+
+#include <xen/sched.h>
+#include <asm/processor.h>
+
+extern void init_fpu(void);
+extern void save_init_fpu( struct task_struct *tsk );
+extern void restore_fpu( struct task_struct *tsk );
+
+#define unlazy_fpu( tsk ) do { \
+ if ( test_bit(PF_USEDFPU, &tsk->flags) ) \
+ save_init_fpu( tsk ); \
+} while (0)
+
+#define clear_fpu( tsk ) do { \
+ if ( test_and_clear_bit(PF_USEDFPU, &tsk->flags) ) { \
+ asm volatile("fwait"); \
+ stts(); \
+ } \
+} while (0)
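+
+/*
+ * Lazy-FPU sketch (illustrative): on a task switch the scheduler would
+ * call unlazy_fpu(prev), so FPU state is saved only if 'prev' actually
+ * used the FPU (PF_USEDFPU set); the next FPU instruction in the new task
+ * then traps, and the handler calls restore_fpu() to reload its state on
+ * demand. A task that never touches the FPU never pays for a save/restore.
+ */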
+
+#define load_mxcsr( val ) do { \
+ unsigned long __mxcsr = ((unsigned long)(val) & 0xffbf); \
+ asm volatile( "ldmxcsr %0" : : "m" (__mxcsr) ); \
+} while (0)
+
+#endif /* __ASM_I386_I387_H */
--- /dev/null
+#ifndef _ASM_IO_H
+#define _ASM_IO_H
+
+#include <xen/config.h>
+#include <asm/page.h>
+
+#define IO_SPACE_LIMIT 0xffff
+
+/**
+ * virt_to_phys - map virtual addresses to physical
+ * @address: address to remap
+ *
+ * The returned physical address is the physical (CPU) mapping for
+ * the memory address given. It is only valid to use this function on
+ * addresses directly mapped or allocated via kmalloc.
+ *
+ * This function does not give bus mappings for DMA transfers. In
+ * almost all conceivable cases a device driver should not be using
+ * this function
+ */
+
+static inline unsigned long virt_to_phys(volatile void * address)
+{
+ return __pa(address);
+}
+
+/**
+ * phys_to_virt - map physical address to virtual
+ * @address: address to remap
+ *
+ * The returned virtual address is a current CPU mapping for
+ * the memory address given. It is only valid to use this function on
+ * addresses that have a kernel mapping
+ *
+ * This function does not handle bus mappings for DMA transfers. In
+ * almost all conceivable cases a device driver should not be using
+ * this function
+ */
+
+static inline void * phys_to_virt(unsigned long address)
+{
+ return __va(address);
+}
+
+/*
+ * Change "struct pfn_info" to physical address.
+ */
+#ifdef CONFIG_HIGHMEM64G
+#define page_to_phys(page) ((u64)(page - frame_table) << PAGE_SHIFT)
+#else
+#define page_to_phys(page) ((page - frame_table) << PAGE_SHIFT)
+#endif
+
+#define page_to_pfn(_page) ((unsigned long)((_page) - frame_table))
+#define page_to_virt(_page) phys_to_virt(page_to_phys(_page))
+
+
+/*
+ * IO bus memory addresses are also 1:1 with the physical address
+ */
+#define virt_to_bus virt_to_phys
+#define bus_to_virt phys_to_virt
+#define page_to_bus page_to_phys
+
+#define __OUT1(s,x) \
+static inline void out##s(unsigned x value, unsigned short port) {
+
+#define __OUT2(s,s1,s2) \
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+
+#define __OUT(s,s1,x) \
+__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
+__OUT1(s##_p,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port));}
+
+#define __IN1(s) \
+static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
+
+#define __IN2(s,s1,s2) \
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+
+#define __IN(s,s1,i...) \
+__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; }
+
+#define RETURN_TYPE unsigned char
+__IN(b,"")
+#undef RETURN_TYPE
+#define RETURN_TYPE unsigned short
+__IN(w,"")
+#undef RETURN_TYPE
+#define RETURN_TYPE unsigned int
+__IN(l,"")
+#undef RETURN_TYPE
+
+__OUT(b,"b",char)
+__OUT(w,"w",short)
+__OUT(l,,int)
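+
+/*
+ * Hand-expanded for clarity (approximate sketch): __OUT(b,"b",char)
+ * above generates
+ *
+ *   static inline void outb(unsigned char value, unsigned short port)
+ *   {
+ *       __asm__ __volatile__ ("outb %b0,%w1" : : "a" (value), "Nd" (port));
+ *   }
+ *
+ * plus an identical outb_p() variant; __IN(b,"") likewise yields inb()
+ * and inb_p() returning unsigned char.
+ */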
+
+#endif
--- /dev/null
+#ifndef __ASM_IO_APIC_H
+#define __ASM_IO_APIC_H
+
+#include <xen/config.h>
+#include <xen/types.h>
+
+/*
+ * Intel IO-APIC support for SMP and UP systems.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
+ */
+
+#ifdef CONFIG_X86_IO_APIC
+
+#define APIC_MISMATCH_DEBUG
+
+#define IO_APIC_BASE(idx) \
+ ((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \
+ + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK)))
+
+/*
+ * The structure of the IO-APIC:
+ */
+struct IO_APIC_reg_00 {
+ __u32 __reserved_2 : 14,
+ LTS : 1,
+ delivery_type : 1,
+ __reserved_1 : 8,
+ ID : 4,
+ __reserved_0 : 4;
+} __attribute__ ((packed));
+
+struct IO_APIC_reg_01 {
+ __u32 version : 8,
+ __reserved_2 : 7,
+ PRQ : 1,
+ entries : 8,
+ __reserved_1 : 8;
+} __attribute__ ((packed));
+
+struct IO_APIC_reg_02 {
+ __u32 __reserved_2 : 24,
+ arbitration : 4,
+ __reserved_1 : 4;
+} __attribute__ ((packed));
+
+struct IO_APIC_reg_03 {
+ __u32 boot_DT : 1,
+ __reserved_1 : 31;
+} __attribute__ ((packed));
+
+/*
+ * # of IO-APICs and # of IRQ routing registers
+ */
+extern int nr_ioapics;
+extern int nr_ioapic_registers[MAX_IO_APICS];
+
+enum ioapic_irq_destination_types {
+ dest_Fixed = 0,
+ dest_LowestPrio = 1,
+ dest_SMI = 2,
+ dest__reserved_1 = 3,
+ dest_NMI = 4,
+ dest_INIT = 5,
+ dest__reserved_2 = 6,
+ dest_ExtINT = 7
+};
+
+struct IO_APIC_route_entry {
+ __u32 vector : 8,
+ delivery_mode : 3, /* 000: FIXED
+ * 001: lowest prio
+ * 111: ExtINT
+ */
+ dest_mode : 1, /* 0: physical, 1: logical */
+ delivery_status : 1,
+ polarity : 1,
+ irr : 1,
+ trigger : 1, /* 0: edge, 1: level */
+ mask : 1, /* 0: enabled, 1: disabled */
+ __reserved_2 : 15;
+
+ union { struct { __u32
+ __reserved_1 : 24,
+ physical_dest : 4,
+ __reserved_2 : 4;
+ } physical;
+
+ struct { __u32
+ __reserved_1 : 24,
+ logical_dest : 8;
+ } logical;
+ } dest;
+
+} __attribute__ ((packed));
+
+/*
+ * MP-BIOS irq configuration table structures:
+ */
+
+/* I/O APIC entries */
+extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+
+/* # of MP IRQ source entries */
+extern int mp_irq_entries;
+
+/* MP IRQ source entries */
+extern struct mpc_config_intsrc *mp_irqs;
+
+/* non-0 if default (table-less) MP configuration */
+extern int mpc_default_type;
+
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+ *IO_APIC_BASE(apic) = reg;
+ return *(IO_APIC_BASE(apic)+4);
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+ *IO_APIC_BASE(apic) = reg;
+ *(IO_APIC_BASE(apic)+4) = value;
+}
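+
+/*
+ * Illustrative sketch (helper name hypothetical): register 0x01 holds
+ * the redirection-entry count, so the accessors above can be combined
+ * with struct IO_APIC_reg_01 like this:
+ */
+static inline int io_apic_nr_entries(unsigned int apic)
+{
+    struct IO_APIC_reg_01 reg_01;
+
+    *(int *)&reg_01 = io_apic_read(apic, 1);
+    return reg_01.entries + 1;     /* hardware field is (count - 1) */
+}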
+
+/*
+ * Synchronize the IO-APIC and the CPU by doing
+ * a dummy read from the IO-APIC
+ */
+static inline void io_apic_sync(unsigned int apic)
+{
+ (void) *(IO_APIC_BASE(apic)+4);
+}
+
+/*
+ * If we use the IO-APIC for IRQ routing, disable automatic
+ * assignment of PCI IRQ's.
+ */
+#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup)
+
+#ifdef CONFIG_ACPI_BOOT
+extern int io_apic_get_unique_id (int ioapic, int apic_id);
+extern int io_apic_get_version (int ioapic);
+extern int io_apic_get_redir_entries (int ioapic);
+extern int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low);
+#endif
+
+extern int skip_ioapic_setup; /* 1 for "noapic" */
+
+static inline void disable_ioapic_setup(void)
+{
+ skip_ioapic_setup = 1;
+}
+
+static inline int ioapic_setup_disabled(void)
+{
+ return skip_ioapic_setup;
+}
+
+#else /* !CONFIG_X86_IO_APIC */
+#define io_apic_assign_pci_irqs 0
+
+static inline void disable_ioapic_setup(void)
+{ }
+
+#endif /* !CONFIG_X86_IO_APIC */
+
+#endif
--- /dev/null
+#ifndef _ASM_HW_IRQ_H
+#define _ASM_HW_IRQ_H
+
+/* (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar */
+
+#include <xen/config.h>
+#include <asm/atomic.h>
+
+#define SA_INTERRUPT 0x20000000
+#define SA_SHIRQ 0x04000000
+#define SA_NOPROFILE 0x02000000
+
+#define SA_SAMPLE_RANDOM 0 /* Linux driver compatibility */
+
+#define TIMER_IRQ 0
+
+extern void disable_irq(unsigned int);
+extern void disable_irq_nosync(unsigned int);
+extern void enable_irq(unsigned int);
+
+/*
+ * IDT vectors usable for external interrupt sources start
+ * at 0x30:
+ */
+#define FIRST_EXTERNAL_VECTOR 0x30
+
+#define NR_IRQS (256 - FIRST_EXTERNAL_VECTOR)
+
+#define HYPERVISOR_CALL_VECTOR 0x82
+
+/*
+ * Vectors 0x30-0x3f are used for ISA interrupts.
+ */
+
+/*
+ * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
+ *
+ * some of the following vectors are 'rare', they are merged
+ * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
+ * TLB, reschedule and local APIC vectors are performance-critical.
+ *
+ * Vectors 0xf0-0xfa are free (reserved for future Linux use).
+ */
+#define SPURIOUS_APIC_VECTOR 0xff
+#define ERROR_APIC_VECTOR 0xfe
+#define INVALIDATE_TLB_VECTOR 0xfd
+#define EVENT_CHECK_VECTOR 0xfc
+#define CALL_FUNCTION_VECTOR 0xfb
+#define KDB_VECTOR 0xfa
+
+/*
+ * Local APIC timer IRQ vector is on a different priority level,
+ * to work around the 'lost local interrupt if more than 2 IRQ
+ * sources per level' errata.
+ */
+#define LOCAL_TIMER_VECTOR 0xef
+
+/*
+ * First APIC vector available to drivers: (vectors 0x40-0xee)
+ * we start at 0x41 to spread out vectors evenly between priority
+ * levels. (0x82 is the syscall vector)
+ */
+#define FIRST_DEVICE_VECTOR 0x41
+#define FIRST_SYSTEM_VECTOR 0xef
+
+extern int irq_vector[NR_IRQS];
+#define IO_APIC_VECTOR(irq) irq_vector[irq]
+
+/*
+ * Various low-level irq details needed by irq.c, process.c,
+ * time.c, io_apic.c and smp.c
+ *
+ * Interrupt entry/exit code at both C and assembly level
+ */
+
+extern void mask_irq(unsigned int irq);
+extern void unmask_irq(unsigned int irq);
+extern void disable_8259A_irq(unsigned int irq);
+extern void enable_8259A_irq(unsigned int irq);
+extern int i8259A_irq_pending(unsigned int irq);
+extern void make_8259A_irq(unsigned int irq);
+extern void init_8259A(int aeoi);
+extern void FASTCALL(send_IPI_self(int vector));
+extern void init_VISWS_APIC_irqs(void);
+extern void setup_IO_APIC(void);
+extern void disable_IO_APIC(void);
+extern void print_IO_APIC(void);
+extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
+extern void send_IPI(int dest, int vector);
+
+extern unsigned long io_apic_irqs;
+
+extern atomic_t irq_err_count;
+extern atomic_t irq_mis_count;
+
+extern char _stext, _etext;
+
+#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
+
+#define __STR(x) #x
+#define STR(x) __STR(x)
+
+#define SAVE_ALL \
+ "cld\n\t" \
+ "pushl %gs\n\t" \
+ "pushl %fs\n\t" \
+ "pushl %es\n\t" \
+ "pushl %ds\n\t" \
+ "pushl %eax\n\t" \
+ "pushl %ebp\n\t" \
+ "pushl %edi\n\t" \
+ "pushl %esi\n\t" \
+ "pushl %edx\n\t" \
+ "pushl %ecx\n\t" \
+ "pushl %ebx\n\t" \
+ "movl $" STR(__HYPERVISOR_DS) ",%edx\n\t" \
+ "movl %edx,%ds\n\t" \
+ "movl %edx,%es\n\t" \
+ "movl %edx,%fs\n\t" \
+ "movl %edx,%gs\n\t"
+
+#define IRQ_NAME2(nr) nr##_interrupt(void)
+#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+
+/*
+ * SMP has a few special interrupts for IPI messages
+ */
+
+ /* There is a second layer of macros just to get the symbolic
+ name for the vector evaluated. This change is for RTLinux. */
+#define BUILD_SMP_INTERRUPT(x,v) XBUILD_SMP_INTERRUPT(x,v)
+#define XBUILD_SMP_INTERRUPT(x,v)\
+asmlinkage void x(void); \
+asmlinkage void call_##x(void); \
+__asm__( \
+"\n"__ALIGN_STR"\n" \
+SYMBOL_NAME_STR(x) ":\n\t" \
+ "pushl $"#v"-256\n\t" \
+ SAVE_ALL \
+ SYMBOL_NAME_STR(call_##x)":\n\t" \
+ "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \
+ "jmp ret_from_intr\n");
+
+#define BUILD_SMP_TIMER_INTERRUPT(x,v) XBUILD_SMP_TIMER_INTERRUPT(x,v)
+#define XBUILD_SMP_TIMER_INTERRUPT(x,v) \
+asmlinkage void x(struct pt_regs * regs); \
+asmlinkage void call_##x(void); \
+__asm__( \
+"\n"__ALIGN_STR"\n" \
+SYMBOL_NAME_STR(x) ":\n\t" \
+ "pushl $"#v"-256\n\t" \
+ SAVE_ALL \
+ "movl %esp,%eax\n\t" \
+ "pushl %eax\n\t" \
+ SYMBOL_NAME_STR(call_##x)":\n\t" \
+ "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \
+ "addl $4,%esp\n\t" \
+ "jmp ret_from_intr\n");
+
+#define BUILD_COMMON_IRQ() \
+asmlinkage void call_do_IRQ(void); \
+__asm__( \
+ "\n" __ALIGN_STR"\n" \
+ "common_interrupt:\n\t" \
+ SAVE_ALL \
+ SYMBOL_NAME_STR(call_do_IRQ)":\n\t" \
+ "call " SYMBOL_NAME_STR(do_IRQ) "\n\t" \
+ "jmp ret_from_intr\n");
+
+/*
+ * Subtle. orig_eax is used by the signal code to distinguish between
+ * system calls and interrupted 'random user-space'. Thus we have
+ * to put a negative value into orig_eax here. (the problem is that
+ * both system calls and IRQs want to have small integer numbers in
+ * orig_eax, and the syscall code has won the optimization conflict ;)
+ *
+ * Subtle as a pig's ear. VY
+ */
+
+#define BUILD_IRQ(nr) \
+asmlinkage void IRQ_NAME(nr); \
+__asm__( \
+"\n"__ALIGN_STR"\n" \
+SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \
+ "pushl $"#nr"-256\n\t" \
+ "jmp common_interrupt");
+
+extern unsigned long prof_cpu_mask;
+extern unsigned int * prof_buffer;
+extern unsigned long prof_len;
+extern unsigned long prof_shift;
+
+#include <xen/irq.h>
+
+#if defined(CONFIG_X86_IO_APIC)
+static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {
+ if (IO_APIC_IRQ(i))
+ send_IPI_self(IO_APIC_VECTOR(i));
+}
+#else
+static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {}
+#endif
+
+#endif /* _ASM_HW_IRQ_H */
--- /dev/null
+#ifndef __ARCH_LDT_H
+#define __ARCH_LDT_H
+
+#ifndef __ASSEMBLY__
+
+static inline void load_LDT(struct task_struct *p)
+{
+ unsigned int cpu;
+ struct desc_struct *desc;
+ unsigned long ents;
+
+ if ( (ents = p->mm.ldt_ents) == 0 )
+ {
+ __asm__ __volatile__ ( "lldt %%ax" : : "a" (0) );
+ }
+ else
+ {
+ cpu = smp_processor_id();
+ desc = (struct desc_struct *)GET_GDT_ADDRESS(p) + __LDT(cpu);
+ desc->a = ((LDT_VIRT_START&0xffff)<<16) | (ents*8-1);
+ desc->b = (LDT_VIRT_START&(0xff<<24)) | 0x8200 |
+ ((LDT_VIRT_START&0xff0000)>>16);
+ __asm__ __volatile__ ( "lldt %%ax" : : "a" (__LDT(cpu)<<3) );
+ }
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
--- /dev/null
+/*
+ * Machine dependent access functions for RTC registers.
+ */
+#ifndef _ASM_MC146818RTC_H
+#define _ASM_MC146818RTC_H
+
+#include <asm/io.h>
+#include <xen/spinlock.h>
+
+extern spinlock_t rtc_lock; /* serialize CMOS RAM access */
+
+/**********************************************************************
+ * register summary
+ **********************************************************************/
+#define RTC_SECONDS 0
+#define RTC_SECONDS_ALARM 1
+#define RTC_MINUTES 2
+#define RTC_MINUTES_ALARM 3
+#define RTC_HOURS 4
+#define RTC_HOURS_ALARM 5
+/* RTC_*_alarm is always true if 2 MSBs are set */
+# define RTC_ALARM_DONT_CARE 0xC0
+
+#define RTC_DAY_OF_WEEK 6
+#define RTC_DAY_OF_MONTH 7
+#define RTC_MONTH 8
+#define RTC_YEAR 9
+
+/* control registers - Moto names
+ */
+#define RTC_REG_A 10
+#define RTC_REG_B 11
+#define RTC_REG_C 12
+#define RTC_REG_D 13
+
+/**********************************************************************
+ * register details
+ **********************************************************************/
+#define RTC_FREQ_SELECT RTC_REG_A
+
+/* update-in-progress - set to "1" 244 microsecs before RTC goes off the bus,
+ * reset after update (may take 1.984ms @ 32768Hz RefClock) is complete,
+ * totalling to a max high interval of 2.228 ms.
+ */
+# define RTC_UIP 0x80
+# define RTC_DIV_CTL 0x70
+ /* divider control: refclock values 4.194 / 1.049 MHz / 32.768 kHz */
+# define RTC_REF_CLCK_4MHZ 0x00
+# define RTC_REF_CLCK_1MHZ 0x10
+# define RTC_REF_CLCK_32KHZ 0x20
+ /* 2 values for divider stage reset, others for "testing purposes only" */
+# define RTC_DIV_RESET1 0x60
+# define RTC_DIV_RESET2 0x70
+ /* Periodic intr. / Square wave rate select. 0=none, 1=32.8kHz,... 15=2Hz */
+# define RTC_RATE_SELECT 0x0F
+
+/**********************************************************************/
+#define RTC_CONTROL RTC_REG_B
+# define RTC_SET 0x80 /* disable updates for clock setting */
+# define RTC_PIE 0x40 /* periodic interrupt enable */
+# define RTC_AIE 0x20 /* alarm interrupt enable */
+# define RTC_UIE 0x10 /* update-finished interrupt enable */
+# define RTC_SQWE 0x08 /* enable square-wave output */
+# define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */
+# define RTC_24H 0x02 /* 24 hour mode - else hours bit 7 means pm */
+# define RTC_DST_EN 0x01 /* auto switch DST - works f. USA only */
+
+/**********************************************************************/
+#define RTC_INTR_FLAGS RTC_REG_C
+/* caution - cleared by read */
+# define RTC_IRQF 0x80 /* any of the following 3 is active */
+# define RTC_PF 0x40
+# define RTC_AF 0x20
+# define RTC_UF 0x10
+
+/**********************************************************************/
+#define RTC_VALID RTC_REG_D
+# define RTC_VRT 0x80 /* valid RAM and time */
+/**********************************************************************/
+
+/* example: !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)
+ * determines if the following two #defines are needed
+ */
+#ifndef BCD_TO_BIN
+#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10)
+#endif
+
+#ifndef BIN_TO_BCD
+#define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10)
+#endif
+
+
+#ifndef RTC_PORT
+#define RTC_PORT(x) (0x70 + (x))
+#define RTC_ALWAYS_BCD 1 /* RTC stores values in BCD; always convert */
+#endif
+
+/*
+ * The machines supported so far all access the RTC index register via
+ * an ISA port access, but the way to access the data register differs ...
+ */
+#define CMOS_READ(addr) ({ \
+outb_p((addr),RTC_PORT(0)); \
+inb_p(RTC_PORT(1)); \
+})
+#define CMOS_WRITE(val, addr) ({ \
+outb_p((addr),RTC_PORT(0)); \
+outb_p((val),RTC_PORT(1)); \
+})
+
+#define RTC_IRQ 8
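+
+/*
+ * Illustrative sketch (function name hypothetical; a real caller would
+ * likely use the irq-safe lock variants): read the seconds register,
+ * converting from BCD unless the RTC is in binary mode.
+ */
+static inline unsigned int rtc_read_seconds(void)
+{
+    unsigned int sec;
+
+    spin_lock(&rtc_lock);
+    sec = CMOS_READ(RTC_SECONDS);
+    if ( RTC_ALWAYS_BCD || !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) )
+        BCD_TO_BIN(sec);
+    spin_unlock(&rtc_lock);
+    return sec;
+}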
+
+#endif /* _ASM_MC146818RTC_H */
--- /dev/null
+#ifndef __ASM_MPSPEC_H
+#define __ASM_MPSPEC_H
+
+#include <xen/config.h>
+#include <xen/types.h>
+
+/*
+ * Structure definitions for SMP machines following the
+ * Intel Multiprocessing Specification 1.1 and 1.4.
+ */
+
+/*
+ * This tag identifies where the SMP configuration
+ * information is.
+ */
+
+#define SMP_MAGIC_IDENT (('_'<<24)|('P'<<16)|('M'<<8)|'_')
+
+/*
+ * a maximum of 16 APICs with the current APIC ID architecture.
+ * xAPICs can have up to 256. SAPICs have 16 ID bits.
+ */
+#ifdef CONFIG_X86_CLUSTERED_APIC
+#define MAX_APICS 256
+#else
+#define MAX_APICS 16
+#endif
+
+#define MAX_MPC_ENTRY 1024
+
+struct intel_mp_floating
+{
+ char mpf_signature[4]; /* "_MP_" */
+ unsigned int mpf_physptr; /* Configuration table address */
+ unsigned char mpf_length; /* Our length (paragraphs) */
+ unsigned char mpf_specification;/* Specification version */
+ unsigned char mpf_checksum; /* Checksum (makes sum 0) */
+ unsigned char mpf_feature1; /* Standard or configuration ? */
+ unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */
+ unsigned char mpf_feature3; /* Unused (0) */
+ unsigned char mpf_feature4; /* Unused (0) */
+ unsigned char mpf_feature5; /* Unused (0) */
+};
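+
+/*
+ * Illustrative sketch (function name hypothetical): the floating pointer
+ * is found by scanning 16-byte-aligned locations for SMP_MAGIC_IDENT;
+ * a real scan would also verify mpf_checksum and mpf_length.
+ */
+static inline struct intel_mp_floating *mpf_scan(unsigned int *base,
+                                                 unsigned long bytes)
+{
+    unsigned int *p;
+
+    for ( p = base; p < base + bytes/4; p += 4 )   /* step 16 bytes */
+        if ( *p == SMP_MAGIC_IDENT )
+            return (struct intel_mp_floating *)p;
+    return 0;
+}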
+
+struct mp_config_table
+{
+ char mpc_signature[4];
+#define MPC_SIGNATURE "PCMP"
+ unsigned short mpc_length; /* Size of table */
+ char mpc_spec; /* 0x01 */
+ char mpc_checksum;
+ char mpc_oem[8];
+ char mpc_productid[12];
+ unsigned int mpc_oemptr; /* 0 if not present */
+ unsigned short mpc_oemsize; /* 0 if not present */
+ unsigned short mpc_oemcount;
+ unsigned int mpc_lapic; /* APIC address */
+ unsigned int reserved;
+};
+
+/* Followed by entries */
+
+#define MP_PROCESSOR 0
+#define MP_BUS 1
+#define MP_IOAPIC 2
+#define MP_INTSRC 3
+#define MP_LINTSRC 4
+#define MP_TRANSLATION 192 /* Used by IBM NUMA-Q to describe node locality */
+
+struct mpc_config_processor
+{
+ unsigned char mpc_type;
+ unsigned char mpc_apicid; /* Local APIC number */
+ unsigned char mpc_apicver; /* Its version */
+ unsigned char mpc_cpuflag;
+#define CPU_ENABLED 1 /* Processor is available */
+#define CPU_BOOTPROCESSOR 2 /* Processor is the BP */
+ unsigned int mpc_cpufeature;
+#define CPU_STEPPING_MASK 0x0F
+#define CPU_MODEL_MASK 0xF0
+#define CPU_FAMILY_MASK 0xF00
+ unsigned int mpc_featureflag; /* CPUID feature value */
+ unsigned int mpc_reserved[2];
+};
+
+struct mpc_config_bus
+{
+ unsigned char mpc_type;
+ unsigned char mpc_busid;
+ unsigned char mpc_bustype[6] __attribute((packed));
+};
+
+/* List of Bus Type string values, Intel MP Spec. */
+#define BUSTYPE_EISA "EISA"
+#define BUSTYPE_ISA "ISA"
+#define BUSTYPE_INTERN "INTERN" /* Internal BUS */
+#define BUSTYPE_MCA "MCA"
+#define BUSTYPE_VL "VL" /* Local bus */
+#define BUSTYPE_PCI "PCI"
+#define BUSTYPE_PCMCIA "PCMCIA"
+#define BUSTYPE_CBUS "CBUS"
+#define BUSTYPE_CBUSII "CBUSII"
+#define BUSTYPE_FUTURE "FUTURE"
+#define BUSTYPE_MBI "MBI"
+#define BUSTYPE_MBII "MBII"
+#define BUSTYPE_MPI "MPI"
+#define BUSTYPE_MPSA "MPSA"
+#define BUSTYPE_NUBUS "NUBUS"
+#define BUSTYPE_TC "TC"
+#define BUSTYPE_VME "VME"
+#define BUSTYPE_XPRESS "XPRESS"
+
+struct mpc_config_ioapic
+{
+ unsigned char mpc_type;
+ unsigned char mpc_apicid;
+ unsigned char mpc_apicver;
+ unsigned char mpc_flags;
+#define MPC_APIC_USABLE 0x01
+ unsigned int mpc_apicaddr;
+};
+
+struct mpc_config_intsrc
+{
+ unsigned char mpc_type;
+ unsigned char mpc_irqtype;
+ unsigned short mpc_irqflag;
+ unsigned char mpc_srcbus;
+ unsigned char mpc_srcbusirq;
+ unsigned char mpc_dstapic;
+ unsigned char mpc_dstirq;
+};
+
+enum mp_irq_source_types {
+ mp_INT = 0,
+ mp_NMI = 1,
+ mp_SMI = 2,
+ mp_ExtINT = 3
+};
+
+#define MP_IRQDIR_DEFAULT 0
+#define MP_IRQDIR_HIGH 1
+#define MP_IRQDIR_LOW 3
+
+
+struct mpc_config_lintsrc
+{
+ unsigned char mpc_type;
+ unsigned char mpc_irqtype;
+ unsigned short mpc_irqflag;
+ unsigned char mpc_srcbusid;
+ unsigned char mpc_srcbusirq;
+ unsigned char mpc_destapic;
+#define MP_APIC_ALL 0xFF
+ unsigned char mpc_destapiclint;
+};
+
+struct mp_config_oemtable
+{
+ char oem_signature[4];
+#define MPC_OEM_SIGNATURE "_OEM"
+ unsigned short oem_length; /* Size of table */
+ char oem_rev; /* 0x01 */
+ char oem_checksum;
+ char mpc_oem[8];
+};
+
+struct mpc_config_translation
+{
+ unsigned char mpc_type;
+ unsigned char trans_len;
+ unsigned char trans_type;
+ unsigned char trans_quad;
+ unsigned char trans_global;
+ unsigned char trans_local;
+ unsigned short trans_reserved;
+};
+
+/*
+ * Default configurations
+ *
+ * 1 2 CPU ISA 82489DX
+ * 2 2 CPU EISA 82489DX neither IRQ 0 timer nor IRQ 13 DMA chaining
+ * 3 2 CPU EISA 82489DX
+ * 4 2 CPU MCA 82489DX
+ * 5 2 CPU ISA+PCI
+ * 6 2 CPU EISA+PCI
+ * 7 2 CPU MCA+PCI
+ */
+
+#ifdef CONFIG_MULTIQUAD
+#define MAX_IRQ_SOURCES 512
+#else /* !CONFIG_MULTIQUAD */
+#define MAX_IRQ_SOURCES 256
+#endif /* CONFIG_MULTIQUAD */
+
+#define MAX_MP_BUSSES 32
+enum mp_bustype {
+ MP_BUS_ISA = 1,
+ MP_BUS_EISA,
+ MP_BUS_PCI,
+ MP_BUS_MCA
+};
+extern int *mp_bus_id_to_type;
+extern int *mp_bus_id_to_node;
+extern int *mp_bus_id_to_local;
+extern int *mp_bus_id_to_pci_bus;
+extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+
+extern unsigned int boot_cpu_physical_apicid;
+/*extern unsigned long phys_cpu_present_map;*/
+extern int smp_found_config;
+extern void find_smp_config (void);
+extern void get_smp_config (void);
+/*extern int nr_ioapics;*/
+extern int apic_version [MAX_APICS];
+/*extern int mp_irq_entries;*/
+/*extern struct mpc_config_intsrc *mp_irqs;*/
+/*extern int mpc_default_type;*/
+extern int mp_current_pci_id;
+extern unsigned long mp_lapic_addr;
+/*extern int pic_mode;*/
+extern int using_apic_timer;
+
+#ifdef CONFIG_ACPI_BOOT
+extern void mp_register_lapic (u8 id, u8 enabled);
+extern void mp_register_lapic_address (u64 address);
+
+#ifdef CONFIG_X86_IO_APIC
+extern void mp_register_ioapic (u8 id, u32 address, u32 irq_base);
+extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger, u32 global_irq);
+extern void mp_config_acpi_legacy_irqs (void);
+extern void mp_config_ioapic_for_sci(int irq);
+extern void mp_parse_prt (void);
+#else /*!CONFIG_X86_IO_APIC*/
+static inline void mp_config_ioapic_for_sci(int irq) { }
+#endif /*!CONFIG_X86_IO_APIC*/
+
+#endif /*CONFIG_ACPI_BOOT*/
+
+#endif
+
--- /dev/null
+#ifndef __ASM_MSR_H
+#define __ASM_MSR_H
+
+/*
+ * Access to machine-specific registers (available on 586 and better only)
+ * Note: the rd* operations modify the parameters directly (without using
+ * pointer indirection); this allows gcc to optimize better.
+ */
+
+#define rdmsr(msr,val1,val2) \
+ __asm__ __volatile__("rdmsr" \
+ : "=a" (val1), "=d" (val2) \
+ : "c" (msr))
+
+#define rdmsrl(msr,val) do { unsigned long a__,b__; \
+ __asm__ __volatile__("rdmsr" \
+ : "=a" (a__), "=d" (b__) \
+ : "c" (msr)); \
+ val = a__ | (b__<<32); \
+} while(0)
+
+#define wrmsr(msr,val1,val2) \
+ __asm__ __volatile__("wrmsr" \
+ : /* no outputs */ \
+ : "c" (msr), "a" (val1), "d" (val2))
+
+#define rdtsc(low,high) \
+ __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
+
+#define rdtscl(low) \
+ __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx")
+
+#ifdef x86_32
+#define rdtscll(val) \
+ __asm__ __volatile__("rdtsc" : "=A" (val))
+#else
+#define rdtscll(val) do { \
+ unsigned int a,d; \
+ asm volatile("rdtsc" : "=a" (a), "=d" (d)); \
+ (val) = ((unsigned long)a) | (((unsigned long)d)<<32); \
+} while(0)
+#endif
+
+#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
+
+#define rdpmc(counter,low,high) \
+ __asm__ __volatile__("rdpmc" \
+ : "=a" (low), "=d" (high) \
+ : "c" (counter))
+
+/* symbolic names for some interesting MSRs */
+/* Intel defined MSRs. */
+#define MSR_IA32_P5_MC_ADDR 0
+#define MSR_IA32_P5_MC_TYPE 1
+#define MSR_IA32_PLATFORM_ID 0x17
+#define MSR_IA32_EBL_CR_POWERON 0x2a
+
+/* AMD/K8 specific MSRs */
+#define MSR_EFER 0xc0000080 /* extended feature register */
+#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */
+#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */
+#define MSR_CSTAR 0xc0000083 /* compatibility mode SYSCALL target */
+#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */
+#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */
+#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */
+#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow (or USER_GS from kernel) */
+/* EFER bits: */
+#define _EFER_SCE 0 /* SYSCALL/SYSRET */
+#define _EFER_LME 8 /* Long mode enable */
+#define _EFER_LMA 10 /* Long mode active (read-only) */
+#define _EFER_NX 11 /* No execute enable */
+
+#define EFER_SCE (1<<_EFER_SCE)
+#define EFER_LME (1<<_EFER_LME)
+#define EFER_LMA (1<<_EFER_LMA)
+#define EFER_NX (1<<_EFER_NX)
+
+/* Intel MSRs. Some also available on other CPUs */
+
+#define MSR_IA32_PERFCTR0 0xc1
+#define MSR_IA32_PERFCTR1 0xc2
+
+#define MSR_MTRRcap 0x0fe
+#define MSR_IA32_BBL_CR_CTL 0x119
+
+#define MSR_IA32_MCG_CAP 0x179
+#define MSR_IA32_MCG_STATUS 0x17a
+#define MSR_IA32_MCG_CTL 0x17b
+
+#define MSR_IA32_EVNTSEL0 0x186
+#define MSR_IA32_EVNTSEL1 0x187
+
+#define MSR_IA32_DEBUGCTLMSR 0x1d9
+#define MSR_IA32_LASTBRANCHFROMIP 0x1db
+#define MSR_IA32_LASTBRANCHTOIP 0x1dc
+#define MSR_IA32_LASTINTFROMIP 0x1dd
+#define MSR_IA32_LASTINTTOIP 0x1de
+
+#define MSR_MTRRfix64K_00000 0x250
+#define MSR_MTRRfix16K_80000 0x258
+#define MSR_MTRRfix16K_A0000 0x259
+#define MSR_MTRRfix4K_C0000 0x268
+#define MSR_MTRRfix4K_C8000 0x269
+#define MSR_MTRRfix4K_D0000 0x26a
+#define MSR_MTRRfix4K_D8000 0x26b
+#define MSR_MTRRfix4K_E0000 0x26c
+#define MSR_MTRRfix4K_E8000 0x26d
+#define MSR_MTRRfix4K_F0000 0x26e
+#define MSR_MTRRfix4K_F8000 0x26f
+#define MSR_MTRRdefType 0x2ff
+
+#define MSR_IA32_MC0_CTL 0x400
+#define MSR_IA32_MC0_STATUS 0x401
+#define MSR_IA32_MC0_ADDR 0x402
+#define MSR_IA32_MC0_MISC 0x403
+
+#define MSR_IA32_APICBASE 0x1b
+#define MSR_IA32_APICBASE_BSP (1<<8)
+#define MSR_IA32_APICBASE_ENABLE (1<<11)
+#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
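+
+/*
+ * Illustrative sketch (function name hypothetical): testing whether the
+ * local APIC is globally enabled via the accessors and flags above.
+ */
+static inline int lapic_globally_enabled(void)
+{
+    unsigned int lo, hi;
+
+    rdmsr(MSR_IA32_APICBASE, lo, hi);
+    return (lo & MSR_IA32_APICBASE_ENABLE) != 0;
+}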
+
+#define MSR_IA32_UCODE_WRITE 0x79
+#define MSR_IA32_UCODE_REV 0x8b
+
+#define MSR_IA32_THERM_CONTROL 0x19a
+#define MSR_IA32_THERM_INTERRUPT 0x19b
+#define MSR_IA32_THERM_STATUS 0x19c
+#define MSR_IA32_MISC_ENABLE 0x1a0
+
+#define MSR_P6_PERFCTR0 0xc1
+#define MSR_P6_PERFCTR1 0xc2
+#define MSR_P6_EVNTSEL0 0x186
+#define MSR_P6_EVNTSEL1 0x187
+
+/* K7/K8 MSRs. Not complete. See the architecture manual for a more complete list. */
+#define MSR_K7_EVNTSEL0 0xC0010000
+#define MSR_K7_PERFCTR0 0xC0010004
+#define MSR_K7_EVNTSEL1 0xC0010001
+#define MSR_K7_PERFCTR1 0xC0010005
+#define MSR_K7_EVNTSEL2 0xC0010002
+#define MSR_K7_PERFCTR2 0xC0010006
+#define MSR_K7_EVNTSEL3 0xC0010003
+#define MSR_K7_PERFCTR3 0xC0010007
+#define MSR_K8_TOP_MEM1 0xC001001A
+#define MSR_K8_TOP_MEM2 0xC001001D
+#define MSR_K8_SYSCFG 0xC0000010
+#define MSR_K7_HWCR 0xC0010015
+#define MSR_K7_CLK_CTL 0xC001001b
+#define MSR_K7_FID_VID_CTL 0xC0010041
+#define MSR_K7_VID_STATUS 0xC0010042
+
+/* K6 MSRs */
+#define MSR_K6_EFER 0xC0000080
+#define MSR_K6_STAR 0xC0000081
+#define MSR_K6_WHCR 0xC0000082
+#define MSR_K6_UWCCR 0xC0000085
+#define MSR_K6_EPMR 0xC0000086
+#define MSR_K6_PSOR 0xC0000087
+#define MSR_K6_PFIR 0xC0000088
+
+/* Centaur-Hauls/IDT defined MSRs. */
+#define MSR_IDT_FCR1 0x107
+#define MSR_IDT_FCR2 0x108
+#define MSR_IDT_FCR3 0x109
+#define MSR_IDT_FCR4 0x10a
+
+#define MSR_IDT_MCR0 0x110
+#define MSR_IDT_MCR1 0x111
+#define MSR_IDT_MCR2 0x112
+#define MSR_IDT_MCR3 0x113
+#define MSR_IDT_MCR4 0x114
+#define MSR_IDT_MCR5 0x115
+#define MSR_IDT_MCR6 0x116
+#define MSR_IDT_MCR7 0x117
+#define MSR_IDT_MCR_CTRL 0x120
+
+/* VIA Cyrix defined MSRs*/
+#define MSR_VIA_FCR 0x1107
+#define MSR_VIA_LONGHAUL 0x110a
+#define MSR_VIA_BCR2 0x1147
+
+/* Transmeta defined MSRs */
+#define MSR_TMTA_LONGRUN_CTRL 0x80868010
+#define MSR_TMTA_LONGRUN_FLAGS 0x80868011
+#define MSR_TMTA_LRTI_READOUT 0x80868018
+#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a
+
+#endif /* __ASM_MSR_H */
--- /dev/null
+#ifndef _I386_PAGE_H
+#define _I386_PAGE_H
+
+#define BUG() do { \
+ printk("BUG at %s:%d\n", __FILE__, __LINE__); \
+ __asm__ __volatile__("ud2"); \
+} while (0)
+
+#define L1_PAGETABLE_SHIFT 12
+#define L2_PAGETABLE_SHIFT 22
+
+#define ENTRIES_PER_L1_PAGETABLE 1024
+#define ENTRIES_PER_L2_PAGETABLE 1024
+
+#define PAGE_SHIFT L1_PAGETABLE_SHIFT
+#define PAGE_SIZE (1UL << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE-1))
+
+#define clear_page(_p) memset((void *)(_p), 0, PAGE_SIZE)
+#define copy_page(_t,_f) memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)
+
+#ifndef __ASSEMBLY__
+#include <xen/config.h>
+typedef struct { unsigned long l1_lo; } l1_pgentry_t;
+typedef struct { unsigned long l2_lo; } l2_pgentry_t;
+typedef l1_pgentry_t *l1_pagetable_t;
+typedef l2_pgentry_t *l2_pagetable_t;
+typedef struct { unsigned long pt_lo; } pagetable_t;
+#endif /* !__ASSEMBLY__ */
+
+/* Strip type from a table entry. */
+#define l1_pgentry_val(_x) ((_x).l1_lo)
+#define l2_pgentry_val(_x) ((_x).l2_lo)
+#define pagetable_val(_x) ((_x).pt_lo)
+
+#define alloc_l1_pagetable() ((l1_pgentry_t *)get_free_page(GFP_KERNEL))
+#define alloc_l2_pagetable() ((l2_pgentry_t *)get_free_page(GFP_KERNEL))
+
+/* Add type to a table entry. */
+#define mk_l1_pgentry(_x) ( (l1_pgentry_t) { (_x) } )
+#define mk_l2_pgentry(_x) ( (l2_pgentry_t) { (_x) } )
+#define mk_pagetable(_x) ( (pagetable_t) { (_x) } )
+
+/* Turn a typed table entry into a page index. */
+#define l1_pgentry_to_pagenr(_x) (l1_pgentry_val(_x) >> PAGE_SHIFT)
+#define l2_pgentry_to_pagenr(_x) (l2_pgentry_val(_x) >> PAGE_SHIFT)
+
+/* Turn a typed table entry into a physical address. */
+#define l1_pgentry_to_phys(_x) (l1_pgentry_val(_x) & PAGE_MASK)
+#define l2_pgentry_to_phys(_x) (l2_pgentry_val(_x) & PAGE_MASK)
+
+/* Dereference a typed level-2 entry to yield a typed level-1 table. */
+#define l2_pgentry_to_l1(_x) \
+ ((l1_pgentry_t *)__va(l2_pgentry_val(_x) & PAGE_MASK))
+
+/* Given a virtual address, get an entry offset into a page table. */
+#define l1_table_offset(_a) \
+ (((_a) >> L1_PAGETABLE_SHIFT) & (ENTRIES_PER_L1_PAGETABLE - 1))
+#define l2_table_offset(_a) \
+ ((_a) >> L2_PAGETABLE_SHIFT)
+
+/* Hypervisor table entries use zero to signify 'empty'. */
+#define l1_pgentry_empty(_x) (!l1_pgentry_val(_x))
+#define l2_pgentry_empty(_x) (!l2_pgentry_val(_x))
+
+#define __PAGE_OFFSET (0xFC400000)
+#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
+#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
+#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
+#define page_address(_p) (__va(((_p) - frame_table) << PAGE_SHIFT))
+#define virt_to_page(kaddr) (frame_table + (__pa(kaddr) >> PAGE_SHIFT))
+#define VALID_PAGE(page) ((page - frame_table) < max_mapnr)
+
+/*
+ * NB. We don't currently track I/O holes in the physical RAM space.
+ * For now we guess that I/O devices will be mapped in the first 1MB
+ * (e.g., VGA buffers) or beyond the end of physical RAM.
+ */
+#define pfn_is_ram(_pfn) (((_pfn) > 0x100) && ((_pfn) < max_page))
+
+/* High table entries are reserved by the hypervisor. */
+#define DOMAIN_ENTRIES_PER_L2_PAGETABLE \
+ (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT)
+#define HYPERVISOR_ENTRIES_PER_L2_PAGETABLE \
+ (ENTRIES_PER_L2_PAGETABLE - DOMAIN_ENTRIES_PER_L2_PAGETABLE)
+
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+#include <asm/bitops.h>
+#include <asm/flushtlb.h>
+
+#define linear_pg_table ((l1_pgentry_t *)LINEAR_PT_VIRT_START)
+#define linear_l2_table ((l2_pgentry_t *)(LINEAR_PT_VIRT_START+(LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT))))
+
+#define va_to_l1mfn(_va) (l2_pgentry_val(linear_l2_table[_va>>L2_PAGETABLE_SHIFT]) >> PAGE_SHIFT)
+
+extern l2_pgentry_t idle_pg_table[ENTRIES_PER_L2_PAGETABLE];
+extern void paging_init(void);
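+
+/*
+ * Illustrative sketch (function name hypothetical): a software walk of
+ * the two-level table using the accessors above. 4MB superpages (PSE)
+ * and error reporting are deliberately ignored.
+ */
+static inline unsigned long va_to_phys_sketch(l2_pgentry_t *l2tab,
+                                              unsigned long va)
+{
+    l2_pgentry_t l2e = l2tab[l2_table_offset(va)];
+    l1_pgentry_t *l1tab, l1e;
+
+    if ( l2_pgentry_empty(l2e) )
+        return 0;
+    l1tab = l2_pgentry_to_l1(l2e);
+    l1e = l1tab[l1_table_offset(va)];
+    if ( l1_pgentry_empty(l1e) )
+        return 0;
+    return l1_pgentry_to_phys(l1e) | (va & ~PAGE_MASK);
+}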
+
+#define __flush_tlb() \
+ do { \
+ __asm__ __volatile__ ( \
+ "movl %%cr3, %%eax; movl %%eax, %%cr3" \
+ : : : "memory", "eax" ); \
+ tlb_clocktick(); \
+ } while ( 0 )
+
+/* Flush global pages as well. */
+
+#define __pge_off() \
+ do { \
+ __asm__ __volatile__( \
+ "movl %0, %%cr4; # turn off PGE " \
+ :: "r" (mmu_cr4_features & ~X86_CR4_PGE)); \
+ } while (0)
+
+#define __pge_on() \
+ do { \
+ __asm__ __volatile__( \
+ "movl %0, %%cr4; # turn off PGE " \
+ :: "r" (mmu_cr4_features)); \
+ } while (0)
+
+
+#define __flush_tlb_pge() \
+ do { \
+ __pge_off(); \
+ __flush_tlb(); \
+ __pge_on(); \
+ } while (0)
+
+#define __flush_tlb_one(__addr) \
+__asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr)))
+
+#endif /* !__ASSEMBLY__ */
+
+
+#define _PAGE_PRESENT 0x001
+#define _PAGE_RW 0x002
+#define _PAGE_USER 0x004
+#define _PAGE_PWT 0x008
+#define _PAGE_PCD 0x010
+#define _PAGE_ACCESSED 0x020
+#define _PAGE_DIRTY 0x040
+#define _PAGE_PAT 0x080
+#define _PAGE_PSE 0x080
+#define _PAGE_GLOBAL 0x100
+
+#define __PAGE_HYPERVISOR \
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define __PAGE_HYPERVISOR_NOCACHE \
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED)
+#define __PAGE_HYPERVISOR_RO \
+ (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED)
+
+#define MAKE_GLOBAL(_x) ((_x) | _PAGE_GLOBAL)
+
+#define PAGE_HYPERVISOR MAKE_GLOBAL(__PAGE_HYPERVISOR)
+#define PAGE_HYPERVISOR_RO MAKE_GLOBAL(__PAGE_HYPERVISOR_RO)
+#define PAGE_HYPERVISOR_NOCACHE MAKE_GLOBAL(__PAGE_HYPERVISOR_NOCACHE)
+
+#define mk_l2_writeable(_p) \
+ (*(_p) = mk_l2_pgentry(l2_pgentry_val(*(_p)) | _PAGE_RW))
+#define mk_l2_readonly(_p) \
+ (*(_p) = mk_l2_pgentry(l2_pgentry_val(*(_p)) & ~_PAGE_RW))
+#define mk_l1_writeable(_p) \
+ (*(_p) = mk_l1_pgentry(l1_pgentry_val(*(_p)) | _PAGE_RW))
+#define mk_l1_readonly(_p) \
+ (*(_p) = mk_l1_pgentry(l1_pgentry_val(*(_p)) & ~_PAGE_RW))
+
+
+#ifndef __ASSEMBLY__
+static __inline__ int get_order(unsigned long size)
+{
+ int order;
+
+ size = (size-1) >> (PAGE_SHIFT-1);
+ order = -1;
+ do {
+ size >>= 1;
+ order++;
+ } while (size);
+ return order;
+}
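+
+/*
+ * For illustration: get_order() returns the smallest n such that
+ * (PAGE_SIZE << n) >= size, e.g. get_order(1) == 0, get_order(4096) == 0,
+ * get_order(4097) == 1, get_order(16384) == 2.
+ */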
+#endif
+
+#endif /* _I386_PAGE_H */
--- /dev/null
+#ifndef _ASM_PARAM_H
+#define _ASM_PARAM_H
+
+#ifndef HZ
+#define HZ 100
+#endif
+
+#define EXEC_PAGESIZE 4096
+
+#ifndef NGROUPS
+#define NGROUPS 32
+#endif
+
+#ifndef NOGROUP
+#define NOGROUP (-1)
+#endif
+
+#define MAXHOSTNAMELEN 64 /* max length of hostname */
+
+#ifdef __KERNEL__
+# define CLOCKS_PER_SEC 100 /* frequency at which times() counts */
+#endif
+
+#endif
--- /dev/null
+#ifndef __X86_PCI_H
+#define __X86_PCI_H
+
+#include <xen/config.h>
+
+/* Can be used to override the logic in pci_scan_bus for skipping
+ already-configured bus numbers - to be used for buggy BIOSes
+ or architectures with incomplete PCI setup by the loader */
+
+#ifdef CONFIG_PCI
+extern unsigned int pcibios_assign_all_busses(void);
+#else
+#define pcibios_assign_all_busses() 0
+#endif
+#define pcibios_scan_all_fns() 0
+
+extern unsigned long pci_mem_start;
+#define PCIBIOS_MIN_IO 0x1000
+#define PCIBIOS_MIN_MEM (pci_mem_start)
+
+void pcibios_config_init(void);
+struct pci_bus * pcibios_scan_root(int bus);
+extern int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value);
+extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value);
+
+void pcibios_set_master(struct pci_dev *dev);
+void pcibios_penalize_isa_irq(int irq);
+struct irq_routing_table *pcibios_get_irq_routing_table(void);
+int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
+
+#include <xen/types.h>
+#include <xen/slab.h>
+#include <asm/io.h>
+
+#endif /* __X86_PCI_H */
--- /dev/null
+
+/*
+ * pervasive debugger
+ * www.cl.cam.ac.uk/netos/pdb
+ *
+ * alex ho
+ * 2004
+ * university of cambridge computer laboratory
+ */
+
+
+#ifndef __PDB_H__
+#define __PDB_H__
+
+#include <asm/ptrace.h>
+#include <xen/list.h>
+#include <hypervisor-ifs/dom0_ops.h>
+#include <hypervisor-ifs/hypervisor-if.h> /* for domain id */
+
+extern int pdb_initialized;
+extern int pdb_com_port;
+extern int pdb_high_bit;
+extern int pdb_page_fault_possible;
+extern int pdb_page_fault_scratch;
+extern int pdb_page_fault;
+
+extern void initialize_pdb(void);
+
+/* Get/set values from generic debug interface. */
+extern int pdb_set_values(u_char *buffer, int length,
+ unsigned long cr3, unsigned long addr);
+extern int pdb_get_values(u_char *buffer, int length,
+ unsigned long cr3, unsigned long addr);
+
+/* External entry points. */
+extern int pdb_handle_exception(int exceptionVector,
+ struct pt_regs *xen_regs);
+extern int pdb_serial_input(u_char c, struct pt_regs *regs);
+extern void pdb_do_debug(dom0_op_t *op);
+
+/* PDB Context. */
+struct pdb_context
+{
+ int valid;
+ int domain;
+ int process;
+ int system_call; /* 0x01 break on enter, 0x02 break on exit */
+ unsigned long ptbr;
+};
+extern struct pdb_context pdb_ctx;
+
+/* Breakpoints. */
+struct pdb_breakpoint
+{
+ struct list_head list;
+ unsigned long address;
+ unsigned long cr3;
+ domid_t domain;
+};
+extern void pdb_bkpt_add (unsigned long cr3, unsigned long address);
+extern struct pdb_breakpoint* pdb_bkpt_search (unsigned long cr3,
+ unsigned long address);
+extern int pdb_bkpt_remove (unsigned long cr3, unsigned long address);
+
+/* Conversions. */
+extern int hex (char);
+extern char *mem2hex (char *, char *, int);
+extern char *hex2mem (char *, char *, int);
+extern int hexToInt (char **ptr, int *intValue);
+
+/* Temporary Linux specific definitions */
+extern int pdb_system_call;
+extern unsigned char pdb_system_call_enter_instr; /* original enter instr */
+extern unsigned char pdb_system_call_leave_instr; /* original next instr */
+extern unsigned long pdb_system_call_next_addr; /* instr after int 0x80 */
+extern unsigned long pdb_system_call_eflags_addr; /* saved eflags on stack */
+
+unsigned long pdb_linux_pid_ptbr (unsigned long cr3, int pid);
+void pdb_linux_get_values(char *buffer, int length, unsigned long address,
+ int pid, unsigned long cr3);
+void pdb_linux_set_values(char *buffer, int length, unsigned long address,
+ int pid, unsigned long cr3);
+void pdb_linux_syscall_enter_bkpt (struct pt_regs *regs, long error_code,
+ trap_info_t *ti);
+void pdb_linux_syscall_exit_bkpt (struct pt_regs *regs,
+ struct pdb_context *pdb_ctx);
+
+#endif /* __PDB_H__ */
--- /dev/null
+/*
+ * include/asm-i386/processor.h
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ */
+
+#ifndef __ASM_I386_PROCESSOR_H
+#define __ASM_I386_PROCESSOR_H
+
+#include <asm/page.h>
+#include <asm/types.h>
+#include <asm/cpufeature.h>
+#include <asm/desc.h>
+#include <asm/flushtlb.h>
+#include <asm/pdb.h>
+#include <xen/config.h>
+#include <xen/spinlock.h>
+#include <hypervisor-ifs/hypervisor-if.h>
+
+struct task_struct;
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#define current_text_addr() \
+ ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; })
+
+/*
+ * CPU type and hardware bug flags. Kept separately for each CPU.
+ * Members of this structure are referenced in head.S, so think twice
+ * before touching them. [mj]
+ */
+
+struct cpuinfo_x86 {
+ __u8 x86; /* CPU family */
+ __u8 x86_vendor; /* CPU vendor */
+ __u8 x86_model;
+ __u8 x86_mask;
+ char wp_works_ok; /* It doesn't on 386's */
+ char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */
+ char hard_math;
+ char rfu;
+ int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */
+ __u32 x86_capability[NCAPINTS];
+ char x86_vendor_id[16];
+ char x86_model_id[64];
+ int x86_cache_size; /* in KB - valid for CPUS which support this
+ call */
+ int fdiv_bug;
+ int f00f_bug;
+ int coma_bug;
+ unsigned long loops_per_jiffy;
+ unsigned long *pgd_quick;
+ unsigned long *pmd_quick;
+ unsigned long *pte_quick;
+ unsigned long pgtable_cache_sz;
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+#define X86_VENDOR_INTEL 0
+#define X86_VENDOR_CYRIX 1
+#define X86_VENDOR_AMD 2
+#define X86_VENDOR_UMC 3
+#define X86_VENDOR_NEXGEN 4
+#define X86_VENDOR_CENTAUR 5
+#define X86_VENDOR_RISE 6
+#define X86_VENDOR_TRANSMETA 7
+#define X86_VENDOR_NSC 8
+#define X86_VENDOR_SIS 9
+#define X86_VENDOR_UNKNOWN 0xff
+
+/*
+ * capabilities of CPUs
+ */
+
+extern struct cpuinfo_x86 boot_cpu_data;
+extern struct tss_struct init_tss[NR_CPUS];
+
+#ifdef CONFIG_SMP
+extern struct cpuinfo_x86 cpu_data[];
+#define current_cpu_data cpu_data[smp_processor_id()]
+#else
+#define cpu_data (&boot_cpu_data)
+#define current_cpu_data boot_cpu_data
+#endif
+
+extern char ignore_irq13;
+
+extern void identify_cpu(struct cpuinfo_x86 *);
+extern void print_cpu_info(struct cpuinfo_x86 *);
+extern void dodgy_tsc(void);
+
+/*
+ * EFLAGS bits
+ */
+#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
+#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
+#define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */
+#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
+#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
+#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
+#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
+#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
+#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
+#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
+#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
+#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
+#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
+#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
+#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
+#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
+#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
+
+/*
+ * Generic CPUID function
+ */
+static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
+{
+ __asm__("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (op));
+}
+
+/*
+ * CPUID functions returning a single datum
+ */
+static inline unsigned int cpuid_eax(unsigned int op)
+{
+ unsigned int eax;
+
+ __asm__("cpuid"
+ : "=a" (eax)
+ : "0" (op)
+ : "bx", "cx", "dx");
+ return eax;
+}
+static inline unsigned int cpuid_ebx(unsigned int op)
+{
+ unsigned int eax, ebx;
+
+ __asm__("cpuid"
+ : "=a" (eax), "=b" (ebx)
+ : "0" (op)
+ : "cx", "dx" );
+ return ebx;
+}
+static inline unsigned int cpuid_ecx(unsigned int op)
+{
+ unsigned int eax, ecx;
+
+ __asm__("cpuid"
+ : "=a" (eax), "=c" (ecx)
+ : "0" (op)
+ : "bx", "dx" );
+ return ecx;
+}
+static inline unsigned int cpuid_edx(unsigned int op)
+{
+ unsigned int eax, edx;
+
+ __asm__("cpuid"
+ : "=a" (eax), "=d" (edx)
+ : "0" (op)
+ : "bx", "cx");
+ return edx;
+}
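+
+/*
+ * Illustrative sketch (function name hypothetical): standard feature
+ * flags live in EDX of CPUID leaf 1; e.g. bit 4 indicates a TSC.
+ */
+static inline int cpu_has_tsc_sketch(void)
+{
+    return (cpuid_edx(1) >> 4) & 1;
+}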
+
+
+/*
+ * Intel CPU flags in CR0
+ */
+#define X86_CR0_PE 0x00000001 /* Enable Protected Mode (RW) */
+#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor (RW) */
+#define X86_CR0_EM 0x00000004 /* Require FPU Emulation (RO) */
+#define X86_CR0_TS 0x00000008 /* Task Switched (RW) */
+#define X86_CR0_NE 0x00000020 /* Numeric Error Reporting (RW) */
+#define X86_CR0_WP 0x00010000 /* Supervisor Write Protect (RW) */
+#define X86_CR0_AM 0x00040000 /* Alignment Checking (RW) */
+#define X86_CR0_NW 0x20000000 /* Not Write-Through (RW) */
+#define X86_CR0_CD 0x40000000 /* Cache Disable (RW) */
+#define X86_CR0_PG 0x80000000 /* Paging (RW) */
+
+#define read_cr0() ({ \
+ unsigned int __dummy; \
+ __asm__( \
+ "movl %%cr0,%0\n\t" \
+ :"=r" (__dummy)); \
+ __dummy; \
+})
+
+#define write_cr0(x) \
+ __asm__("movl %0,%%cr0": :"r" (x));
+
+
+/*
+ * Intel CPU features in CR4
+ */
+#define X86_CR4_VME 0x0001 /* enable vm86 extensions */
+#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */
+#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */
+#define X86_CR4_DE 0x0008 /* enable debugging extensions */
+#define X86_CR4_PSE 0x0010 /* enable page size extensions */
+#define X86_CR4_PAE 0x0020 /* enable physical address extensions */
+#define X86_CR4_MCE 0x0040 /* Machine check enable */
+#define X86_CR4_PGE 0x0080 /* enable global pages */
+#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
+#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */
+#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */
+
+/*
+ * Save the cr4 feature set we're using (i.e.
+ * Pentium 4MB enable and PPro Global page
+ * enable), so that any CPUs that boot up
+ * after us can get the correct flags.
+ */
+extern unsigned long mmu_cr4_features;
+
+static inline void set_in_cr4 (unsigned long mask)
+{
+ mmu_cr4_features |= mask;
+ __asm__("movl %%cr4,%%eax\n\t"
+ "orl %0,%%eax\n\t"
+ "movl %%eax,%%cr4\n"
+ : : "irg" (mask)
+ :"ax");
+}
+
+static inline void clear_in_cr4 (unsigned long mask)
+{
+ mmu_cr4_features &= ~mask;
+ __asm__("movl %%cr4,%%eax\n\t"
+ "andl %0,%%eax\n\t"
+ "movl %%eax,%%cr4\n"
+ : : "irg" (~mask)
+ :"ax");
+}
+
+
+
+/*
+ * Cyrix CPU configuration register indexes
+ */
+#define CX86_CCR0 0xc0
+#define CX86_CCR1 0xc1
+#define CX86_CCR2 0xc2
+#define CX86_CCR3 0xc3
+#define CX86_CCR4 0xe8
+#define CX86_CCR5 0xe9
+#define CX86_CCR6 0xea
+#define CX86_CCR7 0xeb
+#define CX86_DIR0 0xfe
+#define CX86_DIR1 0xff
+#define CX86_ARR_BASE 0xc4
+#define CX86_RCR_BASE 0xdc
+
+/*
+ * Cyrix CPU indexed register access macros
+ */
+
+#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); })
+
+#define setCx86(reg, data) do { \
+ outb((reg), 0x22); \
+ outb((data), 0x23); \
+} while (0)
+
+#define EISA_bus (0)
+#define MCA_bus (0)
+
+/* From the system description table in the BIOS. Mostly for MCA use, but
+   others may find it useful. */
+extern unsigned int machine_id;
+extern unsigned int machine_submodel_id;
+extern unsigned int BIOS_revision;
+extern unsigned int mca_pentium_flag;
+
+/*
+ * User space process size: 3GB (default).
+ */
+#define TASK_SIZE (PAGE_OFFSET)
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_BASE (TASK_SIZE / 3)
+
+/*
+ * Size of io_bitmap in longwords:
+ * For Xen we support the full 8kbyte IO bitmap but use the io_bitmap_sel field
+ * of the task_struct to avoid a full 8kbyte copy when switching to / from
+ * domains with bits cleared.
+ */
+#define IO_BITMAP_SIZE 2048
+#define IO_BITMAP_BYTES (IO_BITMAP_SIZE * 4)
+#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
+#define INVALID_IO_BITMAP_OFFSET 0x8000
+
+struct i387_fsave_struct {
+ long cwd;
+ long swd;
+ long twd;
+ long fip;
+ long fcs;
+ long foo;
+ long fos;
+ long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
+ long status; /* software status information */
+};
+
+struct i387_fxsave_struct {
+ unsigned short cwd;
+ unsigned short swd;
+ unsigned short twd;
+ unsigned short fop;
+ long fip;
+ long fcs;
+ long foo;
+ long fos;
+ long mxcsr;
+ long reserved;
+ long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
+ long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */
+ long padding[56];
+} __attribute__ ((aligned (16)));
+
+struct i387_soft_struct {
+ long cwd;
+ long swd;
+ long twd;
+ long fip;
+ long fcs;
+ long foo;
+ long fos;
+ long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
+ unsigned char ftop, changed, lookahead, no_update, rm, alimit;
+ struct info *info;
+ unsigned long entry_eip;
+};
+
+union i387_union {
+ struct i387_fsave_struct fsave;
+ struct i387_fxsave_struct fxsave;
+ struct i387_soft_struct soft;
+};
+
+typedef struct {
+ unsigned long seg;
+} mm_segment_t;
+
+struct tss_struct {
+ unsigned short back_link,__blh;
+ unsigned long esp0;
+ unsigned short ss0,__ss0h;
+ unsigned long esp1;
+ unsigned short ss1,__ss1h;
+ unsigned long esp2;
+ unsigned short ss2,__ss2h;
+ unsigned long __cr3;
+ unsigned long eip;
+ unsigned long eflags;
+ unsigned long eax,ecx,edx,ebx;
+ unsigned long esp;
+ unsigned long ebp;
+ unsigned long esi;
+ unsigned long edi;
+ unsigned short es, __esh;
+ unsigned short cs, __csh;
+ unsigned short ss, __ssh;
+ unsigned short ds, __dsh;
+ unsigned short fs, __fsh;
+ unsigned short gs, __gsh;
+ unsigned short ldt, __ldth;
+ unsigned short trace, bitmap;
+ unsigned long io_bitmap[IO_BITMAP_SIZE+1];
+ /*
+ * pads the TSS to be cacheline-aligned (total size is 0x2080)
+ */
+ unsigned long __cacheline_filler[5];
+};
+
+struct thread_struct {
+ unsigned long guestos_sp, guestos_ss;
+/* Hardware debugging registers */
+ unsigned long debugreg[8]; /* %%db0-7 debug registers */
+/* floating point info */
+ union i387_union i387;
+/* Trap info. */
+ int fast_trap_idx;
+ struct desc_struct fast_trap_desc;
+ trap_info_t traps[256];
+};
+
+#define IDT_ENTRIES 256
+extern struct desc_struct idt_table[];
+extern struct desc_struct *idt_tables[];
+
+#define SET_DEFAULT_FAST_TRAP(_p) do { \
+ (_p)->fast_trap_idx = 0x20; \
+ (_p)->fast_trap_desc.a = 0; \
+ (_p)->fast_trap_desc.b = 0; \
+} while (0)
+
+#define CLEAR_FAST_TRAP(_p) \
+ (memset(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \
+ 0, 8))
+
+#ifdef XEN_DEBUGGER
+#define SET_FAST_TRAP(_p) \
+ (pdb_initialized ? (void *) 0 : \
+ (memcpy(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \
+ &((_p)->fast_trap_desc), 8)))
+#else
+#define SET_FAST_TRAP(_p) \
+ (memcpy(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \
+ &((_p)->fast_trap_desc), 8))
+#endif
+
+long set_fast_trap(struct task_struct *p, int idx);
+
+#define INIT_THREAD { \
+ 0, 0, \
+ { [0 ... 7] = 0 }, /* debugging registers */ \
+ { { 0, }, }, /* 387 state */ \
+ 0x20, { 0, 0 }, /* DEFAULT_FAST_TRAP */ \
+ { {0} } /* io permissions */ \
+}
+
+#define INIT_TSS { \
+ 0,0, /* back_link, __blh */ \
+ 0, /* esp0 */ \
+ 0, 0, /* ss0 */ \
+ 0,0,0,0,0,0, /* stack1, stack2 */ \
+ 0, /* cr3 */ \
+ 0,0, /* eip,eflags */ \
+ 0,0,0,0, /* eax,ecx,edx,ebx */ \
+ 0,0,0,0, /* esp,ebp,esi,edi */ \
+ 0,0,0,0,0,0, /* es,cs,ss */ \
+ 0,0,0,0,0,0, /* ds,fs,gs */ \
+ 0,0, /* ldt */ \
+ 0, INVALID_IO_BITMAP_OFFSET, /* trace, bitmap */ \
+ { [0 ... IO_BITMAP_SIZE] = ~0UL }, /* ioperm */ \
+}
+
+struct mm_struct {
+ /*
+ * Every domain has a L1 pagetable of its own. Per-domain mappings
+ * are put in this table (eg. the current GDT is mapped here).
+ */
+ l1_pgentry_t *perdomain_pt;
+ pagetable_t pagetable;
+
+ /* shadow mode status and controls */
+ unsigned int shadow_mode; /* flags to control shadow table operation */
+ pagetable_t shadow_table;
+ spinlock_t shadow_lock;
+ unsigned int shadow_max_page_count; // currently unused
+
+ /* shadow hashtable */
+ struct shadow_status *shadow_ht;
+ struct shadow_status *shadow_ht_free;
+ struct shadow_status *shadow_ht_extras; /* extra allocation units */
+ unsigned int shadow_extras_count;
+
+ /* shadow dirty bitmap */
+ unsigned long *shadow_dirty_bitmap;
+ unsigned int shadow_dirty_bitmap_size; /* in pages, bit per page */
+
+ /* shadow mode stats */
+ unsigned int shadow_page_count;
+ unsigned int shadow_fault_count;
+ unsigned int shadow_dirty_count;
+
+
+ /* Current LDT details. */
+ unsigned long ldt_base, ldt_ents, shadow_ldt_mapcnt;
+ /* Next entry is passed to LGDT on domain switch. */
+ char gdt[6];
+};
+
+static inline void write_ptbase(struct mm_struct *mm)
+{
+ unsigned long pa;
+
+ if ( unlikely(mm->shadow_mode) )
+ pa = pagetable_val(mm->shadow_table);
+ else
+ pa = pagetable_val(mm->pagetable);
+
+ __asm__ __volatile__ ( "movl %0, %%cr3" : : "r" (pa) : "memory" );
+}
+
+#define IDLE0_MM \
+{ \
+ perdomain_pt: 0, \
+ pagetable: mk_pagetable(__pa(idle_pg_table)) \
+}
+
+/* Convenient accessor for mm.gdt. */
+#define SET_GDT_ENTRIES(_p, _e) ((*(u16 *)((_p)->mm.gdt + 0)) = (_e))
+#define SET_GDT_ADDRESS(_p, _a) ((*(u32 *)((_p)->mm.gdt + 2)) = (_a))
+#define GET_GDT_ENTRIES(_p) ((*(u16 *)((_p)->mm.gdt + 0)))
+#define GET_GDT_ADDRESS(_p) ((*(u32 *)((_p)->mm.gdt + 2)))
+
+long set_gdt(struct task_struct *p,
+ unsigned long *frames,
+ unsigned int entries);
+
+long set_debugreg(struct task_struct *p, int reg, unsigned long value);
+
+struct microcode {
+ unsigned int hdrver;
+ unsigned int rev;
+ unsigned int date;
+ unsigned int sig;
+ unsigned int cksum;
+ unsigned int ldrver;
+ unsigned int pf;
+ unsigned int reserved[5];
+ unsigned int bits[500];
+};
+
+/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */
+#define MICROCODE_IOCFREE _IO('6',0)
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+static inline void rep_nop(void)
+{
+ __asm__ __volatile__("rep;nop");
+}
+
+#define cpu_relax() rep_nop()
+
+/* Prefetch instructions for Pentium III and AMD Athlon */
+#ifdef CONFIG_MPENTIUMIII
+
+#define ARCH_HAS_PREFETCH
+extern inline void prefetch(const void *x)
+{
+ __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x));
+}
+
+#elif CONFIG_X86_USE_3DNOW
+
+#define ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCHW
+#define ARCH_HAS_SPINLOCK_PREFETCH
+
+extern inline void prefetch(const void *x)
+{
+ __asm__ __volatile__ ("prefetch (%0)" : : "r"(x));
+}
+
+extern inline void prefetchw(const void *x)
+{
+ __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x));
+}
+#define spin_lock_prefetch(x) prefetchw(x)
+
+#endif
+
+#endif /* __ASM_I386_PROCESSOR_H */
--- /dev/null
+#ifndef _I386_PTRACE_H
+#define _I386_PTRACE_H
+
+struct pt_regs {
+ long ebx;
+ long ecx;
+ long edx;
+ long esi;
+ long edi;
+ long ebp;
+ long eax;
+ int xds;
+ int xes;
+ int xfs;
+ int xgs;
+ long orig_eax;
+ long eip;
+ int xcs;
+ long eflags;
+ long esp;
+ int xss;
+};
+
+enum EFLAGS {
+ EF_CF = 0x00000001,
+ EF_PF = 0x00000004,
+ EF_AF = 0x00000010,
+ EF_ZF = 0x00000040,
+ EF_SF = 0x00000080,
+ EF_TF = 0x00000100,
+ EF_IE = 0x00000200,
+ EF_DF = 0x00000400,
+ EF_OF = 0x00000800,
+ EF_IOPL = 0x00003000,
+ EF_IOPL_RING0 = 0x00000000,
+ EF_IOPL_RING1 = 0x00001000,
+ EF_IOPL_RING2 = 0x00002000,
+ EF_NT = 0x00004000, /* nested task */
+ EF_RF = 0x00010000, /* resume */
+ EF_VM = 0x00020000, /* virtual mode */
+ EF_AC = 0x00040000, /* alignment */
+ EF_VIF = 0x00080000, /* virtual interrupt */
+ EF_VIP = 0x00100000, /* virtual interrupt pending */
+ EF_ID = 0x00200000, /* id */
+};
+
+#ifdef __KERNEL__
+#define user_mode(regs) ((3 & (regs)->xcs))
+#endif
+
+#endif
--- /dev/null
+/* include/asm-x86/rwlock.h
+ *
+ * Helpers used by both rw spinlocks and rw semaphores.
+ *
+ * Based in part on code from semaphore.h and
+ * spinlock.h Copyright 1996 Linus Torvalds.
+ *
+ * Copyright 1999 Red Hat, Inc.
+ *
+ * Written by Benjamin LaHaise.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _ASM_X86_RWLOCK_H
+#define _ASM_X86_RWLOCK_H
+
+#define RW_LOCK_BIAS 0x01000000
+#define RW_LOCK_BIAS_STR "0x01000000"
+
+#define __build_read_lock_ptr(rw, helper) \
+ asm volatile(LOCK "subl $1,(%0)\n\t" \
+ "js 2f\n" \
+ "1:\n" \
+ ".section .text.lock,\"ax\"\n" \
+ "2:\tcall " helper "\n\t" \
+ "jmp 1b\n" \
+ ".previous" \
+ ::"a" (rw) : "memory")
+
+#define __build_read_lock_const(rw, helper) \
+ asm volatile(LOCK "subl $1,%0\n\t" \
+ "js 2f\n" \
+ "1:\n" \
+ ".section .text.lock,\"ax\"\n" \
+ "2:\tpush"__OS" %%"__OP"ax\n\t" \
+ "lea"__OS" %0,%%"__OP"ax\n\t" \
+ "call " helper "\n\t" \
+ "pop"__OS" %%"__OP"ax\n\t" \
+ "jmp 1b\n" \
+ ".previous" \
+ :"=m" (*(volatile int *)rw) : : "memory")
+
+#define __build_read_lock(rw, helper) do { \
+ if (__builtin_constant_p(rw)) \
+ __build_read_lock_const(rw, helper); \
+ else \
+ __build_read_lock_ptr(rw, helper); \
+ } while (0)
+
+#define __build_write_lock_ptr(rw, helper) \
+ asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
+ "jnz 2f\n" \
+ "1:\n" \
+ ".section .text.lock,\"ax\"\n" \
+ "2:\tcall " helper "\n\t" \
+ "jmp 1b\n" \
+ ".previous" \
+ ::"a" (rw) : "memory")
+
+#define __build_write_lock_const(rw, helper) \
+ asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
+ "jnz 2f\n" \
+ "1:\n" \
+ ".section .text.lock,\"ax\"\n" \
+ "2:\tpush"__OS" %%"__OP"ax\n\t" \
+ "lea"__OS" %0,%%"__OP"ax\n\t" \
+ "call " helper "\n\t" \
+ "pop"__OS" %%"__OP"ax\n\t" \
+ "jmp 1b\n" \
+ ".previous" \
+ :"=m" (*(volatile int *)rw) : : "memory")
+
+#define __build_write_lock(rw, helper) do { \
+ if (__builtin_constant_p(rw)) \
+ __build_write_lock_const(rw, helper); \
+ else \
+ __build_write_lock_ptr(rw, helper); \
+ } while (0)
+
+#endif
--- /dev/null
+#ifndef __ASM_SMP_H
+#define __ASM_SMP_H
+
+/*
+ * We need the APIC definitions automatically as part of 'smp.h'
+ */
+#ifndef __ASSEMBLY__
+#include <xen/config.h>
+/*#include <xen/threads.h>*/
+#include <asm/ptrace.h>
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#ifndef __ASSEMBLY__
+#include <asm/fixmap.h>
+#include <asm/bitops.h>
+#include <asm/mpspec.h>
+#ifdef CONFIG_X86_IO_APIC
+#include <asm/io_apic.h>
+#endif
+#include <asm/apic.h>
+#endif
+#endif
+
+#ifdef CONFIG_SMP
+#ifndef __ASSEMBLY__
+
+/*
+ * Private routines/data
+ */
+
+extern void smp_alloc_memory(void);
+extern unsigned long phys_cpu_present_map;
+extern unsigned long cpu_online_map;
+extern volatile unsigned long smp_invalidate_needed;
+extern int pic_mode;
+extern int smp_num_siblings;
+extern int cpu_sibling_map[];
+
+extern void smp_flush_tlb(void);
+extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
+extern void smp_send_reschedule(int cpu);
+extern void smp_invalidate_rcv(void); /* Process an NMI */
+extern void (*mtrr_hook) (void);
+extern void zap_low_mappings (void);
+
+/*
+ * On x86 all CPUs are mapped 1:1 to the APIC space.
+ * This simplifies scheduling and IPI sending and
+ * compresses data structures.
+ */
+static inline int cpu_logical_map(int cpu)
+{
+ return cpu;
+}
+static inline int cpu_number_map(int cpu)
+{
+ return cpu;
+}
+
+/*
+ * Some lowlevel functions might want to know about
+ * the real APIC ID <-> CPU # mapping.
+ */
+#define MAX_APICID 256
+extern volatile int cpu_to_physical_apicid[NR_CPUS];
+extern volatile int physical_apicid_to_cpu[MAX_APICID];
+extern volatile int cpu_to_logical_apicid[NR_CPUS];
+extern volatile int logical_apicid_to_cpu[MAX_APICID];
+
+/*
+ * General functions that each host system must provide.
+ */
+
+/*extern void smp_boot_cpus(void);*/
+extern void smp_store_cpu_info(int id); /* Store per-CPU info (like the initial udelay numbers) */
+
+/*
+ * This function is needed by all SMP systems. It must _always_ be valid
+ * from the initial startup. We map APIC_BASE very early in page_setup(),
+ * so this is correct in the x86 case.
+ */
+
+#ifdef x86_32
+#define smp_processor_id() (current->processor)
+#else
+#include <asm/pda.h>
+#define smp_processor_id() read_pda(cpunumber)
+#endif
+
+static __inline int hard_smp_processor_id(void)
+{
+ /* we don't want to mark this access volatile - bad code generation */
+ return GET_APIC_ID(*(unsigned *)(APIC_BASE+APIC_ID));
+}
+
+static __inline int logical_smp_processor_id(void)
+{
+ /* we don't want to mark this access volatile - bad code generation */
+ return GET_APIC_LOGICAL_ID(*(unsigned *)(APIC_BASE+APIC_LDR));
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#define NO_PROC_ID 0xFF /* No processor magic marker */
+
+/*
+ * This magic constant controls our willingness to transfer
+ * a process across CPUs. Such a transfer incurs misses on the L1
+ * cache, and on a P6 or P5 with multiple L2 caches it also forfeits
+ * L2 hits. Our gut feeling is that the right value varies by board.
+ * For a board with separate L2 caches it probably also depends on the
+ * RSS, and for a board with a shared L2 cache it ought to decay fast
+ * as other processes are run.
+ */
+
+#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */
+
+#endif
+#endif
--- /dev/null
+#ifndef __ASM_SMPBOOT_H
+#define __ASM_SMPBOOT_H
+
+/* enum for clustered_apic_mode values */
+enum {
+ CLUSTERED_APIC_NONE = 0,
+ CLUSTERED_APIC_XAPIC,
+ CLUSTERED_APIC_NUMAQ
+};
+
+#ifdef CONFIG_X86_CLUSTERED_APIC
+extern unsigned int apic_broadcast_id;
+extern unsigned char clustered_apic_mode;
+extern unsigned char esr_disable;
+extern unsigned char int_delivery_mode;
+extern unsigned int int_dest_addr_mode;
+extern int cyclone_setup(char*);
+
+static inline void detect_clustered_apic(char* oem, char* prod)
+{
+ /*
+ * Can't recognize Summit xAPICs at present, so use the OEM ID.
+ */
+ if (!strncmp(oem, "IBM ENSW", 8) && !strncmp(prod, "VIGIL SMP", 9)){
+ clustered_apic_mode = CLUSTERED_APIC_XAPIC;
+ apic_broadcast_id = APIC_BROADCAST_ID_XAPIC;
+ int_dest_addr_mode = APIC_DEST_PHYSICAL;
+ int_delivery_mode = dest_Fixed;
+ esr_disable = 1;
+ /*Start cyclone clock*/
+ cyclone_setup(0);
+ /* check for ACPI tables */
+ } else if (!strncmp(oem, "IBM", 3) &&
+ (!strncmp(prod, "SERVIGIL", 8) ||
+ !strncmp(prod, "EXA", 3) ||
+ !strncmp(prod, "RUTHLESS", 8))){
+ clustered_apic_mode = CLUSTERED_APIC_XAPIC;
+ apic_broadcast_id = APIC_BROADCAST_ID_XAPIC;
+ int_dest_addr_mode = APIC_DEST_PHYSICAL;
+ int_delivery_mode = dest_Fixed;
+ esr_disable = 1;
+ /*Start cyclone clock*/
+ cyclone_setup(0);
+ } else if (!strncmp(oem, "IBM NUMA", 8)){
+ clustered_apic_mode = CLUSTERED_APIC_NUMAQ;
+ apic_broadcast_id = APIC_BROADCAST_ID_APIC;
+ int_dest_addr_mode = APIC_DEST_LOGICAL;
+ int_delivery_mode = dest_LowestPrio;
+ esr_disable = 1;
+ }
+}
+#define INT_DEST_ADDR_MODE (int_dest_addr_mode)
+#define INT_DELIVERY_MODE (int_delivery_mode)
+#else /* CONFIG_X86_CLUSTERED_APIC */
+#define apic_broadcast_id (APIC_BROADCAST_ID_APIC)
+#define clustered_apic_mode (CLUSTERED_APIC_NONE)
+#define esr_disable (0)
+#define detect_clustered_apic(x,y)
+#define INT_DEST_ADDR_MODE (APIC_DEST_LOGICAL) /* logical delivery */
+#define INT_DELIVERY_MODE (dest_LowestPrio)
+#endif /* CONFIG_X86_CLUSTERED_APIC */
+#define BAD_APICID 0xFFu
+
+#define TRAMPOLINE_LOW phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0x8:0x467)
+#define TRAMPOLINE_HIGH phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0xa:0x469)
+
+#define boot_cpu_apicid ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?boot_cpu_logical_apicid:boot_cpu_physical_apicid)
+
+extern unsigned char raw_phys_apicid[NR_CPUS];
+
+/*
+ * How to map from the cpu_present_map to APIC IDs.
+ */
+static inline int cpu_present_to_apicid(int mps_cpu)
+{
+ if (clustered_apic_mode == CLUSTERED_APIC_XAPIC)
+ return raw_phys_apicid[mps_cpu];
+ if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
+ return (mps_cpu/4)*16 + (1<<(mps_cpu%4));
+ return mps_cpu;
+}
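+
+/*
+ * Worked example (annotation, not original code): on NUMAQ each quad
+ * holds four CPUs, so mps_cpu 5 lands in quad 1 (5/4) with local bit
+ * 1 (5%4), giving apicid (1 * 16) + (1 << 1) = 18.
+ */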
+
+static inline unsigned long apicid_to_phys_cpu_present(int apicid)
+{
+ if(clustered_apic_mode)
+ return 1UL << (((apicid >> 4) << 2) + (apicid & 0x3));
+ return 1UL << apicid;
+}
+
+#define physical_to_logical_apicid(phys_apic) ( (1ul << (phys_apic & 0x3)) | (phys_apic & 0xF0u) )
+
+/*
+ * Mappings between logical cpu number and logical / physical apicid
+ * The first four macros are trivial, but it keeps the abstraction consistent
+ */
+extern volatile int logical_apicid_2_cpu[];
+extern volatile int cpu_2_logical_apicid[];
+extern volatile int physical_apicid_2_cpu[];
+extern volatile int cpu_2_physical_apicid[];
+
+#define logical_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid]
+#define cpu_to_logical_apicid(cpu) cpu_2_logical_apicid[cpu]
+#define physical_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid]
+#define cpu_to_physical_apicid(cpu) cpu_2_physical_apicid[cpu]
+#ifdef CONFIG_MULTIQUAD /* use logical IDs to bootstrap */
+#define boot_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid]
+#define cpu_to_boot_apicid(cpu) cpu_2_logical_apicid[cpu]
+#else /* !CONFIG_MULTIQUAD */ /* use physical IDs to bootstrap */
+#define boot_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid]
+#define cpu_to_boot_apicid(cpu) cpu_2_physical_apicid[cpu]
+#endif /* CONFIG_MULTIQUAD */
+
+#ifdef CONFIG_X86_CLUSTERED_APIC
+static inline int target_cpus(void)
+{
+ static int cpu;
+ switch(clustered_apic_mode){
+ case CLUSTERED_APIC_NUMAQ:
+ /* Broadcast intrs to local quad only. */
+ return APIC_BROADCAST_ID_APIC;
+ case CLUSTERED_APIC_XAPIC:
+ /*round robin the interrupts*/
+ cpu = (cpu+1)%smp_num_cpus;
+ return cpu_to_physical_apicid(cpu);
+ default:
+ break;
+ }
+ return cpu_online_map;
+}
+#else
+#define target_cpus() (cpu_online_map)
+#endif
+#endif
--- /dev/null
+#ifndef __ASM_SOFTIRQ_H
+#define __ASM_SOFTIRQ_H
+
+#include <asm/atomic.h>
+#include <asm/hardirq.h>
+
+#define cpu_bh_enable(cpu) \
+ do { barrier(); local_bh_count(cpu)--; } while (0)
+#define cpu_bh_disable(cpu) \
+ do { local_bh_count(cpu)++; barrier(); } while (0)
+
+#define local_bh_disable() cpu_bh_disable(smp_processor_id())
+#define local_bh_enable() cpu_bh_enable(smp_processor_id())
+
+#define in_softirq() (local_bh_count(smp_processor_id()) != 0)
+
+#endif /* __ASM_SOFTIRQ_H */
--- /dev/null
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <asm/atomic.h>
+#include <asm/rwlock.h>
+
+#if 0
+#define SPINLOCK_DEBUG 1
+#else
+#define SPINLOCK_DEBUG 0
+#endif
+
+/*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ */
+
+typedef struct {
+ volatile unsigned int lock;
+#if SPINLOCK_DEBUG
+ unsigned magic;
+#endif
+} spinlock_t;
+
+#define SPINLOCK_MAGIC 0xdead4ead
+
+#if SPINLOCK_DEBUG
+#define SPINLOCK_MAGIC_INIT , SPINLOCK_MAGIC
+#else
+#define SPINLOCK_MAGIC_INIT /* */
+#endif
+
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT }
+
+#define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
+
+/*
+ * Simple spin lock operations. There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * We make no fairness assumptions. They have a cost.
+ */
+
+#define spin_is_locked(x) (*(volatile char *)(&(x)->lock) <= 0)
+#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x))
+
+#define spin_lock_string \
+ "\n1:\t" \
+ "lock ; decb %0\n\t" \
+ "js 2f\n" \
+ ".section .text.lock,\"ax\"\n" \
+ "2:\t" \
+ "cmpb $0,%0\n\t" \
+ "rep;nop\n\t" \
+ "jle 2b\n\t" \
+ "jmp 1b\n" \
+ ".previous"
+
+/*
+ * This works, despite all the confusion, except on PPro SMP or when
+ * we are using OOSTORE (PPro errata 66 and 92).
+ */
+
+#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
+
+#define spin_unlock_string \
+ "movb $1,%0" \
+ :"=m" (lock->lock) : : "memory"
+
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+#if SPINLOCK_DEBUG
+ if (lock->magic != SPINLOCK_MAGIC)
+ BUG();
+ if (!spin_is_locked(lock))
+ BUG();
+#endif
+ __asm__ __volatile__(
+ spin_unlock_string
+ );
+}
+
+#else
+
+#define spin_unlock_string \
+ "xchgb %b0, %1" \
+ :"=q" (oldval), "=m" (lock->lock) \
+ :"0" (oldval) : "memory"
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+ char oldval = 1;
+#if SPINLOCK_DEBUG
+ if (lock->magic != SPINLOCK_MAGIC)
+ BUG();
+ if (!spin_is_locked(lock))
+ BUG();
+#endif
+ __asm__ __volatile__(
+ spin_unlock_string
+ );
+}
+
+#endif
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+ char oldval;
+ __asm__ __volatile__(
+ "xchgb %b0,%1"
+ :"=q" (oldval), "=m" (lock->lock)
+ :"0" (0) : "memory");
+ return oldval > 0;
+}
+
+static inline void spin_lock(spinlock_t *lock)
+{
+#if SPINLOCK_DEBUG
+ __label__ here;
+here:
+ if (lock->magic != SPINLOCK_MAGIC) {
+printk("eip: %p\n", &&here);
+ BUG();
+ }
+#endif
+ __asm__ __volatile__(
+ spin_lock_string
+ :"=m" (lock->lock) : : "memory");
+}
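+
+/*
+ * Usage sketch (illustrative only; q_lock is a hypothetical example):
+ *
+ * static spinlock_t q_lock = SPIN_LOCK_UNLOCKED;
+ *
+ * spin_lock(&q_lock);
+ * ... touch the shared structure ...
+ * spin_unlock(&q_lock);
+ */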
+
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+typedef struct {
+ volatile unsigned int lock;
+#if SPINLOCK_DEBUG
+ unsigned magic;
+#endif
+} rwlock_t;
+
+#define RWLOCK_MAGIC 0xdeaf1eed
+
+#if SPINLOCK_DEBUG
+#define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC
+#else
+#define RWLOCK_MAGIC_INIT /* */
+#endif
+
+#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT }
+
+#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
+
+/*
+ * On x86, we implement read-write locks as a 32-bit counter
+ * with the high bit (sign) being the "contended" bit.
+ *
+ * The inline assembly is non-obvious. Think about it.
+ *
+ * Changed to use the same technique as rw semaphores. See
+ * semaphore.h for details. -ben
+ */
+/* the spinlock helpers are in arch/x86/kernel/semaphore.c */
+
+static inline void read_lock(rwlock_t *rw)
+{
+#if SPINLOCK_DEBUG
+ if (rw->magic != RWLOCK_MAGIC)
+ BUG();
+#endif
+ __build_read_lock(rw, "__read_lock_failed");
+}
+
+static inline void write_lock(rwlock_t *rw)
+{
+#if SPINLOCK_DEBUG
+ if (rw->magic != RWLOCK_MAGIC)
+ BUG();
+#endif
+ __build_write_lock(rw, "__write_lock_failed");
+}
+
+#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
+#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
+
+static inline int write_trylock(rwlock_t *lock)
+{
+ atomic_t *count = (atomic_t *)lock;
+ if (atomic_sub_and_test(RW_LOCK_BIAS, count))
+ return 1;
+ atomic_add(RW_LOCK_BIAS, count);
+ return 0;
+}
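+
+/*
+ * Usage sketch (illustrative only; table_lock is a hypothetical
+ * example): readers may run concurrently, a writer excludes everyone.
+ *
+ * static rwlock_t table_lock = RW_LOCK_UNLOCKED;
+ *
+ * read_lock(&table_lock);   /- many CPUs may hold this at once
+ * ... look up an entry ...
+ * read_unlock(&table_lock);
+ *
+ * write_lock(&table_lock);  /- exclusive: waits for all readers
+ * ... modify the table ...
+ * write_unlock(&table_lock);
+ */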
+
+#endif /* __ASM_SPINLOCK_H */
--- /dev/null
+#ifndef _I386_STRING_H_
+#define _I386_STRING_H_
+
+#ifdef __KERNEL__
+#include <xen/config.h>
+/*
+ * On a 486 or Pentium, we are better off not using the
+ * byte string operations. But on a 386 or a PPro the
+ * byte string ops are faster than doing it by hand
+ * (MUCH faster on a Pentium).
+ *
+ * Also, the byte strings actually work correctly. Forget
+ * the i486 routines for now as they may be broken..
+ */
+#if FIXED_486_STRING && defined(CONFIG_X86_USE_STRING_486)
+#include <asm/string-486.h>
+#else
+
+/*
+ * This string-include defines all string functions as inline
+ * functions. Use gcc. It also assumes ds=es=data space, this should be
+ * normal. Most of the string-functions are rather heavily hand-optimized,
+ * see especially strtok,strstr,str[c]spn. They should work, but are not
+ * very easy to understand. Everything is done entirely within the register
+ * set, making the functions fast and clean. String instructions have been
+ * used through-out, making for "slightly" unclear code :-)
+ *
+ * NO Copyright (C) 1991, 1992 Linus Torvalds,
+ * consider these trivial functions to be PD.
+ */
+
+
+#define __HAVE_ARCH_STRCPY
+static inline char * strcpy(char * dest,const char *src)
+{
+int d0, d1, d2;
+__asm__ __volatile__(
+ "1:\tlodsb\n\t"
+ "stosb\n\t"
+ "testb %%al,%%al\n\t"
+ "jne 1b"
+ : "=&S" (d0), "=&D" (d1), "=&a" (d2)
+ :"0" (src),"1" (dest) : "memory");
+return dest;
+}
+
+#define __HAVE_ARCH_STRNCPY
+static inline char * strncpy(char * dest,const char *src,size_t count)
+{
+int d0, d1, d2, d3;
+__asm__ __volatile__(
+ "1:\tdecl %2\n\t"
+ "js 2f\n\t"
+ "lodsb\n\t"
+ "stosb\n\t"
+ "testb %%al,%%al\n\t"
+ "jne 1b\n\t"
+ "rep\n\t"
+ "stosb\n"
+ "2:"
+ : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
+ :"0" (src),"1" (dest),"2" (count) : "memory");
+return dest;
+}
+
+#define __HAVE_ARCH_STRCAT
+static inline char * strcat(char * dest,const char * src)
+{
+int d0, d1, d2, d3;
+__asm__ __volatile__(
+ "repne\n\t"
+ "scasb\n\t"
+ "decl %1\n"
+ "1:\tlodsb\n\t"
+ "stosb\n\t"
+ "testb %%al,%%al\n\t"
+ "jne 1b"
+ : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
+ : "0" (src), "1" (dest), "2" (0), "3" (0xffffffff):"memory");
+return dest;
+}
+
+#define __HAVE_ARCH_STRNCAT
+static inline char * strncat(char * dest,const char * src,size_t count)
+{
+int d0, d1, d2, d3;
+__asm__ __volatile__(
+ "repne\n\t"
+ "scasb\n\t"
+ "decl %1\n\t"
+ "movl %8,%3\n"
+ "1:\tdecl %3\n\t"
+ "js 2f\n\t"
+ "lodsb\n\t"
+ "stosb\n\t"
+ "testb %%al,%%al\n\t"
+ "jne 1b\n"
+ "2:\txorl %2,%2\n\t"
+ "stosb"
+ : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
+ : "0" (src),"1" (dest),"2" (0),"3" (0xffffffff), "g" (count)
+ : "memory");
+return dest;
+}
+
+#define __HAVE_ARCH_STRCMP
+static inline int strcmp(const char * cs,const char * ct)
+{
+int d0, d1;
+register int __res;
+__asm__ __volatile__(
+ "1:\tlodsb\n\t"
+ "scasb\n\t"
+ "jne 2f\n\t"
+ "testb %%al,%%al\n\t"
+ "jne 1b\n\t"
+ "xorl %%eax,%%eax\n\t"
+ "jmp 3f\n"
+ "2:\tsbbl %%eax,%%eax\n\t"
+ "orb $1,%%al\n"
+ "3:"
+ :"=a" (__res), "=&S" (d0), "=&D" (d1)
+ :"1" (cs),"2" (ct));
+return __res;
+}
+
+#define __HAVE_ARCH_STRNCMP
+static inline int strncmp(const char * cs,const char * ct,size_t count)
+{
+register int __res;
+int d0, d1, d2;
+__asm__ __volatile__(
+ "1:\tdecl %3\n\t"
+ "js 2f\n\t"
+ "lodsb\n\t"
+ "scasb\n\t"
+ "jne 3f\n\t"
+ "testb %%al,%%al\n\t"
+ "jne 1b\n"
+ "2:\txorl %%eax,%%eax\n\t"
+ "jmp 4f\n"
+ "3:\tsbbl %%eax,%%eax\n\t"
+ "orb $1,%%al\n"
+ "4:"
+ :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
+ :"1" (cs),"2" (ct),"3" (count));
+return __res;
+}
+
+#define __HAVE_ARCH_STRCHR
+static inline char * strchr(const char * s, int c)
+{
+int d0;
+register char * __res;
+__asm__ __volatile__(
+ "movb %%al,%%ah\n"
+ "1:\tlodsb\n\t"
+ "cmpb %%ah,%%al\n\t"
+ "je 2f\n\t"
+ "testb %%al,%%al\n\t"
+ "jne 1b\n\t"
+ "movl $1,%1\n"
+ "2:\tmovl %1,%0\n\t"
+ "decl %0"
+ :"=a" (__res), "=&S" (d0) : "1" (s),"0" (c));
+return __res;
+}
+
+#define __HAVE_ARCH_STRRCHR
+static inline char * strrchr(const char * s, int c)
+{
+int d0, d1;
+register char * __res;
+__asm__ __volatile__(
+ "movb %%al,%%ah\n"
+ "1:\tlodsb\n\t"
+ "cmpb %%ah,%%al\n\t"
+ "jne 2f\n\t"
+ "leal -1(%%esi),%0\n"
+ "2:\ttestb %%al,%%al\n\t"
+ "jne 1b"
+ :"=g" (__res), "=&S" (d0), "=&a" (d1) :"0" (0),"1" (s),"2" (c));
+return __res;
+}
+
+#define __HAVE_ARCH_STRLEN
+static inline size_t strlen(const char * s)
+{
+int d0;
+register int __res;
+__asm__ __volatile__(
+ "repne\n\t"
+ "scasb\n\t"
+ "notl %0\n\t"
+ "decl %0"
+ :"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffff));
+return __res;
+}
+
+static inline void * __memcpy(void * to, const void * from, size_t n)
+{
+int d0, d1, d2;
+__asm__ __volatile__(
+ "rep ; movsl\n\t"
+ "testb $2,%b4\n\t"
+ "je 1f\n\t"
+ "movsw\n"
+ "1:\ttestb $1,%b4\n\t"
+ "je 2f\n\t"
+ "movsb\n"
+ "2:"
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+ :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
+ : "memory");
+return (to);
+}
+
+/*
+ * This looks horribly ugly, but the compiler can optimize it totally,
+ * as the count is constant.
+ */
+static inline void * __constant_memcpy(void * to, const void * from, size_t n)
+{
+ switch (n) {
+ case 0:
+ return to;
+ case 1:
+ *(unsigned char *)to = *(const unsigned char *)from;
+ return to;
+ case 2:
+ *(unsigned short *)to = *(const unsigned short *)from;
+ return to;
+ case 3:
+ *(unsigned short *)to = *(const unsigned short *)from;
+ *(2+(unsigned char *)to) = *(2+(const unsigned char *)from);
+ return to;
+ case 4:
+ *(unsigned long *)to = *(const unsigned long *)from;
+ return to;
+ case 6: /* for Ethernet addresses */
+ *(unsigned long *)to = *(const unsigned long *)from;
+ *(2+(unsigned short *)to) = *(2+(const unsigned short *)from);
+ return to;
+ case 8:
+ *(unsigned long *)to = *(const unsigned long *)from;
+ *(1+(unsigned long *)to) = *(1+(const unsigned long *)from);
+ return to;
+ case 12:
+ *(unsigned long *)to = *(const unsigned long *)from;
+ *(1+(unsigned long *)to) = *(1+(const unsigned long *)from);
+ *(2+(unsigned long *)to) = *(2+(const unsigned long *)from);
+ return to;
+ case 16:
+ *(unsigned long *)to = *(const unsigned long *)from;
+ *(1+(unsigned long *)to) = *(1+(const unsigned long *)from);
+ *(2+(unsigned long *)to) = *(2+(const unsigned long *)from);
+ *(3+(unsigned long *)to) = *(3+(const unsigned long *)from);
+ return to;
+ case 20:
+ *(unsigned long *)to = *(const unsigned long *)from;
+ *(1+(unsigned long *)to) = *(1+(const unsigned long *)from);
+ *(2+(unsigned long *)to) = *(2+(const unsigned long *)from);
+ *(3+(unsigned long *)to) = *(3+(const unsigned long *)from);
+ *(4+(unsigned long *)to) = *(4+(const unsigned long *)from);
+ return to;
+ }
+#define COMMON(x) \
+__asm__ __volatile__( \
+ "rep ; movsl" \
+ x \
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2) \
+ : "0" (n/4),"1" ((long) to),"2" ((long) from) \
+ : "memory");
+{
+ int d0, d1, d2;
+ switch (n % 4) {
+ case 0: COMMON(""); return to;
+ case 1: COMMON("\n\tmovsb"); return to;
+ case 2: COMMON("\n\tmovsw"); return to;
+ default: COMMON("\n\tmovsw\n\tmovsb"); return to;
+ }
+}
+
+#undef COMMON
+}
+
+#define __HAVE_ARCH_MEMCPY
+
+#define memcpy(t, f, n) \
+(__builtin_constant_p(n) ? \
+ __constant_memcpy((t),(f),(n)) : \
+ __memcpy((t),(f),(n)))
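+
+/*
+ * Dispatch example (annotation, not original code): with a
+ * compile-time constant length the macro resolves to
+ * __constant_memcpy(), so e.g. memcpy(dst, mac, 6) -- the Ethernet
+ * address case above -- collapses to one 32-bit and one 16-bit move.
+ */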
+
+
+/*
+ * struct_cpy(x,y): copy structure *y into (matching structure) *x.
+ *
+ * We get link-time errors if the structure sizes do not match.
+ * There is no runtime overhead, it's all optimized away at
+ * compile time.
+ */
+//extern void __struct_cpy_bug (void);
+
+/*
+#define struct_cpy(x,y) \
+({ \
+ if (sizeof(*(x)) != sizeof(*(y))) \
+ __struct_cpy_bug; \
+ memcpy(x, y, sizeof(*(x))); \
+})
+*/
+
+#define __HAVE_ARCH_MEMMOVE
+static inline void * memmove(void * dest,const void * src, size_t n)
+{
+int d0, d1, d2;
+if (dest<src)
+__asm__ __volatile__(
+ "rep\n\t"
+ "movsb"
+ : "=&c" (d0), "=&S" (d1), "=&D" (d2)
+ :"0" (n),"1" (src),"2" (dest)
+ : "memory");
+else
+__asm__ __volatile__(
+ "std\n\t"
+ "rep\n\t"
+ "movsb\n\t"
+ "cld"
+ : "=&c" (d0), "=&S" (d1), "=&D" (d2)
+ :"0" (n),
+ "1" (n-1+(const char *)src),
+ "2" (n-1+(char *)dest)
+ :"memory");
+return dest;
+}
+
+#define memcmp __builtin_memcmp
+
+#define __HAVE_ARCH_MEMCHR
+static inline void * memchr(const void * cs,int c,size_t count)
+{
+int d0;
+register void * __res;
+if (!count)
+ return NULL;
+__asm__ __volatile__(
+ "repne\n\t"
+ "scasb\n\t"
+ "je 1f\n\t"
+ "movl $1,%0\n"
+ "1:\tdecl %0"
+ :"=D" (__res), "=&c" (d0) : "a" (c),"0" (cs),"1" (count));
+return __res;
+}
+
+static inline void * __memset_generic(void * s, char c,size_t count)
+{
+int d0, d1;
+__asm__ __volatile__(
+ "rep\n\t"
+ "stosb"
+ : "=&c" (d0), "=&D" (d1)
+ :"a" (c),"1" (s),"0" (count)
+ :"memory");
+return s;
+}
+
+/* we might want to write optimized versions of these later */
+#define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count))
+
+/*
+ * memset(x,0,y) is a reasonably common thing to do, so we want to fill
+ * things 32 bits at a time even when we don't know the size of the
+ * area at compile-time..
+ */
+static inline void * __constant_c_memset(void * s, unsigned long c, size_t count)
+{
+int d0, d1;
+__asm__ __volatile__(
+ "rep ; stosl\n\t"
+ "testb $2,%b3\n\t"
+ "je 1f\n\t"
+ "stosw\n"
+ "1:\ttestb $1,%b3\n\t"
+ "je 2f\n\t"
+ "stosb\n"
+ "2:"
+ : "=&c" (d0), "=&D" (d1)
+ :"a" (c), "q" (count), "0" (count/4), "1" ((long) s)
+ :"memory");
+return (s);
+}
+
+/* Added by Gertjan van Wingerde to make the minix and sysv modules work */
+#define __HAVE_ARCH_STRNLEN
+static inline size_t strnlen(const char * s, size_t count)
+{
+int d0;
+register int __res;
+__asm__ __volatile__(
+ "movl %2,%0\n\t"
+ "jmp 2f\n"
+ "1:\tcmpb $0,(%0)\n\t"
+ "je 3f\n\t"
+ "incl %0\n"
+ "2:\tdecl %1\n\t"
+ "cmpl $-1,%1\n\t"
+ "jne 1b\n"
+ "3:\tsubl %2,%0"
+ :"=a" (__res), "=&d" (d0)
+ :"c" (s),"1" (count));
+return __res;
+}
+/* end of additional stuff */
+
+//#define __HAVE_ARCH_STRSTR
+
+//extern char *strstr(const char *cs, const char *ct);
+
+/*
+ * This looks horribly ugly, but the compiler can optimize it totally,
+ * as by now we know that both pattern and count are constant.
+ */
+static inline void * __constant_c_and_count_memset(void * s, unsigned long pattern, size_t count)
+{
+ switch (count) {
+ case 0:
+ return s;
+ case 1:
+ *(unsigned char *)s = pattern;
+ return s;
+ case 2:
+ *(unsigned short *)s = pattern;
+ return s;
+ case 3:
+ *(unsigned short *)s = pattern;
+ *(2+(unsigned char *)s) = pattern;
+ return s;
+ case 4:
+ *(unsigned long *)s = pattern;
+ return s;
+ }
+#define COMMON(x) \
+__asm__ __volatile__( \
+ "rep ; stosl" \
+ x \
+ : "=&c" (d0), "=&D" (d1) \
+ : "a" (pattern),"0" (count/4),"1" ((long) s) \
+ : "memory")
+{
+ int d0, d1;
+ switch (count % 4) {
+ case 0: COMMON(""); return s;
+ case 1: COMMON("\n\tstosb"); return s;
+ case 2: COMMON("\n\tstosw"); return s;
+ default: COMMON("\n\tstosw\n\tstosb"); return s;
+ }
+}
+
+#undef COMMON
+}
+
+#define __constant_c_x_memset(s, c, count) \
+(__builtin_constant_p(count) ? \
+ __constant_c_and_count_memset((s),(c),(count)) : \
+ __constant_c_memset((s),(c),(count)))
+
+#define __memset(s, c, count) \
+(__builtin_constant_p(count) ? \
+ __constant_count_memset((s),(c),(count)) : \
+ __memset_generic((s),(c),(count)))
+
+#define __HAVE_ARCH_MEMSET
+#define memset(s, c, count) \
+(__builtin_constant_p(c) ? \
+ __constant_c_x_memset((s),(0x01010101UL*(unsigned char)(c)),(count)) : \
+ __memset((s),(c),(count)))
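+
+/*
+ * Pattern trick (annotation, not original code): multiplying the byte
+ * by 0x01010101UL replicates it into all four byte lanes, so the
+ * word-at-a-time stores write the right value; c = 0xAB yields the
+ * fill pattern 0xABABABAB.
+ */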
+
+/*
+ * find the first occurrence of byte 'c', or 1 past the area if none
+ */
+#define __HAVE_ARCH_MEMSCAN
+static inline void * memscan(void * addr, int c, size_t size)
+{
+ if (!size)
+ return addr;
+ __asm__("repnz; scasb\n\t"
+ "jnz 1f\n\t"
+ "dec %%edi\n"
+ "1:"
+ : "=D" (addr), "=c" (size)
+ : "0" (addr), "1" (size), "a" (c));
+ return addr;
+}
+
+#endif /* CONFIG_X86_USE_STRING_486 */
+#endif /* __KERNEL__ */
+
+#endif
--- /dev/null
+#ifndef __ASM_SYSTEM_H
+#define __ASM_SYSTEM_H
+
+#include <xen/config.h>
+#include <asm/bitops.h>
+
+/* Clear and set 'TS' bit respectively */
+#define clts() __asm__ __volatile__ ("clts")
+#define stts() write_cr0(X86_CR0_TS|read_cr0())
+
+#define wbinvd() \
+ __asm__ __volatile__ ("wbinvd": : :"memory");
+
+static inline unsigned long get_limit(unsigned long segment)
+{
+ unsigned long __limit;
+ __asm__("lsll %1,%0"
+ :"=r" (__limit):"r" (segment));
+ return __limit+1;
+}
+
+#define nop() __asm__ __volatile__ ("nop")
+
+#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
+
+struct __xchg_dummy { unsigned long a[100]; };
+#define __xg(x) ((struct __xchg_dummy *)(x))
+
+
+/*
+ * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
+ * Note 2: xchg has a side effect, so the volatile attribute is necessary;
+ * strictly the constraints are imprecise, since *ptr is also an output
+ * argument. --ANK
+ */
+static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
+{
+ switch (size) {
+ case 1:
+ __asm__ __volatile__("xchgb %b0,%1"
+ :"=q" (x)
+ :"m" (*__xg(ptr)), "0" (x)
+ :"memory");
+ break;
+ case 2:
+ __asm__ __volatile__("xchgw %w0,%1"
+ :"=r" (x)
+ :"m" (*__xg(ptr)), "0" (x)
+ :"memory");
+ break;
+#ifdef x86_32
+ case 4:
+ __asm__ __volatile__("xchgl %0,%1"
+ :"=r" (x)
+ :"m" (*__xg(ptr)), "0" (x)
+ :"memory");
+ break;
+#else
+ case 4:
+ __asm__ __volatile__("xchgl %k0,%1"
+ :"=r" (x)
+ :"m" (*__xg(ptr)), "0" (x)
+ :"memory");
+ break;
+ case 8:
+ __asm__ __volatile__("xchgq %0,%1"
+ :"=r" (x)
+ :"m" (*__xg(ptr)), "0" (x)
+ :"memory");
+ break;
+#endif
+ }
+ return x;
+}
+
+/*
+ * Atomic compare and exchange. Compare OLD with MEM, if identical,
+ * store NEW in MEM. Return the initial value in MEM. Success is
+ * indicated by comparing RETURN with OLD.
+ */
+
+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+ unsigned long new, int size)
+{
+ unsigned long prev;
+ switch (size) {
+ case 1:
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+#ifdef x86_32
+ case 4:
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+#else
+ case 4:
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %k1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 8:
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgq %1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+#endif
+ }
+ return old;
+}
+
+#define cmpxchg(ptr,o,n)\
+ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+ (unsigned long)(n),sizeof(*(ptr))))
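+
+/*
+ * Usage sketch (illustrative only; atomic_inc_cas() is a hypothetical
+ * helper): the classic compare-and-swap retry loop.  We re-read on
+ * failure because another CPU may have changed the word between our
+ * load and the cmpxchg.
+ *
+ * static inline void atomic_inc_cas(volatile unsigned long *p)
+ * {
+ *     unsigned long old;
+ *     do {
+ *         old = *p;
+ *     } while (cmpxchg(p, old, old + 1) != old);
+ * }
+ */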
+
+
+/*
+ * This function causes longword _o to be changed to _n at location _p.
+ * If this access causes a fault then we return 1, otherwise we return 0.
+ * If no fault occurs then _o is updated to the value we saw at _p. If this
+ * is the same as the initial value of _o then _n is written to location _p.
+ */
+#define cmpxchg_user(_p,_o,_n) \
+({ \
+ int _rc; \
+ __asm__ __volatile__ ( \
+ "1: " LOCK_PREFIX "cmpxchg"__OS" %2,%3\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl $1,%1\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,3b\n" \
+ ".previous" \
+ : "=a" (_o), "=r" (_rc) \
+ : "q" (_n), "m" (*__xg((volatile void *)_p)), "0" (_o), "1" (0) \
+ : "memory"); \
+ _rc; \
+})
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ *
+ * For now, "wmb()" doesn't actually do anything, as all
+ * Intel CPU's follow what Intel calls a *Processor Order*,
+ * in which all writes are seen in the program order even
+ * outside the CPU.
+ *
+ * I expect future Intel CPU's to have a weaker ordering,
+ * but I'd also expect them to finally get their act together
+ * and add some real memory barriers if so.
+ *
+ * Some non-Intel clones support out-of-order stores; wmb() ceases to be a
+ * no-op for those.
+ */
+
+#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
+#define rmb() mb()
+
+#ifdef CONFIG_X86_OOSTORE
+#define wmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
+#else
+#define wmb() __asm__ __volatile__ ("": : :"memory")
+#endif
+
+#ifdef CONFIG_SMP
+#define smp_mb() mb()
+#define smp_rmb() rmb()
+#define smp_wmb() wmb()
+#else
+#define smp_mb() barrier()
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#endif
+
+#define set_mb(var, value) do { xchg(&var, value); } while (0)
+#define set_wmb(var, value) do { var = value; wmb(); } while (0)
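+
+/*
+ * Ordering example (annotation, not original code): a producer that
+ * publishes data through a flag needs smp_wmb() between the two
+ * stores, and the consumer needs the matching smp_rmb() between the
+ * two loads:
+ *
+ * CPU0:  data = 42; smp_wmb(); ready = 1;
+ * CPU1:  if (ready) { smp_rmb(); use(data); }
+ */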
+
+/* interrupt control.. */
+#ifdef x86_64
+#define __save_flags(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0)
+#define __restore_flags(x) __asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc")
+#else
+#define __save_flags(x) __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */)
+#define __restore_flags(x) __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc")
+#endif
+#define __cli() __asm__ __volatile__("cli": : :"memory")
+#define __sti() __asm__ __volatile__("sti": : :"memory")
+/* used in the idle loop; sti takes one instruction cycle to complete */
+#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory")
+
+/* For spinlocks etc */
+#ifdef x86_64
+#define local_irq_save(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0)
+#define local_irq_restore(x) __asm__ __volatile__("# local_irq_restore \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory")
+#else
+#define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
+#define local_irq_restore(x) __restore_flags(x)
+#endif
+#define local_irq_disable() __cli()
+#define local_irq_enable() __sti()
+
+static inline int local_irq_is_enabled(void)
+{
+ unsigned long flags;
+ __save_flags(flags);
+ return !!(flags & (1<<9)); /* EFLAGS_IF */
+}
+
+#ifdef CONFIG_SMP
+
+extern void __global_cli(void);
+extern void __global_sti(void);
+extern unsigned long __global_save_flags(void);
+extern void __global_restore_flags(unsigned long);
+#define cli() __global_cli()
+#define sti() __global_sti()
+#define save_flags(x) ((x)=__global_save_flags())
+#define restore_flags(x) __global_restore_flags(x)
+
+#else
+
+#define cli() __cli()
+#define sti() __sti()
+#define save_flags(x) __save_flags(x)
+#define restore_flags(x) __restore_flags(x)
+
+#endif
+
+/*
+ * disable hlt during certain critical i/o operations
+ */
+#define HAVE_DISABLE_HLT
+void disable_hlt(void);
+void enable_hlt(void);
+
+#define BROKEN_ACPI_Sx 0x0001
+#define BROKEN_INIT_AFTER_S1 0x0002
+
+#endif
--- /dev/null
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: time.h
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ *
+ * Environment: Xen Hypervisor
+ * Description: Architecture dependent definition of time variables
+ */
+
+#ifndef _ASM_TIME_H_
+#define _ASM_TIME_H_
+
+#include <asm/types.h>
+#include <asm/msr.h>
+
+typedef s64 s_time_t; /* system time */
+
+#endif /* _ASM_TIME_H_ */
--- /dev/null
+/*
+ * linux/include/asm-i386/timex.h
+ *
+ * i386 architecture timex specifications
+ */
+#ifndef _ASMi386_TIMEX_H
+#define _ASMi386_TIMEX_H
+
+#include <xen/config.h>
+#include <asm/msr.h>
+
+#ifdef CONFIG_MELAN
+# define CLOCK_TICK_RATE 1189200 /* AMD Elan has different frequency! */
+#else
+# define CLOCK_TICK_RATE 1193180 /* Underlying HZ */
+#endif
+
+#define CLOCK_TICK_FACTOR 20 /* Factor of both 1000000 and CLOCK_TICK_RATE */
+#define FINETUNE ((((((long)LATCH * HZ - CLOCK_TICK_RATE) << SHIFT_HZ) * \
+ (1000000/CLOCK_TICK_FACTOR) / (CLOCK_TICK_RATE/CLOCK_TICK_FACTOR)) \
+ << (SHIFT_SCALE-SHIFT_HZ)) / HZ)
+
+/*
+ * Standard way to access the cycle counter on i586+ CPUs.
+ * Currently only used on SMP.
+ *
+ * If you really have an SMP machine with i486 chips or older,
+ * compile for that, and this will just always return zero.
+ * That's ok, it just means that the nicer scheduling heuristics
+ * won't work for you.
+ *
+ * We only use the low 32 bits, and we'd simply better make sure
+ * that we reschedule before that wraps. Scheduling at least every
+ * four billion cycles just basically sounds like a good idea,
+ * regardless of how fast the machine is.
+ */
+typedef unsigned long long cycles_t;
+
+extern cycles_t cacheflush_time;
+
+static inline cycles_t get_cycles (void)
+{
+#ifndef CONFIG_X86_TSC
+ return 0;
+#else
+ unsigned long long ret;
+
+ rdtscll(ret);
+ return ret;
+#endif
+}
+
+extern unsigned long cpu_khz;
+
+#define vxtime_lock() do {} while (0)
+#define vxtime_unlock() do {} while (0)
+
+#endif
--- /dev/null
+#ifndef _X86_TYPES_H
+#define _X86_TYPES_H
+
+typedef unsigned short umode_t;
+
+#ifdef x86_32
+typedef unsigned int size_t;
+#else
+typedef unsigned long size_t;
+#endif
+
+/*
+ * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
+ * header files exported to user space
+ */
+
+typedef __signed__ char __s8;
+typedef unsigned char __u8;
+
+typedef __signed__ short __s16;
+typedef unsigned short __u16;
+
+typedef __signed__ int __s32;
+typedef unsigned int __u32;
+
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+#ifdef x86_32
+typedef __signed__ long long __s64;
+typedef unsigned long long __u64;
+#else
+typedef __signed__ long __s64;
+typedef unsigned long __u64;
+#endif
+#endif
+
+#include <xen/config.h>
+
+typedef signed char s8;
+typedef unsigned char u8;
+
+typedef signed short s16;
+typedef unsigned short u16;
+
+typedef signed int s32;
+typedef unsigned int u32;
+
+#ifdef x86_32
+typedef signed long long s64;
+typedef unsigned long long u64;
+#define BITS_PER_LONG 32
+#else
+typedef signed long s64;
+typedef unsigned long u64;
+#define BITS_PER_LONG 64
+#endif
+
+/* DMA addresses come in generic and 64-bit flavours. */
+
+typedef unsigned long dma_addr_t;
+typedef u64 dma64_addr_t;
+
+#endif
--- /dev/null
+#ifndef __i386_UACCESS_H
+#define __i386_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/prefetch.h>
+#include <asm/page.h>
+
+#define VERIFY_READ 0
+#define VERIFY_WRITE 1
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not. If get_fs() == USER_DS, checking is performed; with
+ * get_fs() == KERNEL_DS it is bypassed.
+ *
+ * For historical reasons, these macros are grossly misnamed.
+ */
+
+#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
+
+
+#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF)
+#define USER_DS MAKE_MM_SEG(PAGE_OFFSET)
+
+#define get_ds() (KERNEL_DS)
+#define get_fs() (current->addr_limit)
+#define set_fs(x) (current->addr_limit = (x))
+
+#define segment_eq(a,b) ((a).seg == (b).seg)
+
+extern int __verify_write(const void *, unsigned long);
+
+#define __addr_ok(addr) ((unsigned long)(addr) < (current->addr_limit.seg))
+
+/*
+ * Uhhuh, this needs 33-bit arithmetic. We have a carry..
+ */
+#define __range_ok(addr,size) ({ \
+ unsigned long flag,sum; \
+ asm("addl %3,%1 ; sbbl %0,%0; cmpl %1,%4; sbbl $0,%0" \
+ :"=&r" (flag), "=r" (sum) \
+ :"1" (addr),"g" ((int)(size)),"g" (current->addr_limit.seg)); \
+ flag; })
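+
+/*
+ * How the asm works (annotation, not original code): "addl" computes
+ * addr+size and raises the carry flag on 32-bit wraparound; "sbbl
+ * %0,%0" turns that carry into flag = -1.  The "cmpl ; sbbl $0" pair
+ * then also drives flag negative when addr+size exceeds
+ * current->addr_limit.  A zero result thus means the whole range is
+ * below the limit with no wraparound.
+ */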
+
+#define access_ok(type,addr,size) (__range_ok(addr,size) == 0)
+
+static inline int verify_area(int type, const void * addr, unsigned long size)
+{
+ return access_ok(type,addr,size) ? 0 : -EFAULT;
+}
+
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue. No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path. This means when everything is well,
+ * we don't even have to jump over them. Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry
+{
+ unsigned long insn, fixup;
+};
+
+/* Returns 0 if exception not found and fixup otherwise. */
+extern unsigned long search_exception_table(unsigned long);
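+
+/*
+ * Annotation: the __ex_table entries are emitted by the accessor
+ * macros below -- e.g. __put_user_asm records ".long 1b,3b", pairing
+ * the potentially-faulting mov at label 1 with the fixup stub at
+ * label 3, which loads -EFAULT into the error variable and resumes
+ * at label 2.
+ */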
+
+
+/*
+ * These are the main single-value transfer routines. They automatically
+ * use the right size if we just have the right pointer type.
+ *
+ * This gets kind of ugly. We want to return _two_ values in "get_user()"
+ * and yet we don't want to do any pointers, because that is too much
+ * of a performance impact. Thus we have a few rather ugly macros here,
+ * and hide all the ugliness from the user.
+ *
+ * The "__xxx" versions of the user access functions are versions that
+ * do not verify the address space, that must have been done previously
+ * with a separate "access_ok()" call (this is used when we do multiple
+ * accesses to the same area of user memory).
+ */
+
+extern void __get_user_1(void);
+extern void __get_user_2(void);
+extern void __get_user_4(void);
+
+#define __get_user_x(size,ret,x,ptr) \
+ __asm__ __volatile__("call __get_user_" #size \
+ :"=a" (ret),"=d" (x) \
+ :"0" (ptr))
+
+/* Careful: we have to cast the result to the type of the pointer for sign reasons */
+#define get_user(x,ptr) \
+({ int __ret_gu=1; unsigned long long __val_gu=0; /* wide enough for 8-byte copies */ \
+ switch(sizeof (*(ptr))) { \
+ case 1: __ret_gu=copy_from_user(&__val_gu,ptr,1); break; \
+ case 2: __ret_gu=copy_from_user(&__val_gu,ptr,2); break; \
+ case 4: __ret_gu=copy_from_user(&__val_gu,ptr,4); break; \
+ default: __ret_gu=copy_from_user(&__val_gu,ptr,8); break; \
+ /*case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break;*/ \
+ /*case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break;*/ \
+ /*case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break;*/ \
+ /*default: __get_user_x(X,__ret_gu,__val_gu,ptr); break;*/ \
+ } \
+ (x) = (__typeof__(*(ptr)))__val_gu; \
+ __ret_gu; \
+})
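+
+/*
+ * Usage sketch (illustrative only; guest_addr is a hypothetical
+ * pointer): get_user() sizes the fetch from the pointer type and
+ * returns 0 on success.
+ *
+ * u32 val;
+ * if (get_user(val, (u32 *)guest_addr))
+ *     return -EFAULT;   /- bad user/guest pointer
+ */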
+
+extern void __put_user_1(void);
+extern void __put_user_2(void);
+extern void __put_user_4(void);
+extern void __put_user_8(void);
+
+extern void __put_user_bad(void);
+
+#define put_user(x,ptr) \
+ __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
+
+#define __get_user(x,ptr) \
+ __get_user_nocheck((x),(ptr),sizeof(*(ptr)))
+#define __put_user(x,ptr) \
+ __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
+
+#define __put_user_nocheck(x,ptr,size) \
+({ \
+ long __pu_err; \
+ __put_user_size((x),(ptr),(size),__pu_err); \
+ __pu_err; \
+})
+
+
+#define __put_user_check(x,ptr,size) \
+({ \
+ long __pu_err = -EFAULT; \
+ __typeof__(*(ptr)) *__pu_addr = (ptr); \
+ if (access_ok(VERIFY_WRITE,__pu_addr,size)) \
+ __put_user_size((x),__pu_addr,(size),__pu_err); \
+ __pu_err; \
+})
+
+#define __put_user_u64(x, addr, err) \
+ __asm__ __volatile__( \
+ "1: movl %%eax,0(%2)\n" \
+ "2: movl %%edx,4(%2)\n" \
+ "3:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "4: movl %3,%0\n" \
+ " jmp 3b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,4b\n" \
+ " .long 2b,4b\n" \
+ ".previous" \
+ : "=r"(err) \
+ : "A" (x), "r" (addr), "i"(-EFAULT), "0"(err))
+
+#define __put_user_size(x,ptr,size,retval) \
+do { \
+ retval = 0; \
+ switch (size) { \
+ case 1: __put_user_asm(x,ptr,retval,"b","b","iq"); break; \
+ case 2: __put_user_asm(x,ptr,retval,"w","w","ir"); break; \
+ case 4: __put_user_asm(x,ptr,retval,"l","","ir"); break; \
+ case 8: __put_user_u64(x,ptr,retval); break; \
+ default: __put_user_bad(); \
+ } \
+} while (0)
+
+struct __large_struct { unsigned long buf[100]; };
+#define __m(x) (*(struct __large_struct *)(x))
+
+/*
+ * Tell gcc we read from memory instead of writing: this is because
+ * we do not write to any memory gcc knows about, so there are no
+ * aliasing issues.
+ */
+#define __put_user_asm(x, addr, err, itype, rtype, ltype) \
+ __asm__ __volatile__( \
+ "1: mov"itype" %"rtype"1,%2\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl %3,%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,3b\n" \
+ ".previous" \
+ : "=r"(err) \
+ : ltype (x), "m"(__m(addr)), "i"(-EFAULT), "0"(err))
+
+
+#define __get_user_nocheck(x,ptr,size) \
+({ \
+ long __gu_err, __gu_val; \
+ __get_user_size(__gu_val,(ptr),(size),__gu_err); \
+ (x) = (__typeof__(*(ptr)))__gu_val; \
+ __gu_err; \
+})
+
+extern long __get_user_bad(void);
+
+#define __get_user_size(x,ptr,size,retval) \
+do { \
+ retval = 0; \
+ switch (size) { \
+ case 1: __get_user_asm(x,ptr,retval,"b","b","=q"); break; \
+ case 2: __get_user_asm(x,ptr,retval,"w","w","=r"); break; \
+ case 4: __get_user_asm(x,ptr,retval,"l","","=r"); break; \
+ default: (x) = __get_user_bad(); \
+ } \
+} while (0)
+
+#define __get_user_asm(x, addr, err, itype, rtype, ltype) \
+ __asm__ __volatile__( \
+ "1: mov"itype" %2,%"rtype"1\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl %3,%0\n" \
+ " xor"itype" %"rtype"1,%"rtype"1\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,3b\n" \
+ ".previous" \
+ : "=r"(err), ltype (x) \
+ : "m"(__m(addr)), "i"(-EFAULT), "0"(err))
+
+
+/*
+ * Copy To/From Userspace
+ */
+
+/* Generic arbitrary sized copy. */
+#define __copy_user(to,from,size) \
+do { \
+ int __d0, __d1; \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ " movl %3,%0\n" \
+ "1: rep; movsb\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: lea 0(%3,%0,4),%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ " .long 1b,2b\n" \
+ ".previous" \
+ : "=&c"(size), "=&D" (__d0), "=&S" (__d1) \
+ : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \
+ : "memory"); \
+} while (0)
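+
+/*
+ * Fixup note (annotation, not original code): if the "rep; movsl" at
+ * label 0 faults, the stub at label 3 rebuilds the not-copied byte
+ * count as remaining_longwords * 4 + trailing_bytes via
+ * "lea 0(%3,%0,4),%0", so callers see how much was left undone.
+ */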
+
+#define __copy_user_zeroing(to,from,size) \
+do { \
+ int __d0, __d1; \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ " movl %3,%0\n" \
+ "1: rep; movsb\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: lea 0(%3,%0,4),%0\n" \
+ "4: pushl %0\n" \
+ " pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " rep; stosb\n" \
+ " popl %%eax\n" \
+ " popl %0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ " .long 1b,4b\n" \
+ ".previous" \
+ : "=&c"(size), "=&D" (__d0), "=&S" (__d1) \
+ : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \
+ : "memory"); \
+} while (0)
+
+/* We let the __ versions of copy_from/to_user be inlined, because they're often
+ * used in fast paths and have only a small space overhead.
+ */
+static inline unsigned long
+__generic_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
+{
+ __copy_user_zeroing(to,from,n);
+ return n;
+}
+
+static inline unsigned long
+__generic_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
+{
+ __copy_user(to,from,n);
+ return n;
+}
+
+
+/* Optimize just a little bit when we know the size of the move. */
+#define __constant_copy_user(to, from, size) \
+do { \
+ int __d0, __d1; \
+ switch (size & 3) { \
+ default: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "2: shl $2,%0\n" \
+ " jmp 1b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,2b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ case 1: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1: movsb\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: shl $2,%0\n" \
+ "4: incl %0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ " .long 1b,4b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ case 2: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1: movsw\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: shl $2,%0\n" \
+ "4: addl $2,%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ " .long 1b,4b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ case 3: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1: movsw\n" \
+ "2: movsb\n" \
+ "3:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "4: shl $2,%0\n" \
+ "5: addl $2,%0\n" \
+ "6: incl %0\n" \
+ " jmp 3b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,4b\n" \
+ " .long 1b,5b\n" \
+ " .long 2b,6b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ } \
+} while (0)
+
+/* Optimize just a little bit when we know the size of the move. */
+#define __constant_copy_user_zeroing(to, from, size) \
+do { \
+ int __d0, __d1; \
+ switch (size & 3) { \
+ default: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "2: pushl %0\n" \
+ " pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " rep; stosl\n" \
+ " popl %%eax\n" \
+ " popl %0\n" \
+ " shl $2,%0\n" \
+ " jmp 1b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,2b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ case 1: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1: movsb\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: pushl %0\n" \
+ " pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " rep; stosl\n" \
+ " stosb\n" \
+ " popl %%eax\n" \
+ " popl %0\n" \
+ " shl $2,%0\n" \
+ " incl %0\n" \
+ " jmp 2b\n" \
+ "4: pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " stosb\n" \
+ " popl %%eax\n" \
+ " incl %0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ " .long 1b,4b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ case 2: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1: movsw\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: pushl %0\n" \
+ " pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " rep; stosl\n" \
+ " stosw\n" \
+ " popl %%eax\n" \
+ " popl %0\n" \
+ " shl $2,%0\n" \
+ " addl $2,%0\n" \
+ " jmp 2b\n" \
+ "4: pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " stosw\n" \
+ " popl %%eax\n" \
+ " addl $2,%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,3b\n" \
+ " .long 1b,4b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ case 3: \
+ __asm__ __volatile__( \
+ "0: rep; movsl\n" \
+ "1: movsw\n" \
+ "2: movsb\n" \
+ "3:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "4: pushl %0\n" \
+ " pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " rep; stosl\n" \
+ " stosw\n" \
+ " stosb\n" \
+ " popl %%eax\n" \
+ " popl %0\n" \
+ " shl $2,%0\n" \
+ " addl $3,%0\n" \
+ " jmp 2b\n" \
+ "5: pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " stosw\n" \
+ " stosb\n" \
+ " popl %%eax\n" \
+ " addl $3,%0\n" \
+ " jmp 2b\n" \
+ "6: pushl %%eax\n" \
+ " xorl %%eax,%%eax\n" \
+ " stosb\n" \
+ " popl %%eax\n" \
+ " incl %0\n" \
+ " jmp 3b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 0b,4b\n" \
+ " .long 1b,5b\n" \
+ " .long 2b,6b\n" \
+ ".previous" \
+ : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+ : "1"(from), "2"(to), "0"(size/4) \
+ : "memory"); \
+ break; \
+ } \
+} while (0)
+
+unsigned long __generic_copy_to_user(void *, const void *, unsigned long);
+unsigned long __generic_copy_from_user(void *, const void *, unsigned long);
+
+static inline unsigned long
+__constant_copy_to_user(void *to, const void *from, unsigned long n)
+{
+ prefetch(from);
+ if (access_ok(VERIFY_WRITE, to, n))
+ __constant_copy_user(to,from,n);
+ return n;
+}
+
+static inline unsigned long
+__constant_copy_from_user(void *to, const void *from, unsigned long n)
+{
+ if (access_ok(VERIFY_READ, from, n))
+ __constant_copy_user_zeroing(to,from,n);
+ else
+ memset(to, 0, n);
+ return n;
+}
+
+static inline unsigned long
+__constant_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
+{
+ __constant_copy_user(to,from,n);
+ return n;
+}
+
+static inline unsigned long
+__constant_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
+{
+ __constant_copy_user_zeroing(to,from,n);
+ return n;
+}
+
+#define copy_to_user(to,from,n) \
+ (__builtin_constant_p(n) ? \
+ __constant_copy_to_user((to),(from),(n)) : \
+ __generic_copy_to_user((to),(from),(n)))
+
+#define copy_from_user(to,from,n) \
+ (__builtin_constant_p(n) ? \
+ __constant_copy_from_user((to),(from),(n)) : \
+ __generic_copy_from_user((to),(from),(n)))
+
+#define __copy_to_user(to,from,n) \
+ (__builtin_constant_p(n) ? \
+ __constant_copy_to_user_nocheck((to),(from),(n)) : \
+ __generic_copy_to_user_nocheck((to),(from),(n)))
+
+#define __copy_from_user(to,from,n) \
+ (__builtin_constant_p(n) ? \
+ __constant_copy_from_user_nocheck((to),(from),(n)) : \
+ __generic_copy_from_user_nocheck((to),(from),(n)))
+
+long strncpy_from_user(char *dst, const char *src, long count);
+long __strncpy_from_user(char *dst, const char *src, long count);
+#define strlen_user(str) strnlen_user(str, ~0UL >> 1)
+long strnlen_user(const char *str, long n);
+unsigned long clear_user(void *mem, unsigned long len);
+unsigned long __clear_user(void *mem, unsigned long len);
+
+#endif /* __i386_UACCESS_H */
--- /dev/null
+#ifndef __X86_UNALIGNED_H
+#define __X86_UNALIGNED_H
+
+/*
+ * x86 can do unaligned accesses itself.
+ *
+ * The strange macros are there to make sure these can't
+ * be misused in a way that makes them not work on other
+ * architectures where unaligned accesses aren't as simple.
+ */
+
+/**
+ * get_unaligned - get value from possibly mis-aligned location
+ * @ptr: pointer to value
+ *
+ * This macro should be used for accessing values larger in size than
+ * single bytes at locations that are expected to be improperly aligned,
+ * e.g. retrieving a u16 value from a location not u16-aligned.
+ *
+ * Note that unaligned accesses can be very expensive on some architectures.
+ */
+#define get_unaligned(ptr) (*(ptr))
+
+/**
+ * put_unaligned - put value to a possibly mis-aligned location
+ * @val: value to place
+ * @ptr: pointer to location
+ *
+ * This macro should be used for placing values larger in size than
+ * single bytes at locations that are expected to be improperly aligned,
+ * e.g. writing a u16 value to a location not u16-aligned.
+ *
+ * Note that unaligned accesses can be very expensive on some architectures.
+ */
+#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) ))
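+
+/*
+ * Example (annotation, not original code; pkt is a hypothetical
+ * buffer): pulling a u16 field out of a packed on-the-wire buffer,
+ * where it may sit at an odd offset:
+ *
+ * u16 proto = get_unaligned((u16 *)(pkt + 1));
+ */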
+
+#endif
--- /dev/null
+/******************************************************************************
+ * config.h
+ *
+ * A Linux-style configuration list.
+ *
+ */
+
+#ifndef __XEN_X86_64_CONFIG_H__
+#define __XEN_X86_64_CONFIG_H__
+
+#define CONFIG_X86 1
+#define CONFIG_X86_64BITMODE 1
+
+#define CONFIG_SMP 1
+#define CONFIG_X86_LOCAL_APIC 1
+#define CONFIG_X86_IO_APIC 1
+#define CONFIG_X86_L1_CACHE_SHIFT 5
+
+#define CONFIG_PCI 1
+#define CONFIG_PCI_BIOS 1
+#define CONFIG_PCI_DIRECT 1
+
+#define CONFIG_IDE 1
+#define CONFIG_BLK_DEV_IDE 1
+#define CONFIG_BLK_DEV_IDEDMA 1
+#define CONFIG_BLK_DEV_IDEPCI 1
+#define CONFIG_IDEDISK_MULTI_MODE 1
+#define CONFIG_IDEDISK_STROKE 1
+#define CONFIG_IDEPCI_SHARE_IRQ 1
+#define CONFIG_BLK_DEV_IDEDMA_PCI 1
+#define CONFIG_IDEDMA_PCI_AUTO 1
+#define CONFIG_IDEDMA_AUTO 1
+#define CONFIG_IDEDMA_ONLYDISK 1
+#define CONFIG_BLK_DEV_IDE_MODES 1
+#define CONFIG_BLK_DEV_PIIX 1
+
+#define CONFIG_SCSI 1
+#define CONFIG_SCSI_LOGGING 1
+#define CONFIG_BLK_DEV_SD 1
+#define CONFIG_SD_EXTRA_DEVS 40
+#define CONFIG_SCSI_MULTI_LUN 1
+
+#define CONFIG_XEN_ATTENTION_KEY 1
+
+#define HZ 100
+
+/*
+ * Just to keep compiler happy.
+ * NB. DO NOT CHANGE SMP_CACHE_BYTES WITHOUT FIXING arch/x86/entry.S!!!
+ * It depends on size of irq_cpustat_t, for example, being 64 bytes. :-)
+ * Mmmm... so niiiiiice....
+ */
+#define SMP_CACHE_BYTES 64
+#define NR_CPUS 16
+#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
+#define ____cacheline_aligned __cacheline_aligned
+
+#define PHYSICAL_ADDRESS_BITS 52
+#define MAX_PHYSICAL_ADDRESS (1ULL << PHYSICAL_ADDRESS_BITS)
+#define VIRTUAL_ADDRESS_BITS 48
+#define XEN_PAGE_SIZE 4096
+
+#define PTE_SIZE 8
+#define TOTAL_PTES (512ULL * 512 * 512 * 512)
+
+/* next PML4 from an _END address */
+#define PML4_BITS 39
+#define PML4_SPACE (1ULL << PML4_BITS)
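+
+/*
+ * Worked numbers (annotation, not original code): one PML4 slot spans
+ * 2^39 bytes = 512GB, so the 16 PML4s reserved below for Xen and the
+ * 16 for the guest are 8TB apiece out of the 2^48-byte virtual space.
+ */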
+
+/*
+ * Memory layout
+ *
+ * 0x0000000000000000 - 0x00007fffffffffff Guest & user apps (128TB)
+ * (Only for 32-bit guests)
+ * 0x00000000fc000000 - 0x00000000fc3fffff Machine/Physical 32-bit shadow (4MB)
+ * 0x00000000fc400000 - 0x00000000feffffff IO remap for 32-bit guests (44MB)
+ * 0x00000000ff000000 - 0x00000000ff3fffff 32-bit PTE shadow (4MB)
+ *
+ * 0xffff800000000000 - 0xffff807fffffffff Linear page table (512GB)
+ * 0xffff808000000000 - 0xffff80ffffffffff Reserved for shadow page table (512GB)
+ *
+ * 0xffff810000000000 - 0xffff82ffffffffff Xen PML4 slots
+ * 0xffff810000000000 - 0xffff81003fffffff Xen hypervisor virtual space (1GB)
+ * 0xffff810040000000 - 0xffff81007fffffff Per-domain mappings (1GB)
+ * 0xffff810080000000 - 0xffff81387fffffff R/O physical map (224GB)
+ * 0xffff813880000000 - 0xffff81707fffffff R/W physical map (224GB)
+ * 0xffff817080000000 - 0xffff82c07fffffff Frame table (1344GB)
+ * 0xffff82c080000000 - 0xffff82c0bfffffff I/O remap space (1GB)
+ * 0xffff82c0c0000000 - 0xffff82ffffffffff (253GB)
+ *
+ * 0xffff830000000000 - 0xffff87ffffffffff RESERVED (5TB)
+ *
+ * 0xffff880000000000 - ... Physical 1:1 direct mapping (112TB max)
+ * 0xffff880000000000 - 0xffff880001000000 Low memory DMA region (16M)
+ *
+ * 0xfffff80000000000 - 0xffffffffffffffff Reserved for guest (8TB)
+ *
+ * The requirement that we have a 1:1 map of physical memory limits
+ * the maximum memory size we can support. With only 48 virtual address
+ * bits, and the assumption that guests will run users in positive address
+ * space, a contiguous 1:1 map can only live in the negative address space.
+ * Since we don't want to bump guests out of the very top of memory and
+ * force relocation, we can't use this entire space, and Xen has several
+ * heavy mapping that require PML4 slices. Just to be safe, we reserve
+ * 16 PML4s each for Xen and the guest. 224 PML4s give us 112 terabytes
+ * of addressable memory. Any high device physical addresses beyond this
+ * region can be mapped into the IO remap space or some of the reserved
+ * 6TB region.
+ *
+ * 112 TB is just 16 TB shy of the maximum physical memory supported
+ * on Linux 2.6.0, and should be enough for anybody.
+ *
+ * There are some additional constraints in the memory layout that require
+ * several changes from the i386 architecture.
+ *
+ * ACPI data and ACPI non-volatile storage must be placed in some region
+ * of memory below the 4GB mark. Depending on the BIOS and system, we
+ * may have this located as low as 1GB. This means allocating large
+ * chunks of physically contiguous memory from the direct mapping may not
+ * be possible.
+ *
+ * The full frame table for 112TB of physical memory currently occupies
+ * 1344GB of space. This clearly cannot be allocated in physically contiguous
+ * space, so it must be moved to a virtual address.
+ *
+ * Both copies of the machine->physical table must also be relocated.
+ * (112 TB / 4k) * 8 bytes means that each copy of the physical map requires
+ * 224GB of space, thus it also must move to VM space.
+ *
+ * The physical pages used to allocate the page tables for the direct 1:1
+ * map may occupy (112TB / 2M) * 8 bytes = 448MB. This is almost guaranteed
+ * to fit in contiguous physical memory, but these pages used to be allocated
+ * in the Xen monitor address space. This means the Xen address space must
+ * accommodate up to ~500 MB, which means it also must move out of the
+ * direct mapped region.
+ *
+ * Since both copies of the MPT, the frame table, and Xen now exist in
+ * purely virtual space, we have the added advantage of being able to
+ * map them to local pages on NUMA machines, or use NUMA aware memory
+ * allocation within Xen itself.
+ *
+ * Additionally, the 1:1 page table now exists contiguously in virtual
+ * space, but may be mapped to physically separated pages, allowing
+ * each node to contain the page tables for its own local memory. Setting
+ * up this mapping presents a bit of a chicken-egg problem, but is possible
+ * as a future enhancement.
+ *
+ * Zachary Amsden (zamsden@cisco.com)
+ *
+ */
+
+/* Guest and user space */
+#define NSPACE_VIRT_START 0
+#define NSPACE_VIRT_END (1ULL << (VIRTUAL_ADDRESS_BITS - 1))
+
+/* Privileged space */
+#define ESPACE_VIRT_END 0
+#define ESPACE_VIRT_START (ESPACE_VIRT_END-(1ULL << (VIRTUAL_ADDRESS_BITS-1)))
+
+/* reservations in e-space */
+#define GUEST_RESERVED_PML4S 16
+#define XEN_RESERVED_PML4S 16
+
+#define MAX_MEMORY_SIZE ((1ULL << (VIRTUAL_ADDRESS_BITS-1)) \
+ -((GUEST_RESERVED_PML4S + XEN_RESERVED_PML4S) * PML4_SPACE))
+#define MAX_MEMORY_FRAMES (MAX_MEMORY_SIZE / XEN_PAGE_SIZE)
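+
+/*
+ * Worked example of the reservation arithmetic above (an illustrative
+ * sketch; __max_mem_check is hypothetical and referenced nowhere else).
+ * Each PML4 slot spans 2^39 bytes (512GB) and the negative half of the
+ * 48-bit space holds 256 slots:
+ *
+ *   256 slots - 16 (Xen) - 16 (guest) = 224 slots
+ *   224 * 512GB = 112TB               = MAX_MEMORY_SIZE
+ *   MAX_MEMORY_SIZE / XEN_PAGE_SIZE   = MAX_MEMORY_FRAMES
+ *
+ * A build-time check along these lines would catch accidental edits:
+ *
+ *   extern int __max_mem_check[(MAX_MEMORY_SIZE ==
+ *                               (224ULL << PML4_BITS)) ? 1 : -1];
+ */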
+
+/*
+ * Virtual addresses beyond this are not modifiable by guest OSes.
+ */
+#define HYPERVISOR_VIRT_START ESPACE_VIRT_START
+#define HYPERVISOR_VIRT_END (ESPACE_VIRT_END-(GUEST_RESERVED_PML4S * PML4_SPACE))
+
+/* First 512GB of virtual address space is used as a linear p.t. mapping. */
+#define LINEAR_PT_VIRT_START (HYPERVISOR_VIRT_START)
+#define LINEAR_PT_VIRT_END (LINEAR_PT_VIRT_START + (PTE_SIZE * TOTAL_PTES))
+
+/* Reserve some space for a shadow PT mapping */
+#define SHADOW_PT_VIRT_START (LINEAR_PT_VIRT_END)
+#define SHADOW_PT_VIRT_END (SHADOW_PT_VIRT_START + (PTE_SIZE * TOTAL_PTES))
+
+/* Xen exists in the first 1GB of the next PML4 space */
+#define MAX_MONITOR_ADDRESS (1 * 1024 * 1024 * 1024)
+#define MONITOR_VIRT_START (SHADOW_PT_VIRT_END)
+#define MONITOR_VIRT_END (MONITOR_VIRT_START + MAX_MONITOR_ADDRESS)
+
+/* Next 1GB of virtual address space used for per-domain mappings (eg. GDT). */
+#define PERDOMAIN_VIRT_START (MONITOR_VIRT_END)
+#define PERDOMAIN_VIRT_END (PERDOMAIN_VIRT_START + (512 * 512 * 4096))
+#define GDT_VIRT_START (PERDOMAIN_VIRT_START)
+#define GDT_VIRT_END (GDT_VIRT_START + (128*1024))
+#define LDT_VIRT_START (GDT_VIRT_END)
+#define LDT_VIRT_END (LDT_VIRT_START + (128*1024))
+
+/*
+ * The first set of MPT entries is mapped read-only for all domains. It holds the
+ * machine->physical mapping table (MPT table). The following are virtual addresses.
+ */
+#define READONLY_MPT_VIRT_START (PERDOMAIN_VIRT_END)
+#define READONLY_MPT_VIRT_END (READONLY_MPT_VIRT_START + (PTE_SIZE * MAX_MEMORY_FRAMES))
+
+/* R/W machine->physical table */
+#define RDWR_MPT_VIRT_START (READONLY_MPT_VIRT_END)
+#define RDWR_MPT_VIRT_END (RDWR_MPT_VIRT_START + (PTE_SIZE * MAX_MEMORY_FRAMES))
+
+/* Frame table */
+#define FRAMETABLE_ENTRY_SIZE (48)
+#define FRAMETABLE_VIRT_START (RDWR_MPT_VIRT_END)
+#define FRAMETABLE_VIRT_END (FRAMETABLE_VIRT_START + (FRAMETABLE_ENTRY_SIZE * MAX_MEMORY_FRAMES))
+
+/* Next 1GB of virtual address space used for ioremap(). */
+#define IOREMAP_VIRT_START (FRAMETABLE_VIRT_END)
+#define IOREMAP_VIRT_END (IOREMAP_VIRT_START + (512 * 512 * 4096))
+
+/* And the virtual addresses for the direct-map region... */
+#define DIRECTMAP_VIRT_START (ESPACE_VIRT_START + (XEN_RESERVED_PML4S * PML4_SPACE))
+#define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + MAX_DIRECTMAP_ADDRESS)
+
+/*
+ * Next is the direct-mapped memory region. The following are machine addresses.
+ */
+#define MAX_DMA_ADDRESS (16*1024*1024)
+#define MAX_DIRECTMAP_ADDRESS MAX_MEMORY_SIZE
+
+
+
+/*
+ * Amount of slack domain memory to leave in system, in kilobytes.
+ * Prevents a hard out-of-memory crunch for things like network receive.
+ */
+#define SLACK_DOMAIN_MEM_KILOBYTES 2048
+
+
+/*
+ * These will probably change in the future:
+ * locations for 32-bit guest compatibility mappings.
+ */
+
+/* 4M of 32-bit machine-physical shadow in low 4G of VM space */
+#define SHADOW_MPT32_VIRT_START (0xfc000000)
+#define SHADOW_MPT32_VIRT_END (SHADOW_MPT32_VIRT_START + (4 * 1024 * 1024))
+
+/* 44M of I/O remap for 32-bit drivers */
+#define IOREMAP_LOW_VIRT_START (SHADOW_MPT32_VIRT_END)
+#define IOREMAP_LOW_VIRT_END (IOREMAP_LOW_VIRT_START + (44 * 1024 * 1024))
+
+/* 4M of 32-bit page table */
+#define SHADOW_PT32_VIRT_START (IOREMAP_LOW_VIRT_END)
+#define SHADOW_PT32_VIRT_END (SHADOW_PT32_VIRT_START + (4 * 1024 * 1024))
+
+
+/* Linkage for x86 */
+#define FASTCALL(x) x __attribute__((regparm(3)))
+#define asmlinkage __attribute__((regparm(0)))
+#define __ALIGN .align 16,0x90
+#define __ALIGN_STR ".align 16,0x90"
+#define SYMBOL_NAME_STR(X) #X
+#define SYMBOL_NAME(X) X
+#define SYMBOL_NAME_LABEL(X) X##:
+#ifdef __ASSEMBLY__
+#define ALIGN __ALIGN
+#define ALIGN_STR __ALIGN_STR
+#define ENTRY(name) \
+ .globl SYMBOL_NAME(name); \
+ ALIGN; \
+ SYMBOL_NAME_LABEL(name)
+#endif
+
+#define PGT_base_page_table PGT_l4_page_table
+
+#define barrier() __asm__ __volatile__("": : :"memory")
+
+/*
+ * Hypervisor segment selectors
+ */
+#define __HYPERVISOR_CS64 0x0810
+#define __HYPERVISOR_CS32 0x0808
+#define __HYPERVISOR_DS 0x0818
+
+#define NR_syscalls 256
+
+#ifndef NDEBUG
+#define MEMORY_GUARD
+#endif
+
+#ifndef __ASSEMBLY__
+extern unsigned long _end; /* standard ELF symbol */
+extern void __out_of_line_bug(int line) __attribute__((noreturn));
+#define out_of_line_bug() __out_of_line_bug(__LINE__)
+#endif /* __ASSEMBLY__ */
+
+#endif /* __XEN_X86_64_CONFIG_H__ */
--- /dev/null
+#ifndef _X86_64_CURRENT_H
+#define _X86_64_CURRENT_H
+
+#if !defined(__ASSEMBLY__)
+struct task_struct;
+
+#include <asm/pda.h>
+
+#define STACK_RESERVED \
+ (sizeof(execution_context_t))
+
+static inline struct task_struct * get_current(void)
+{
+ struct task_struct *current;
+ current = read_pda(pcurrent);
+ return current;
+}
+
+#define current get_current()
+
+static inline void set_current(struct task_struct *p)
+{
+ write_pda(pcurrent,p);
+}
+
+static inline execution_context_t *get_execution_context(void)
+{
+ execution_context_t *execution_context;
+ __asm__( "andq %%rsp,%0; addq %2,%0"
+ : "=r" (execution_context)
+ : "0" (~(STACK_SIZE-1)), "i" (STACK_SIZE-STACK_RESERVED) );
+ return execution_context;
+}
+
+static inline unsigned long get_stack_top(void)
+{
+ unsigned long p;
+ __asm__ ( "orq %%rsp,%0; andq $~7,%0"
+ : "=r" (p) : "0" (STACK_SIZE-8) );
+ return p;
+}
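+
+/*
+ * Example of the masking arithmetic above (illustrative; assumes a
+ * 16KB STACK_SIZE and an arbitrary %rsp of 0x...c0002f48):
+ *
+ *   rsp & ~(STACK_SIZE-1)              = 0x...c0000000 (stack base)
+ *   base + STACK_SIZE - STACK_RESERVED = the execution context kept
+ *                                        at the top of the stack
+ *   (rsp | (STACK_SIZE-8)) & ~7        = 0x...c0003ff8 (get_stack_top:
+ *                                        last 8-byte slot)
+ */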
+
+#define schedule_tail(_p) \
+ __asm__ __volatile__ ( \
+ "andq %%rsp,%0; addq %2,%0; movq %0,%%rsp; jmp *%1" \
+ : : "r" (~(STACK_SIZE-1)), \
+ "r" (unlikely(is_idle_task((_p))) ? \
+ continue_cpu_idle_loop : \
+ continue_nonidle_task), \
+ "i" (STACK_SIZE-STACK_RESERVED) )
+
+
+#else
+
+#ifndef ASM_OFFSET_H
+#include <asm/offset.h>
+#endif
+
+#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg
+
+#endif
+
+#endif /* !(_X86_64_CURRENT_H) */
--- /dev/null
+#ifndef __ARCH_DESC_H
+#define __ARCH_DESC_H
+
+#define LDT_ENTRY_SIZE 16
+
+#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY
+
+#define __FIRST_PER_CPU_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
+
+#define __CPU_DESC_INDEX(x,field) \
+ ((x) * sizeof(struct per_cpu_gdt) + offsetof(struct per_cpu_gdt, field) + (__FIRST_PER_CPU_ENTRY*8))
+#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
+
+#define load_TR(cpu) asm volatile("ltr %w0"::"r" (__CPU_DESC_INDEX(cpu, tss)));
+#define __load_LDT(cpu) asm volatile("lldt %w0"::"r" (__CPU_DESC_INDEX(cpu, ldt)));
+#define clear_LDT(n) asm volatile("lldt %w0"::"r" (0))
+
+/*
+ * Guest OS must provide its own code selectors, or use the one we provide. The
+ * RPL must be 1, as we only create bounce frames to ring 1. Any LDT selector
+ * value is okay. Note that checking only the RPL is insufficient: if the
+ * selector is poked into an interrupt, trap or call gate then the RPL is
+ * ignored when the gate is accessed.
+ */
+#define VALID_SEL(_s) \
+ (((((_s)>>3) < FIRST_RESERVED_GDT_ENTRY) || \
+ (((_s)>>3) > LAST_RESERVED_GDT_ENTRY) || \
+ ((_s)&4)) && \
+ (((_s)&3) == 0))
+#define VALID_CODESEL(_s) ((_s) == FLAT_RING3_CS || VALID_SEL(_s))
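+
+/*
+ * For reference, how a selector decomposes for the checks above
+ * (sketch only):
+ *
+ *   unsigned int index = sel >> 3;        bits 3+: descriptor index
+ *   unsigned int ti    = (sel >> 2) & 1;  bit 2: set for LDT selectors,
+ *                                         which are always accepted
+ *   unsigned int rpl   = sel & 3;         bits 0-1: requested priv level
+ */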
+
+/* These are bitmasks for the first 32 bits of a descriptor table entry. */
+#define _SEGMENT_TYPE (15<< 8)
+#define _SEGMENT_S ( 1<<12) /* System descriptor (yes iff S==0) */
+#define _SEGMENT_DPL ( 3<<13) /* Descriptor Privilege Level */
+#define _SEGMENT_P ( 1<<15) /* Segment Present */
+#define _SEGMENT_G ( 1<<23) /* Granularity */
+
+#ifndef __ASSEMBLY__
+
+enum {
+ GATE_INTERRUPT = 0xE,
+ GATE_TRAP = 0xF,
+ GATE_CALL = 0xC,
+};
+
+// 16byte gate
+struct gate_struct {
+ u16 offset_low;
+ u16 segment;
+ unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
+ u16 offset_middle;
+ u32 offset_high;
+ u32 zero1;
+} __attribute__((packed));
+
+// 8 byte segment descriptor
+struct desc_struct {
+ u16 limit0;
+ u16 base0;
+ unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1;
+ unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8;
+} __attribute__((packed));
+
+// LDT or TSS descriptor in the GDT. 16 bytes.
+struct ldttss_desc {
+ u16 limit0;
+ u16 base0;
+ unsigned base1 : 8, type : 5, dpl : 2, p : 1;
+ unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8;
+ u32 base3;
+ u32 zero1;
+} __attribute__((packed));
+
+// Union of above structures
+union desc_union {
+ struct desc_struct seg;
+ struct ldttss_desc ldttss;
+ struct gate_struct gate;
+};
+
+struct per_cpu_gdt {
+ struct ldttss_desc tss;
+ struct ldttss_desc ldt;
+} ____cacheline_aligned;
+
+
+struct Xgt_desc_struct {
+ unsigned short size;
+ unsigned long address;
+} __attribute__((packed));
+
+extern __u8 gdt_table[];
+extern __u8 gdt_end[];
+extern union desc_union *gdt;
+
+extern struct per_cpu_gdt gdt_cpu_table[];
+
+#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF)
+#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF)
+#define PTR_HIGH(x) ((unsigned long)(x) >> 32)
+
+enum {
+ DESC_TSS = 0x9,
+ DESC_LDT = 0x2,
+};
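+
+/*
+ * Sketch of filling in a 16-byte interrupt gate with the helpers above
+ * (_set_gate here is a hypothetical illustration, not a declared API):
+ *
+ *   static inline void _set_gate(struct gate_struct *g, void *addr,
+ *                                u16 seg, unsigned type, unsigned dpl)
+ *   {
+ *       g->offset_low    = PTR_LOW(addr);
+ *       g->segment       = seg;
+ *       g->ist           = 0;
+ *       g->zero0         = 0;
+ *       g->type          = type;            e.g. GATE_INTERRUPT
+ *       g->dpl           = dpl;
+ *       g->p             = 1;
+ *       g->offset_middle = PTR_MIDDLE(addr);
+ *       g->offset_high   = PTR_HIGH(addr);
+ *       g->zero1         = 0;
+ *   }
+ */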
+
+extern struct gate_struct *idt;
+
+#define idt_descr (*(struct Xgt_desc_struct *)((char *)&idt - 2))
+#define gdt_descr (*(struct Xgt_desc_struct *)((char *)&gdt - 2))
+
+extern void set_intr_gate(unsigned int irq, void * addr);
+extern void set_tss_desc(unsigned int n, void *addr);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
--- /dev/null
+#ifndef __ARCH_LDT_H
+#define __ARCH_LDT_H
+
+#ifndef __ASSEMBLY__
+
+static inline void load_LDT(struct task_struct *p)
+{
+ unsigned long ents;
+
+ if ( (ents = p->mm.ldt_ents) == 0 )
+ {
+ __asm__ __volatile__ ( "lldt %w0" : : "r" (0) );
+ }
+ else
+ {
+ unsigned int cpu;
+ struct ldttss_desc *desc;
+
+ cpu = smp_processor_id();
+ desc = (struct ldttss_desc *)((char *)GET_GDT_ADDRESS(p) + __CPU_DESC_INDEX(cpu, ldt));
+ desc->limit0 = ents*8-1;
+ desc->base0 = LDT_VIRT_START&0xffff;
+ desc->base1 = (LDT_VIRT_START&0xff0000)>>16;
+ desc->type = DESC_LDT;
+ desc->dpl = 0;
+ desc->p = 1;
+ desc->limit1 = 0;
+ desc->zero0 = 0;
+ desc->g = 0;
+ desc->base2 = (LDT_VIRT_START&0xff000000)>>24;
+ desc->base3 = LDT_VIRT_START>>32;
+ desc->zero1 = 0;
+ __load_LDT(cpu);
+ }
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
--- /dev/null
+#ifndef _X86_64_PAGE_H
+#define _X86_64_PAGE_H
+
+#define BUG() do { \
+ printk("BUG at %s:%d\n", __FILE__, __LINE__); \
+ __asm__ __volatile__("ud2"); \
+} while (0)
+
+#define __PHYSICAL_MASK 0x0000ffffffffffffUL
+#define PHYSICAL_PAGE_MASK 0x0000fffffffff000UL
+#define PTE_MASK PHYSICAL_PAGE_MASK
+
+/* PAGE_SHIFT determines the page size */
+#define PAGE_SHIFT 12
+#ifdef __ASSEMBLY__
+#define PAGE_SIZE (0x1 << PAGE_SHIFT)
+#else
+#define PAGE_SIZE (1UL << PAGE_SHIFT)
+#endif
+#define PAGE_MASK (~(PAGE_SIZE-1))
+#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
+#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
+
+#define L1_PAGETABLE_SHIFT 12
+#define L2_PAGETABLE_SHIFT 21
+#define L3_PAGETABLE_SHIFT 30
+#define L4_PAGETABLE_SHIFT 39
+#define LARGE_PFN (LARGE_PAGE_SIZE / PAGE_SIZE)
+
+#define ENTRIES_PER_L1_PAGETABLE 512
+#define ENTRIES_PER_L2_PAGETABLE 512
+#define ENTRIES_PER_L3_PAGETABLE 512
+#define ENTRIES_PER_L4_PAGETABLE 512
+
+#define KERNEL_TEXT_SIZE (40UL*1024*1024)
+#define KERNEL_TEXT_START 0xffffffff80000000UL
+
+/* Changing the next two defines should be enough to increase the kernel stack */
+/* We still hope 8K is enough, but ... */
+#define THREAD_ORDER 1
+#define THREAD_SIZE (2*PAGE_SIZE)
+
+#define INIT_TASK_SIZE THREAD_SIZE
+#define CURRENT_MASK (~(THREAD_SIZE-1))
+
+#define clear_page(_p) memset((void *)(_p), 0, PAGE_SIZE)
+#define copy_page(_t,_f) memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)
+
+#ifndef __ASSEMBLY__
+#include <xen/config.h>
+typedef struct { unsigned long l1_lo; } l1_pgentry_t;
+typedef struct { unsigned long l2_lo; } l2_pgentry_t;
+typedef struct { unsigned long l3_lo; } l3_pgentry_t;
+typedef struct { unsigned long l4_lo; } l4_pgentry_t;
+typedef l1_pgentry_t *l1_pagetable_t;
+typedef l2_pgentry_t *l2_pagetable_t;
+typedef l3_pgentry_t *l3_pagetable_t;
+typedef l4_pgentry_t *l4_pagetable_t;
+typedef struct { unsigned long pt_lo; } pagetable_t;
+typedef struct { unsigned long pgprot; } pgprot_t;
+#endif /* !__ASSEMBLY__ */
+
+/* Strip type from a table entry. */
+#define l1_pgentry_val(_x) ((_x).l1_lo)
+#define l2_pgentry_val(_x) ((_x).l2_lo)
+#define l3_pgentry_val(_x) ((_x).l3_lo)
+#define l4_pgentry_val(_x) ((_x).l4_lo)
+#define pagetable_val(_x) ((_x).pt_lo)
+
+#define alloc_l1_pagetable() ((l1_pgentry_t *)get_free_page(GFP_KERNEL))
+#define alloc_l2_pagetable() ((l2_pgentry_t *)get_free_page(GFP_KERNEL))
+#define alloc_l3_pagetable() ((l3_pgentry_t *)get_free_page(GFP_KERNEL))
+#define alloc_l4_pagetable() ((l4_pgentry_t *)get_free_page(GFP_KERNEL))
+
+/* Add type to a table entry. */
+#define mk_l1_pgentry(_x) ( (l1_pgentry_t) { (_x) } )
+#define mk_l2_pgentry(_x) ( (l2_pgentry_t) { (_x) } )
+#define mk_l3_pgentry(_x) ( (l3_pgentry_t) { (_x) } )
+#define mk_l4_pgentry(_x) ( (l4_pgentry_t) { (_x) } )
+#define mk_pagetable(_x) ( (pagetable_t) { (_x) } )
+
+/* Turn a typed table entry into a page index. */
+#define l1_pgentry_to_pagenr(_x) (l1_pgentry_val(_x) >> PAGE_SHIFT)
+#define l2_pgentry_to_pagenr(_x) (l2_pgentry_val(_x) >> PAGE_SHIFT)
+#define l3_pgentry_to_pagenr(_x) (l3_pgentry_val(_x) >> PAGE_SHIFT)
+#define l4_pgentry_to_pagenr(_x) (l4_pgentry_val(_x) >> PAGE_SHIFT)
+
+/* Turn a typed table entry into a physical address. */
+#define l1_pgentry_to_phys(_x) (l1_pgentry_val(_x) & PAGE_MASK)
+#define l2_pgentry_to_phys(_x) (l2_pgentry_val(_x) & PAGE_MASK)
+#define l3_pgentry_to_phys(_x) (l3_pgentry_val(_x) & PAGE_MASK)
+#define l4_pgentry_to_phys(_x) (l4_pgentry_val(_x) & PAGE_MASK)
+
+/* Dereference a typed level-2 entry to yield a typed level-1 table. */
+#define l2_pgentry_to_l1(_x) \
+ ((l1_pgentry_t *)__va(l2_pgentry_val(_x) & PAGE_MASK))
+
+/* Dereference a typed level-4 entry to yield a typed level-3 table. */
+#define l4_pgentry_to_l3(_x) \
+ ((l3_pgentry_t *)__va(l4_pgentry_val(_x) & PAGE_MASK))
+
+/* Dereference a typed level-3 entry to yield a typed level-2 table. */
+#define l3_pgentry_to_l2(_x) \
+ ((l2_pgentry_t *)__va(l3_pgentry_val(_x) & PAGE_MASK))
+
+/* Given a virtual address, get an entry offset into a page table. */
+#define l1_table_offset(_a) \
+ (((_a) >> L1_PAGETABLE_SHIFT) & (ENTRIES_PER_L1_PAGETABLE - 1))
+#define l2_table_offset(_a) \
+ (((_a) >> L2_PAGETABLE_SHIFT) & (ENTRIES_PER_L2_PAGETABLE - 1))
+#define l3_table_offset(_a) \
+ (((_a) >> L3_PAGETABLE_SHIFT) & (ENTRIES_PER_L3_PAGETABLE - 1))
+#define l4_table_offset(_a) \
+ (((_a) >> L4_PAGETABLE_SHIFT) & (ENTRIES_PER_L4_PAGETABLE - 1))
+
+/* Hypervisor table entries use zero to signify 'empty'. */
+#define l1_pgentry_empty(_x) (!l1_pgentry_val(_x))
+#define l2_pgentry_empty(_x) (!l2_pgentry_val(_x))
+#define l3_pgentry_empty(_x) (!l3_pgentry_val(_x))
+#define l4_pgentry_empty(_x) (!l4_pgentry_val(_x))
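+
+/*
+ * Worked example (illustrative) of decomposing a virtual address with
+ * the offset macros above. For va = 0xffff810000201000UL, inside the
+ * first Xen PML4 slot of the layout:
+ *
+ *   l4_table_offset(va) = (va >> 39) & 511 = 258 (0x102)
+ *   l3_table_offset(va) = (va >> 30) & 511 = 0
+ *   l2_table_offset(va) = (va >> 21) & 511 = 1
+ *   l1_table_offset(va) = (va >> 12) & 511 = 1
+ */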
+
+
+#define pgprot_val(x) ((x).pgprot)
+#define __pgprot(x) ((pgprot_t) { (x) } )
+
+#define clear_user_page(page, vaddr) clear_page(page)
+#define copy_user_page(to, from, vaddr) copy_page(to, from)
+
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
+
+/*
+ * NB. We don't currently track I/O holes in the physical RAM space.
+ * For now we guess that I/O devices will be mapped in the first 1MB
+ * (e.g., VGA buffers) or beyond the end of physical RAM.
+ */
+#define pfn_is_ram(_pfn) (((_pfn) > 0x100) && ((_pfn) < max_page))
+
+/* High table entries are reserved by the hypervisor. */
+#define DOMAIN_ENTRIES_PER_L4_PAGETABLE \
+ (l4_table_offset(HYPERVISOR_VIRT_START))
+#define HYPERVISOR_ENTRIES_PER_L4_PAGETABLE \
+ (ENTRIES_PER_L4_PAGETABLE - DOMAIN_ENTRIES_PER_L4_PAGETABLE)
+
+#define __START_KERNEL 0xffffffff80100000
+#define __START_KERNEL_map 0xffffffff80000000
+#define __PAGE_OFFSET 0x0000010000000000
+#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
+
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+#include <asm/bitops.h>
+#include <asm/flushtlb.h>
+
+extern unsigned long vm_stack_flags, vm_stack_flags32;
+extern unsigned long vm_data_default_flags, vm_data_default_flags32;
+extern unsigned long vm_force_exec32;
+
+#define linear_pg_table ((l1_pgentry_t *)LINEAR_PT_VIRT_START)
+
+extern l4_pgentry_t idle_pg_table[ENTRIES_PER_L4_PAGETABLE];
+extern void paging_init(void);
+
+#define __flush_tlb() \
+ do { \
+ __asm__ __volatile__ ( \
+ "movq %%cr3, %%rax; movq %%rax, %%cr3" \
+ : : : "memory", "rax" ); \
+ tlb_clocktick(); \
+ } while ( 0 )
+
+/* Flush global pages as well. */
+
+#define __pge_off() \
+ do { \
+ __asm__ __volatile__( \
+ "movq %0, %%cr4; # turn off PGE " \
+ :: "r" (mmu_cr4_features & ~X86_CR4_PGE)); \
+ } while (0)
+
+#define __pge_on() \
+ do { \
+ __asm__ __volatile__( \
+ "movq %0, %%cr4; # turn PGE back on " \
+ :: "r" (mmu_cr4_features)); \
+ } while (0)
+
+
+#define __flush_tlb_pge() \
+ do { \
+ __pge_off(); \
+ __flush_tlb(); \
+ __pge_on(); \
+ } while (0)
+
+#define __flush_tlb_one(__addr) \
+__asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr)))
+
+#include <xen/config.h>
+
+/*
+ * Tell the user there is some problem. The exception handler decodes this frame.
+ */
+struct bug_frame {
+ unsigned char ud2[2];
+ char *filename; /* should use 32bit offset instead, but the assembler doesn't like it */
+ unsigned short line;
+} __attribute__((packed));
+#define HEADER_BUG() asm volatile("ud2 ; .quad %P1 ; .short %P0" :: "i"(__LINE__), \
+ "i" (__stringify(__FILE__)))
+#define PAGE_BUG(page) BUG()
+
+#endif /* ASSEMBLY */
+
+#define _PAGE_PRESENT 0x001
+#define _PAGE_RW 0x002
+#define _PAGE_USER 0x004
+#define _PAGE_PWT 0x008
+#define _PAGE_PCD 0x010
+#define _PAGE_ACCESSED 0x020
+#define _PAGE_DIRTY 0x040
+#define _PAGE_PAT 0x080
+#define _PAGE_PSE 0x080
+#define _PAGE_GLOBAL 0x100
+
+#define __PAGE_HYPERVISOR \
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define __PAGE_HYPERVISOR_NOCACHE \
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED)
+#define __PAGE_HYPERVISOR_RO \
+ (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED)
+
+#define MAKE_GLOBAL(_x) ((_x) | _PAGE_GLOBAL)
+
+#define PAGE_HYPERVISOR MAKE_GLOBAL(__PAGE_HYPERVISOR)
+#define PAGE_HYPERVISOR_RO MAKE_GLOBAL(__PAGE_HYPERVISOR_RO)
+#define PAGE_HYPERVISOR_NOCACHE MAKE_GLOBAL(__PAGE_HYPERVISOR_NOCACHE)
+
+#define mk_l4_writeable(_p) \
+ (*(_p) = mk_l4_pgentry(l4_pgentry_val(*(_p)) | _PAGE_RW))
+#define mk_l4_readonly(_p) \
+ (*(_p) = mk_l4_pgentry(l4_pgentry_val(*(_p)) & ~_PAGE_RW))
+#define mk_l3_writeable(_p) \
+ (*(_p) = mk_l3_pgentry(l3_pgentry_val(*(_p)) | _PAGE_RW))
+#define mk_l3_readonly(_p) \
+ (*(_p) = mk_l3_pgentry(l3_pgentry_val(*(_p)) & ~_PAGE_RW))
+#define mk_l2_writeable(_p) \
+ (*(_p) = mk_l2_pgentry(l2_pgentry_val(*(_p)) | _PAGE_RW))
+#define mk_l2_readonly(_p) \
+ (*(_p) = mk_l2_pgentry(l2_pgentry_val(*(_p)) & ~_PAGE_RW))
+#define mk_l1_writeable(_p) \
+ (*(_p) = mk_l1_pgentry(l1_pgentry_val(*(_p)) | _PAGE_RW))
+#define mk_l1_readonly(_p) \
+ (*(_p) = mk_l1_pgentry(l1_pgentry_val(*(_p)) & ~_PAGE_RW))
+
+/* Note: __pa(&symbol_visible_to_c) should always be replaced with __pa_symbol.
+ Otherwise you risk miscompilation. */
+#define __pa(x) (((unsigned long)(x)>=__START_KERNEL_map)?(unsigned long)(x) - (unsigned long)__START_KERNEL_map:(unsigned long)(x) - PAGE_OFFSET)
+/* __pa_symbol should be used for C-visible symbols, but only for them.
+ This seems to be the official gcc-blessed way to do such arithmetic. */
+#define __pa_symbol(x) \
+ ({unsigned long v; \
+ asm("" : "=r" (v) : "0" (x)); \
+ v - __START_KERNEL_map; })
+#define __pa_maybe_symbol(x) \
+ ({unsigned long v; \
+ asm("" : "=r" (v) : "0" (x)); \
+ __pa(v); })
+#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
+#ifndef CONFIG_DISCONTIGMEM
+#define virt_to_page(kaddr) (frame_table + (__pa(kaddr) >> PAGE_SHIFT))
+#define pfn_to_page(pfn) (frame_table + (pfn))
+#define page_address(_p) (__va(((_p) - frame_table) << PAGE_SHIFT))
+#define VALID_PAGE(page) (((page) - frame_table) < max_mapnr)
+#endif
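+
+/*
+ * Example of the translations above (illustrative): for physical
+ * address 0x200000,
+ *
+ *   void *v = __va(0x200000);     v == PAGE_OFFSET + 0x200000
+ *   __pa(v) == 0x200000           (direct-map branch of __pa)
+ *   virt_to_page(v) == frame_table + 0x200   (0x200000 >> PAGE_SHIFT)
+ *
+ * Symbols linked at __START_KERNEL_map take the other branch of
+ * __pa(), which subtracts the kernel-text offset instead.
+ */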
+
+#ifndef __ASSEMBLY__
+static __inline__ int get_order(unsigned long size)
+{
+ int order;
+
+ size = (size-1) >> (PAGE_SHIFT-1);
+ order = -1;
+ do {
+ size >>= 1;
+ order++;
+ } while (size);
+ return order;
+}
+#endif
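+
+/*
+ * get_order() rounds a byte count up to a power-of-two number of pages
+ * and returns the exponent. Examples (PAGE_SHIFT == 12):
+ *
+ *   get_order(1)       == 0    one 4KB page
+ *   get_order(4096)    == 0
+ *   get_order(4097)    == 1    two pages
+ *   get_order(8*4096)  == 3    eight pages
+ */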
+
+#define phys_to_pfn(phys) ((phys) >> PAGE_SHIFT)
+
+#define __VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define __VM_STACK_FLAGS (VM_GROWSDOWN | VM_READ | VM_WRITE | VM_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_DATA_DEFAULT_FLAGS \
+ ((current->thread.flags & THREAD_IA32) ? vm_data_default_flags32 : \
+ vm_data_default_flags)
+#define VM_STACK_FLAGS vm_stack_flags
+
+#endif /* _X86_64_PAGE_H */
--- /dev/null
+#ifndef X86_64_PDA_H
+#define X86_64_PDA_H
+
+#include <xen/cache.h>
+
+/* Per processor datastructure. %gs points to it while the kernel runs */
+/* To use a new field with the *_pda macros it needs to be added to tools/offset.c */
+struct x8664_pda {
+ unsigned long kernelstack; /* TOS for current process */
+ unsigned long oldrsp; /* user rsp for system call */
+ unsigned long irqrsp; /* Old rsp for interrupts. */
+ struct task_struct *pcurrent; /* Current process */
+ int irqcount; /* Irq nesting counter. Starts with -1 */
+ int cpunumber; /* Logical CPU number */
+ /* XXX: could be a single list */
+ unsigned long *pgd_quick;
+ unsigned long *pmd_quick;
+ unsigned long *pte_quick;
+ unsigned long pgtable_cache_sz;
+ char *irqstackptr; /* top of irqstack */
+ unsigned long volatile *level4_pgt;
+} ____cacheline_aligned;
+
+#define PDA_STACKOFFSET (5*8)
+
+#define IRQSTACK_ORDER 2
+#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER)
+
+extern struct x8664_pda cpu_pda[];
+
+/*
+ * There is no fast way to get the base address of the PDA, all the accesses
+ * have to mention %fs/%gs. So it needs to be done this Torvaldian way.
+ */
+#define sizeof_field(type,field) (sizeof(((type *)0)->field))
+#define typeof_field(type,field) typeof(((type *)0)->field)
+
+extern void __bad_pda_field(void);
+/* Don't use offsetof because it requires too much infrastructure */
+#define pda_offset(field) ((unsigned long)&((struct x8664_pda *)0)->field)
+
+#define pda_to_op(op,field,val) do { \
+ switch (sizeof_field(struct x8664_pda, field)) { \
+ case 2: asm volatile(op "w %0,%%gs:%P1" :: "r" (val), "i"(pda_offset(field)):"memory"); break; \
+ case 4: asm volatile(op "l %0,%%gs:%P1" :: "r" (val), "i"(pda_offset(field)):"memory"); break; \
+ case 8: asm volatile(op "q %0,%%gs:%P1" :: "r" (val), "i"(pda_offset(field)):"memory"); break; \
+ default: __bad_pda_field(); \
+ } \
+ } while (0)
+
+
+#define pda_from_op(op,field) ({ \
+ typedef typeof_field(struct x8664_pda, field) T__; T__ ret__; \
+ switch (sizeof_field(struct x8664_pda, field)) { \
+ case 2: asm volatile(op "w %%gs:%P1,%0":"=r" (ret__): "i" (pda_offset(field)):"memory"); break; \
+ case 4: asm volatile(op "l %%gs:%P1,%0":"=r" (ret__): "i" (pda_offset(field)):"memory"); break; \
+ case 8: asm volatile(op "q %%gs:%P1,%0":"=r" (ret__): "i" (pda_offset(field)):"memory"); break; \
+ default: __bad_pda_field(); \
+ } \
+ ret__; })
+
+
+#define read_pda(field) pda_from_op("mov",field)
+#define write_pda(field,val) pda_to_op("mov",field,val)
+#define add_pda(field,val) pda_to_op("add",field,val)
+#define sub_pda(field,val) pda_to_op("sub",field,val)
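+
+/*
+ * Usage sketch (illustrative): the accessors above expand to a single
+ * %gs-relative instruction sized by the field, e.g.
+ *
+ *   struct task_struct *p = read_pda(pcurrent);
+ *       -> movq %gs:<offset of pcurrent>, <reg>
+ *   add_pda(irqcount, 1);
+ *       -> addl <reg holding 1>, %gs:<offset of irqcount>
+ */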
+
+#endif
--- /dev/null
+/*
+ * include/asm-x86_64/processor.h
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ */
+
+#ifndef __ASM_X86_64_PROCESSOR_H
+#define __ASM_X86_64_PROCESSOR_H
+
+#include <asm/page.h>
+#include <asm/types.h>
+#include <asm/cpufeature.h>
+#include <asm/desc.h>
+#include <xen/config.h>
+#include <hypervisor-ifs/hypervisor-if.h>
+
+struct task_struct;
+
+#define TF_MASK 0x00000100
+#define IF_MASK 0x00000200
+#define IOPL_MASK 0x00003000
+#define NT_MASK 0x00004000
+#define VM_MASK 0x00020000
+#define AC_MASK 0x00040000
+#define VIF_MASK 0x00080000 /* virtual interrupt flag */
+#define VIP_MASK 0x00100000 /* virtual interrupt pending */
+#define ID_MASK 0x00200000
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#define current_text_addr() ({ void *pc; asm volatile("leaq 1f(%%rip),%0\n1:":"=r"(pc)); pc; })
+
+/*
+ * CPU type and hardware bug flags. Kept separately for each CPU.
+ * Members of this structure are referenced in head.S, so think twice
+ * before touching them. [mj]
+ */
+
+struct cpuinfo_x86 {
+ __u8 x86; /* CPU family */
+ __u8 x86_vendor; /* CPU vendor */
+ __u8 x86_model;
+ __u8 x86_mask;
+ int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */
+ __u32 x86_capability[NCAPINTS];
+ char x86_vendor_id[16];
+ char x86_model_id[64];
+ int x86_cache_size; /* in KB - valid for CPUs which support this
+ call */
+ int x86_clflush_size;
+ int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined */
+ __u8 x86_virt_bits, x86_phys_bits;
+ __u32 x86_power;
+ unsigned long loops_per_jiffy;
+} ____cacheline_aligned;
+
+#define X86_VENDOR_INTEL 0
+#define X86_VENDOR_CYRIX 1
+#define X86_VENDOR_AMD 2
+#define X86_VENDOR_UMC 3
+#define X86_VENDOR_NEXGEN 4
+#define X86_VENDOR_CENTAUR 5
+#define X86_VENDOR_RISE 6
+#define X86_VENDOR_TRANSMETA 7
+#define X86_VENDOR_UNKNOWN 0xff
+
+/*
+ * capabilities of CPUs
+ */
+
+extern struct cpuinfo_x86 boot_cpu_data;
+extern struct tss_struct init_tss[NR_CPUS];
+
+#ifdef CONFIG_SMP
+extern struct cpuinfo_x86 cpu_data[];
+#define current_cpu_data cpu_data[smp_processor_id()]
+#else
+#define cpu_data (&boot_cpu_data)
+#define current_cpu_data boot_cpu_data
+#endif
+
+#define cpu_has_pge 1
+#define cpu_has_pse 1
+#define cpu_has_pae 1
+#define cpu_has_tsc 1
+#define cpu_has_de 1
+#define cpu_has_vme 1
+#define cpu_has_fxsr 1
+#define cpu_has_xmm 1
+#define cpu_has_apic (test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability))
+
+extern char ignore_irq13;
+
+extern void identify_cpu(struct cpuinfo_x86 *);
+extern void print_cpu_info(struct cpuinfo_x86 *);
+extern void dodgy_tsc(void);
+
+/*
+ * EFLAGS bits
+ */
+#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
+#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
+#define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */
+#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
+#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
+#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
+#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
+#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
+#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
+#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
+#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
+#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
+#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
+#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
+#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
+#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
+#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
+
+/*
+ * Generic CPUID function
+ * FIXME: This really belongs to msr.h
+ */
+extern inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
+{
+ __asm__("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (op));
+}
+
+/*
+ * CPUID functions returning a single datum
+ */
+extern inline unsigned int cpuid_eax(unsigned int op)
+{
+ unsigned int eax;
+
+ __asm__("cpuid"
+ : "=a" (eax)
+ : "0" (op)
+ : "bx", "cx", "dx");
+ return eax;
+}
+extern inline unsigned int cpuid_ebx(unsigned int op)
+{
+ unsigned int eax, ebx;
+
+ __asm__("cpuid"
+ : "=a" (eax), "=b" (ebx)
+ : "0" (op)
+ : "cx", "dx" );
+ return ebx;
+}
+extern inline unsigned int cpuid_ecx(unsigned int op)
+{
+ unsigned int eax, ecx;
+
+ __asm__("cpuid"
+ : "=a" (eax), "=c" (ecx)
+ : "0" (op)
+ : "bx", "dx" );
+ return ecx;
+}
+extern inline unsigned int cpuid_edx(unsigned int op)
+{
+ unsigned int eax, edx;
+
+ __asm__("cpuid"
+ : "=a" (eax), "=d" (edx)
+ : "0" (op)
+ : "bx", "cx");
+ return edx;
+}
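+
+/*
+ * Usage sketch (illustrative): leaf 0 returns the 12-byte vendor
+ * string in ebx/edx/ecx order, so it can be reassembled like this:
+ *
+ *   int a, b, c, d;
+ *   char vendor[13];
+ *   cpuid(0, &a, &b, &c, &d);
+ *   memcpy(vendor + 0, &b, 4);
+ *   memcpy(vendor + 4, &d, 4);
+ *   memcpy(vendor + 8, &c, 4);
+ *   vendor[12] = '\0';            e.g. "GenuineIntel"
+ */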
+
+
+/*
+ * Intel CPU flags in CR0
+ */
+#define X86_CR0_PE 0x00000001 /* Enable Protected Mode (RW) */
+#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor (RW) */
+#define X86_CR0_EM 0x00000004 /* Require FPU Emulation (RO) */
+#define X86_CR0_TS 0x00000008 /* Task Switched (RW) */
+#define X86_CR0_NE 0x00000020 /* Numeric Error Reporting (RW) */
+#define X86_CR0_WP 0x00010000 /* Supervisor Write Protect (RW) */
+#define X86_CR0_AM 0x00040000 /* Alignment Checking (RW) */
+#define X86_CR0_NW 0x20000000 /* Not Write-Through (RW) */
+#define X86_CR0_CD 0x40000000 /* Cache Disable (RW) */
+#define X86_CR0_PG 0x80000000 /* Paging (RW) */
+
+#define read_cr0() ({ \
+ unsigned long __dummy; \
+ __asm__( \
+ "movq %%cr0,%0\n\t" \
+ :"=r" (__dummy)); \
+ __dummy; \
+})
+
+#define write_cr0(x) \
+ __asm__("movq %0,%%cr0": :"r" (x));
+
+
+
+/*
+ * Intel CPU features in CR4
+ */
+#define X86_CR4_VME 0x0001 /* enable vm86 extensions */
+#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */
+#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */
+#define X86_CR4_DE 0x0008 /* enable debugging extensions */
+#define X86_CR4_PSE 0x0010 /* enable page size extensions */
+#define X86_CR4_PAE 0x0020 /* enable physical address extensions */
+#define X86_CR4_MCE 0x0040 /* Machine check enable */
+#define X86_CR4_PGE 0x0080 /* enable global pages */
+#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
+#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */
+#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */
+
+/*
+ * Save the cr4 feature set we're using (ie
+ * Pentium 4MB enable and PPro Global page
+ * enable), so that any CPU's that boot up
+ * after us can get the correct flags.
+ */
+extern unsigned long mmu_cr4_features;
+
+static inline void set_in_cr4 (unsigned long mask)
+{
+ mmu_cr4_features |= mask;
+ __asm__("movq %%cr4,%%rax\n\t"
+ "orq %0,%%rax\n\t"
+ "movq %%rax,%%cr4\n"
+ : : "irg" (mask)
+ :"ax");
+}
+
+static inline void clear_in_cr4 (unsigned long mask)
+{
+ mmu_cr4_features &= ~mask;
+ __asm__("movq %%cr4,%%rax\n\t"
+ "andq %0,%%rax\n\t"
+ "movq %%rax,%%cr4\n"
+ : : "irg" (~mask)
+ :"ax");
+}
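+
+/*
+ * Usage sketch (illustrative): enabling global pages at boot both sets
+ * the bit in %cr4 and records it in mmu_cr4_features, so CPUs brought
+ * up later inherit the same setting:
+ *
+ *   if (cpu_has_pge)
+ *       set_in_cr4(X86_CR4_PGE);
+ */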
+
+/*
+ * Cyrix CPU configuration register indexes
+ */
+#define CX86_CCR0 0xc0
+#define CX86_CCR1 0xc1
+#define CX86_CCR2 0xc2
+#define CX86_CCR3 0xc3
+#define CX86_CCR4 0xe8
+#define CX86_CCR5 0xe9
+#define CX86_CCR6 0xea
+#define CX86_CCR7 0xeb
+#define CX86_DIR0 0xfe
+#define CX86_DIR1 0xff
+#define CX86_ARR_BASE 0xc4
+#define CX86_RCR_BASE 0xdc
+
+/*
+ * Cyrix CPU indexed register access macros
+ */
+
+#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); })
+
+#define setCx86(reg, data) do { \
+ outb((reg), 0x22); \
+ outb((data), 0x23); \
+} while (0)
+
+/*
+ * Bus types
+ */
+#define EISA_bus 0
+#define MCA_bus 0
+#define MCA_bus__is_a_macro
+
+
+/*
+ * User space process size: 512GB - 1GB (default).
+ */
+#define TASK_SIZE (0x0000007fc0000000)
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_32 0xa0000000
+#define TASK_UNMAPPED_64 (TASK_SIZE/3)
+#define TASK_UNMAPPED_BASE \
+ ((current->thread.flags & THREAD_IA32) ? TASK_UNMAPPED_32 : TASK_UNMAPPED_64)
+
+/*
+ * Size of io_bitmap in longwords: 32 is ports 0-0x3ff.
+ */
+#define IO_BITMAP_SIZE 32
+#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
+#define INVALID_IO_BITMAP_OFFSET 0x8000
+
+struct i387_fxsave_struct {
+ u16 cwd;
+ u16 swd;
+ u16 twd;
+ u16 fop;
+ u64 rip;
+ u64 rdp;
+ u32 mxcsr;
+ u32 mxcsr_mask;
+ u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
+ u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 128 bytes */
+ u32 padding[24];
+} __attribute__ ((aligned (16)));
+
+union i387_union {
+ struct i387_fxsave_struct fxsave;
+};
+
+typedef struct {
+ unsigned long seg;
+} mm_segment_t;
+
+struct tss_struct {
+ unsigned short back_link,__blh;
+/* u32 reserved1; */
+ u64 rsp0;
+ u64 rsp1;
+ u64 rsp2;
+ u64 reserved2;
+ u64 ist[7];
+ u32 reserved3;
+ u32 reserved4;
+ u16 reserved5;
+ u16 io_map_base;
+ u32 io_bitmap[IO_BITMAP_SIZE];
+} __attribute__((packed)) ____cacheline_aligned;
+
+struct thread_struct {
+ unsigned long guestos_sp;
+ unsigned long guestos_ss;
+ unsigned long rip;
+ unsigned long rsp;
+ unsigned long userrsp; /* Copy from PDA */
+ unsigned long fs;
+ unsigned long gs;
+ unsigned short es, ds, fsindex, gsindex;
+ enum {
+ THREAD_IA32 = 0x0001,
+ } flags;
+/* Hardware debugging registers */
+ unsigned long debugreg[8]; /* %%db0-7 debug registers */
+/* floating point info */
+ union i387_union i387;
+/* Trap info. */
+ trap_info_t traps[256];
+};
+
+#define IDT_ENTRIES 256
+extern struct gate_struct idt_table[];
+extern struct gate_struct *idt_tables[];
+
+#define INIT_THREAD { \
+ 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, /* flags */ \
+ { [0 ... 7] = 0 }, /* debugging registers */ \
+ { { 0, }, }, /* 387 state */ \
+ { {0} } /* io permissions */ \
+}
+
+#define INIT_TSS { \
+ 0,0, /* back_link, __blh */ \
+ 0, /* rsp0 */ \
+ 0, 0, /* rsp1, rsp2 */ \
+ 0, /* reserved */ \
+ { [0 ... 6] = 0 }, /* ist[] */ \
+ 0,0, /* reserved */ \
+ 0, INVALID_IO_BITMAP_OFFSET, /* trace, bitmap */ \
+ {~0, } /* ioperm */ \
+}
+
+struct mm_struct {
+ /*
+ * Every domain has a L1 pagetable of its own. Per-domain mappings
+ * are put in this table (eg. the current GDT is mapped here).
+ */
+ l1_pgentry_t *perdomain_pt;
+ pagetable_t pagetable;
+ /* Current LDT details. */
+ unsigned long ldt_base, ldt_ents, shadow_ldt_mapcnt;
+ /* Next entry is passed to LGDT on domain switch. */
+ char gdt[10];
+};
+
+#define IDLE0_MM \
+{ \
+ perdomain_pt: 0, \
+ pagetable: mk_pagetable(__pa(idle_pg_table)) \
+}
+
+/* Convenient accessor for mm.gdt. */
+#define SET_GDT_ENTRIES(_p, _e) ((*(u16 *)((_p)->mm.gdt + 0)) = (_e))
+#define SET_GDT_ADDRESS(_p, _a) ((*(u64 *)((_p)->mm.gdt + 2)) = (_a))
+#define GET_GDT_ENTRIES(_p) ((*(u16 *)((_p)->mm.gdt + 0)))
+#define GET_GDT_ADDRESS(_p) ((*(u64 *)((_p)->mm.gdt + 2)))
+
+long set_gdt(struct task_struct *p,
+ unsigned long *frames,
+ unsigned int entries);
+
+long set_debugreg(struct task_struct *p, int reg, unsigned long value);
+
+struct microcode {
+ unsigned int hdrver;
+ unsigned int rev;
+ unsigned int date;
+ unsigned int sig;
+ unsigned int cksum;
+ unsigned int ldrver;
+ unsigned int pf;
+ unsigned int reserved[5];
+ unsigned int bits[500];
+};
+
+/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */
+#define MICROCODE_IOCFREE _IO('6',0)
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+static inline void rep_nop(void)
+{
+ __asm__ __volatile__("rep;nop");
+}
+
+#define cpu_relax() rep_nop()
+
+#define init_task (init_task_union.task)
+#define init_stack (init_task_union.stack)
+
+/* Avoid speculative execution by the CPU */
+extern inline void sync_core(void)
+{
+ int tmp;
+ asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory");
+}
+
+#define cpu_has_fpu 1
+
+#define ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCHW
+#define ARCH_HAS_SPINLOCK_PREFETCH
+
+#define prefetch(x) __builtin_prefetch((x),0)
+#define prefetchw(x) __builtin_prefetch((x),1)
+#define spin_lock_prefetch(x) prefetchw(x)
+
+
+#endif /* __ASM_X86_64_PROCESSOR_H */
--- /dev/null
+#ifndef _X86_64_PTRACE_H
+#define _X86_64_PTRACE_H
+
+#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
+#define R15 0
+#define R14 8
+#define R13 16
+#define R12 24
+#define RBP 36
+#define RBX 40
+/* arguments: interrupts/non-tracing syscalls only save up to here */
+#define R11 48
+#define R10 56
+#define R9 64
+#define R8 72
+#define RAX 80
+#define RCX 88
+#define RDX 96
+#define RSI 104
+#define RDI 112
+#define ORIG_RAX 120 /* = ERROR */
+/* end of arguments */
+/* cpu exception frame or undefined in case of fast syscall. */
+#define RIP 128
+#define CS 136
+#define EFLAGS 144
+#define RSP 152
+#define SS 160
+#define ARGOFFSET R11
+#endif /* __ASSEMBLY__ || __FRAME_OFFSETS */
+
+/* top of stack page */
+#define FRAME_SIZE 168
+
+#define PTRACE_SETOPTIONS 21
+
+/* options set using PTRACE_SETOPTIONS */
+#define PTRACE_O_TRACESYSGOOD 0x00000001
+
+/* Dummy values for ptrace */
+#define FS 1000
+#define GS 1008
+
+#ifndef __ASSEMBLY__
+
+struct pt_regs {
+ unsigned long r15;
+ unsigned long r14;
+ unsigned long r13;
+ unsigned long r12;
+ unsigned long rbp;
+ unsigned long rbx;
+/* arguments: non-interrupt/non-tracing syscalls only save up to here */
+ unsigned long r11;
+ unsigned long r10;
+ unsigned long r9;
+ unsigned long r8;
+ unsigned long rax;
+ unsigned long rcx;
+ unsigned long rdx;
+ unsigned long rsi;
+ unsigned long rdi;
+ unsigned long orig_rax;
+/* end of arguments */
+/* cpu exception frame or undefined */
+ unsigned long rip;
+ unsigned long cs;
+ unsigned long eflags;
+ unsigned long rsp;
+ unsigned long ss;
+/* top of stack page */
+};
+
+#endif
+
+/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
+#define PTRACE_GETREGS 12
+#define PTRACE_SETREGS 13
+#define PTRACE_GETFPREGS 14
+#define PTRACE_SETFPREGS 15
+#define PTRACE_GETFPXREGS 18
+#define PTRACE_SETFPXREGS 19
+
+#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#define user_mode(regs) (!!((regs)->cs & 3))
+#define instruction_pointer(regs) ((regs)->rip)
+extern void show_regs(struct pt_regs *);
+
+enum {
+ EF_CF = 0x00000001,
+ EF_PF = 0x00000004,
+ EF_AF = 0x00000010,
+ EF_ZF = 0x00000040,
+ EF_SF = 0x00000080,
+ EF_TF = 0x00000100,
+ EF_IE = 0x00000200,
+ EF_DF = 0x00000400,
+ EF_OF = 0x00000800,
+ EF_IOPL = 0x00003000,
+ EF_IOPL_RING0 = 0x00000000,
+ EF_IOPL_RING1 = 0x00001000,
+ EF_IOPL_RING2 = 0x00002000,
+ EF_NT = 0x00004000, /* nested task */
+ EF_RF = 0x00010000, /* resume */
+ EF_VM = 0x00020000, /* virtual mode */
+ EF_AC = 0x00040000, /* alignment */
+ EF_VIF = 0x00080000, /* virtual interrupt */
+ EF_VIP = 0x00100000, /* virtual interrupt pending */
+ EF_ID = 0x00200000, /* id */
+};
+
+#endif
+
+#endif
--- /dev/null
+#ifndef __X86_64_UACCESS_H
+#define __X86_64_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <xen/prefetch.h>
+#include <xen/errno.h>
+#include <asm/page.h>
+
+#define VERIFY_READ 0
+#define VERIFY_WRITE 1
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not. If get_fs() == USER_DS, checking is performed, with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons, these macros are grossly misnamed.
+ */
+
+#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
+
+#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFFFFFFFFFF)
+#define USER_DS MAKE_MM_SEG(PAGE_OFFSET)
+
+#define get_ds() (KERNEL_DS)
+#define get_fs() (current->addr_limit)
+#define set_fs(x) (current->addr_limit = (x))
+
+#define segment_eq(a,b) ((a).seg == (b).seg)
+
+#define __addr_ok(addr) (!((unsigned long)(addr) & (current->addr_limit.seg)))
+
+/*
+ * Uhhuh, this needs 65-bit arithmetic. We have a carry..
+ */
+#define __range_not_ok(addr,size) ({ \
+ unsigned long flag,sum; \
+ asm("# range_ok\n\r" \
+ "addq %3,%1 ; sbbq %0,%0 ; cmpq %1,%4 ; sbbq $0,%0" \
+ :"=&r" (flag), "=r" (sum) \
+ :"1" (addr),"g" ((long)(size)),"g" (current->addr_limit.seg)); \
+ flag; })
+
+#define access_ok(type,addr,size) (__range_not_ok(addr,size) == 0)
+
+extern inline int verify_area(int type, const void * addr, unsigned long size)
+{
+ return access_ok(type,addr,size) ? 0 : -EFAULT;
+}
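+
+/*
+ * Worked example of the carry trick in __range_not_ok (illustrative):
+ *
+ *   addq size,addr   sum = addr + size; CF set iff the add wrapped
+ *   sbbq flag,flag   flag = wrapped ? -1 : 0
+ *   cmpq sum,limit   CF set iff limit < sum (unsigned)
+ *   sbbq $0,flag     folds that borrow into flag as well
+ *
+ * so flag is nonzero exactly when [addr, addr+size) wraps or ends
+ * beyond addr_limit, without needing 65-bit arithmetic in C.
+ */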
+
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue. No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path. This means when everything is well,
+ * we don't even have to jump over them. Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry
+{
+ unsigned long insn, fixup;
+};
+
+
+/*
+ * These are the main single-value transfer routines. They automatically
+ * use the right size if we just have the right pointer type.
+ *
+ * This gets kind of ugly. We want to return _two_ values in "get_user()"
+ * and yet we don't want to do any pointers, because that is too much
+ * of a performance impact. Thus we have a few rather ugly macros here,
+ * and hide all the ugliness from the user.
+ *
+ * The "__xxx" versions of the user access functions are versions that
+ * do not verify the address space, that must have been done previously
+ * with a separate "access_ok()" call (this is used when we do multiple
+ * accesses to the same area of user memory).
+ */
+
+extern void __get_user_1(void);
+extern void __get_user_2(void);
+extern void __get_user_4(void);
+extern void __get_user_8(void);
+
+#define __get_user_x(size,ret,x,ptr) \
+ __asm__ __volatile__("call __get_user_" #size \
+ :"=a" (ret),"=d" (x) \
+ :"0" (ptr) \
+ :"rbx")
+
+/* Careful: we have to cast the result to the type of the pointer for sign reasons */
+#define get_user(x,ptr) \
+({ long __val_gu; \
+ int __ret_gu=1; \
+ switch(sizeof (*(ptr))) { \
+ case 1: __ret_gu=copy_from_user(&__val_gu,ptr,1);break; \
+ case 2: __ret_gu=copy_from_user(&__val_gu,ptr,2);break; \
+ case 4: __ret_gu=copy_from_user(&__val_gu,ptr,4);break; \
+ case 8: __ret_gu=copy_from_user(&__val_gu,ptr,8);break; \
+ default: __ret_gu=copy_from_user(&__val_gu,ptr,sizeof(*(ptr)));break;\
+ /*case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break;*/ \
+ /*case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break;*/ \
+ /*case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break;*/ \
+ /*case 8: __get_user_x(8,__ret_gu,__val_gu,ptr); break;*/ \
+ /*default: __get_user_bad(); break;*/ \
+ } \
+ (x) = (__typeof__(*(ptr)))__val_gu; \
+ __ret_gu; \
+})
+
+extern void __put_user_1(void);
+extern void __put_user_2(void);
+extern void __put_user_4(void);
+extern void __put_user_8(void);
+
+extern void __put_user_bad(void);
+
+#define __put_user_x(size,ret,x,ptr) \
+ __asm__ __volatile__("call __put_user_" #size \
+ :"=a" (ret) \
+ :"0" (ptr),"d" (x) \
+ :"rbx")
+
+#define put_user(x,ptr) \
+ __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
+
+#define __get_user(x,ptr) \
+ __get_user_nocheck((x),(ptr),sizeof(*(ptr)))
+#define __put_user(x,ptr) \
+ __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
+
+#define __put_user_nocheck(x,ptr,size) \
+({ \
+ int __pu_err; \
+ __put_user_size((x),(ptr),(size),__pu_err); \
+ __pu_err; \
+})
+
+
+#define __put_user_check(x,ptr,size) \
+({ \
+ int __pu_err = -EFAULT; \
+ __typeof__(*(ptr)) *__pu_addr = (ptr); \
+ if (access_ok(VERIFY_WRITE,__pu_addr,size)) \
+ __put_user_size((x),__pu_addr,(size),__pu_err); \
+ __pu_err; \
+})
+
+#define __put_user_size(x,ptr,size,retval) \
+do { \
+ retval = 0; \
+ switch (size) { \
+ case 1: __put_user_asm(x,ptr,retval,"b","b","iq",-EFAULT); break;\
+ case 2: __put_user_asm(x,ptr,retval,"w","w","ir",-EFAULT); break;\
+ case 4: __put_user_asm(x,ptr,retval,"l","k","ir",-EFAULT); break;\
+ case 8: __put_user_asm(x,ptr,retval,"q","","ir",-EFAULT); break;\
+ default: __put_user_bad(); \
+ } \
+} while (0)
+
+/* FIXME: this hack is definitely wrong -AK */
+struct __large_struct { unsigned long buf[100]; };
+#define __m(x) (*(struct __large_struct *)(x))
+
+/*
+ * Tell gcc we read from memory instead of writing: this is because
+ * we do not write to any memory gcc knows about, so there are no
+ * aliasing issues.
+ */
+#define __put_user_asm(x, addr, err, itype, rtype, ltype, errno) \
+ __asm__ __volatile__( \
+ "1: mov"itype" %"rtype"1,%2\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: mov %3,%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 8\n" \
+ " .quad 1b,3b\n" \
+ ".previous" \
+ : "=r"(err) \
+ : ltype (x), "m"(__m(addr)), "i"(errno), "0"(err))
+
+
+#define __get_user_nocheck(x,ptr,size) \
+({ \
+ int __gu_err; \
+ long __gu_val; \
+ __get_user_size(__gu_val,(ptr),(size),__gu_err); \
+ (x) = (__typeof__(*(ptr)))__gu_val; \
+ __gu_err; \
+})
+
+extern int __get_user_bad(void);
+
+#define __get_user_size(x,ptr,size,retval) \
+do { \
+ retval = 0; \
+ switch (size) { \
+ case 1: __get_user_asm(x,ptr,retval,"b","b","=q",-EFAULT); break;\
+ case 2: __get_user_asm(x,ptr,retval,"w","w","=r",-EFAULT); break;\
+ case 4: __get_user_asm(x,ptr,retval,"l","k","=r",-EFAULT); break;\
+ case 8: __get_user_asm(x,ptr,retval,"q","","=r",-EFAULT); break;\
+ default: (x) = __get_user_bad(); \
+ } \
+} while (0)
+
+#define __get_user_asm(x, addr, err, itype, rtype, ltype, errno) \
+ __asm__ __volatile__( \
+ "1: mov"itype" %2,%"rtype"1\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: mov %3,%0\n" \
+ " xor"itype" %"rtype"1,%"rtype"1\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 8\n" \
+ " .quad 1b,3b\n" \
+ ".previous" \
+ : "=r"(err), ltype (x) \
+ : "m"(__m(addr)), "i"(errno), "0"(err))
+
+/*
+ * Copy To/From Userspace
+ */
+
+/* Handles exceptions in both to and from, but doesn't do access_ok */
+extern unsigned long copy_user_generic(void *to, const void *from, unsigned len);
+
+extern unsigned long copy_to_user(void *to, const void *from, unsigned len);
+extern unsigned long copy_from_user(void *to, const void *from, unsigned len);
+
+static inline int __copy_from_user(void *dst, const void *src, unsigned size)
+{
+ int ret = 0;
+ if (!__builtin_constant_p(size))
+ return copy_user_generic(dst,src,size);
+ switch (size) {
+ case 1:__get_user_asm(*(u8*)dst,(u8 *)src,ret,"b","b","=q",1);
+ return ret;
+ case 2:__get_user_asm(*(u16*)dst,(u16*)src,ret,"w","w","=r",2);
+ return ret;
+ case 4:__get_user_asm(*(u32*)dst,(u32*)src,ret,"l","k","=r",4);
+ return ret;
+ case 8:__get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",8);
+ return ret;
+ case 10:
+ __get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",16);
+ if (ret) return ret;
+ __get_user_asm(*(u16*)(8+dst),(u16*)(8+src),ret,"w","w","=r",2);
+ return ret;
+ case 16:
+ __get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",16);
+ if (ret) return ret;
+ __get_user_asm(*(u64*)(8+dst),(u64*)(8+src),ret,"q","","=r",8);
+ return ret;
+ default:
+ return copy_user_generic(dst,src,size);
+ }
+}
+
+static inline int __copy_to_user(void *dst, const void *src, unsigned size)
+{
+ int ret = 0;
+ if (!__builtin_constant_p(size))
+ return copy_user_generic(dst,src,size);
+ switch (size) {
+ case 1:__put_user_asm(*(u8*)src,(u8 *)dst,ret,"b","b","iq",1);
+ return ret;
+ case 2:__put_user_asm(*(u16*)src,(u16*)dst,ret,"w","w","ir",2);
+ return ret;
+ case 4:__put_user_asm(*(u32*)src,(u32*)dst,ret,"l","k","ir",4);
+ return ret;
+ case 8:__put_user_asm(*(u64*)src,(u64*)dst,ret,"q","","ir",8);
+ return ret;
+ case 10:
+ __put_user_asm(*(u64*)src,(u64*)dst,ret,"q","","ir",10);
+ if (ret) return ret;
+ asm("":::"memory");
+ __put_user_asm(4[(u16*)src],4+(u16*)dst,ret,"w","w","ir",2);
+ return ret;
+ case 16:
+ __put_user_asm(*(u64*)src,(u64*)dst,ret,"q","","ir",16);
+ if (ret) return ret;
+ asm("":::"memory");
+ __put_user_asm(1[(u64*)src],1+(u64*)dst,ret,"q","","ir",8);
+ return ret;
+ default:
+ return copy_user_generic(dst,src,size);
+ }
+}
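+
+/*
+ * Usage sketch (illustrative; uptr stands for any user pointer already
+ * validated with access_ok). For constant sizes the inlines above
+ * compile to one or two mov instructions with exception-table fixups;
+ * anything else falls through to copy_user_generic():
+ *
+ *   u64 val;
+ *   if (__copy_from_user(&val, uptr, sizeof(val)))   8-byte fast path
+ *       return -EFAULT;
+ *
+ * The return value is the number of bytes left uncopied, so any
+ * nonzero result signals a fault.
+ */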
+
+long strncpy_from_user(char *dst, const char *src, long count);
+long __strncpy_from_user(char *dst, const char *src, long count);
+long strnlen_user(const char *str, long n);
+long strlen_user(const char *str);
+unsigned long clear_user(void *mem, unsigned long len);
+unsigned long __clear_user(void *mem, unsigned long len);
+
+extern unsigned long search_exception_table(unsigned long);
+
+#endif /* __X86_64_UACCESS_H */
+++ /dev/null
-#ifndef __ASM_APIC_H
-#define __ASM_APIC_H
-
-#include <xen/config.h>
-#include <asm/ptrace.h>
-#include <asm/apicdef.h>
-#include <asm/system.h>
-
-#ifdef CONFIG_X86_LOCAL_APIC
-
-#define APIC_DEBUG 0
-
-#if APIC_DEBUG
-#define Dprintk(x...) printk(x)
-#else
-#define Dprintk(x...)
-#endif
-
-/*
- * Basic functions accessing APICs.
- */
-
-static __inline void apic_write(unsigned long reg, unsigned int v)
-{
- *((volatile unsigned int *)(APIC_BASE+reg)) = v;
- barrier();
-}
-
-static __inline void apic_write_atomic(unsigned long reg, unsigned int v)
-{
- xchg((volatile unsigned int *)(APIC_BASE+reg), v);
-}
-
-static __inline unsigned int apic_read(unsigned long reg)
-{
- return *((volatile unsigned int *)(APIC_BASE+reg));
-}
-
-static __inline__ void apic_wait_icr_idle(void)
-{
- do { } while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY );
-}
-
-#ifdef CONFIG_X86_GOOD_APIC
-# define FORCE_READ_AROUND_WRITE 0
-# define apic_read_around(x)
-# define apic_write_around(x,y) apic_write((x),(y))
-#else
-# define FORCE_READ_AROUND_WRITE 1
-# define apic_read_around(x) apic_read(x)
-# define apic_write_around(x,y) apic_write_atomic((x),(y))
-#endif
-
-static inline void ack_APIC_irq(void)
-{
- /*
- * ack_APIC_irq() actually gets compiled as a single instruction:
- * - a single rmw on Pentium/82489DX
- * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC)
- * ... yummie.
- */
-
- /* Docs say use 0 for future compatibility */
- apic_write_around(APIC_EOI, 0);
-}
-
-extern int get_maxlvt(void);
-extern void connect_bsp_APIC (void);
-extern void disconnect_bsp_APIC (void);
-extern void disable_local_APIC (void);
-extern int verify_local_APIC (void);
-extern void sync_Arb_IDs (void);
-extern void init_bsp_APIC (void);
-extern void setup_local_APIC (void);
-extern void init_apic_mappings (void);
-extern void setup_APIC_clocks (void);
-extern void setup_apic_nmi_watchdog (void);
-extern inline void nmi_watchdog_tick (struct pt_regs * regs);
-extern int APIC_init_uniprocessor (void);
-extern void disable_APIC_timer(void);
-extern void enable_APIC_timer(void);
-
-//extern struct pm_dev *apic_pm_register(pm_dev_t, unsigned long, pm_callback);
-//extern void apic_pm_unregister(struct pm_dev*);
-
-extern unsigned int watchdog_on;
-
-extern unsigned int apic_timer_irqs [NR_CPUS];
-extern int check_nmi_watchdog (void);
-
-extern unsigned int nmi_watchdog;
-#define NMI_NONE 0
-#define NMI_IO_APIC 1
-#define NMI_LOCAL_APIC 2
-#define NMI_INVALID 3
-
-#endif /* CONFIG_X86_LOCAL_APIC */
-
-#define clustered_apic_mode 0
-#define esr_disable 0
-
-#endif /* __ASM_APIC_H */
+++ /dev/null
-#ifndef __ASM_APICDEF_H
-#define __ASM_APICDEF_H
-
-/*
- * Constants for various Intel APICs. (local APIC, IOAPIC, etc.)
- *
- * Alan Cox <Alan.Cox@linux.org>, 1995.
- * Ingo Molnar <mingo@redhat.com>, 1999, 2000
- */
-
-#define APIC_DEFAULT_PHYS_BASE 0xfee00000
-
-#define APIC_ID 0x20
-#define APIC_ID_MASK (0x0F<<24)
-#define GET_APIC_ID(x) (((x)>>24)&0x0F)
-#define APIC_LVR 0x30
-#define APIC_LVR_MASK 0xFF00FF
-#define GET_APIC_VERSION(x) ((x)&0xFF)
-#define GET_APIC_MAXLVT(x) (((x)>>16)&0xFF)
-#define APIC_INTEGRATED(x) ((x)&0xF0)
-#define APIC_TASKPRI 0x80
-#define APIC_TPRI_MASK 0xFF
-#define APIC_ARBPRI 0x90
-#define APIC_ARBPRI_MASK 0xFF
-#define APIC_PROCPRI 0xA0
-#define APIC_EOI 0xB0
-#define APIC_EIO_ACK 0x0 /* Write this to the EOI register */
-#define APIC_RRR 0xC0
-#define APIC_LDR 0xD0
-#define APIC_LDR_MASK (0xFF<<24)
-#define GET_APIC_LOGICAL_ID(x) (((x)>>24)&0xFF)
-#define SET_APIC_LOGICAL_ID(x) (((x)<<24))
-#define APIC_ALL_CPUS 0xFF
-#define APIC_DFR 0xE0
-#define APIC_SPIV 0xF0
-#define APIC_SPIV_FOCUS_DISABLED (1<<9)
-#define APIC_SPIV_APIC_ENABLED (1<<8)
-#define APIC_ISR 0x100
-#define APIC_TMR 0x180
-#define APIC_IRR 0x200
-#define APIC_ESR 0x280
-#define APIC_ESR_SEND_CS 0x00001
-#define APIC_ESR_RECV_CS 0x00002
-#define APIC_ESR_SEND_ACC 0x00004
-#define APIC_ESR_RECV_ACC 0x00008
-#define APIC_ESR_SENDILL 0x00020
-#define APIC_ESR_RECVILL 0x00040
-#define APIC_ESR_ILLREGA 0x00080
-#define APIC_ICR 0x300
-#define APIC_DEST_SELF 0x40000
-#define APIC_DEST_ALLINC 0x80000
-#define APIC_DEST_ALLBUT 0xC0000
-#define APIC_ICR_RR_MASK 0x30000
-#define APIC_ICR_RR_INVALID 0x00000
-#define APIC_ICR_RR_INPROG 0x10000
-#define APIC_ICR_RR_VALID 0x20000
-#define APIC_INT_LEVELTRIG 0x08000
-#define APIC_INT_ASSERT 0x04000
-#define APIC_ICR_BUSY 0x01000
-#define APIC_DEST_LOGICAL 0x00800
-#define APIC_DM_FIXED 0x00000
-#define APIC_DM_LOWEST 0x00100
-#define APIC_DM_SMI 0x00200
-#define APIC_DM_REMRD 0x00300
-#define APIC_DM_NMI 0x00400
-#define APIC_DM_INIT 0x00500
-#define APIC_DM_STARTUP 0x00600
-#define APIC_DM_EXTINT 0x00700
-#define APIC_VECTOR_MASK 0x000FF
-#define APIC_ICR2 0x310
-#define GET_APIC_DEST_FIELD(x) (((x)>>24)&0xFF)
-#define SET_APIC_DEST_FIELD(x) ((x)<<24)
-#define APIC_LVTT 0x320
-#define APIC_LVTPC 0x340
-#define APIC_LVT0 0x350
-#define APIC_LVT_TIMER_BASE_MASK (0x3<<18)
-#define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3)
-#define SET_APIC_TIMER_BASE(x) (((x)<<18))
-#define APIC_TIMER_BASE_CLKIN 0x0
-#define APIC_TIMER_BASE_TMBASE 0x1
-#define APIC_TIMER_BASE_DIV 0x2
-#define APIC_LVT_TIMER_PERIODIC (1<<17)
-#define APIC_LVT_MASKED (1<<16)
-#define APIC_LVT_LEVEL_TRIGGER (1<<15)
-#define APIC_LVT_REMOTE_IRR (1<<14)
-#define APIC_INPUT_POLARITY (1<<13)
-#define APIC_SEND_PENDING (1<<12)
-#define GET_APIC_DELIVERY_MODE(x) (((x)>>8)&0x7)
-#define SET_APIC_DELIVERY_MODE(x,y) (((x)&~0x700)|((y)<<8))
-#define APIC_MODE_FIXED 0x0
-#define APIC_MODE_NMI 0x4
-#define APIC_MODE_EXINT 0x7
-#define APIC_LVT1 0x360
-#define APIC_LVTERR 0x370
-#define APIC_TMICT 0x380
-#define APIC_TMCCT 0x390
-#define APIC_TDCR 0x3E0
-#define APIC_TDR_DIV_TMBASE (1<<2)
-#define APIC_TDR_DIV_1 0xB
-#define APIC_TDR_DIV_2 0x0
-#define APIC_TDR_DIV_4 0x1
-#define APIC_TDR_DIV_8 0x2
-#define APIC_TDR_DIV_16 0x3
-#define APIC_TDR_DIV_32 0x8
-#define APIC_TDR_DIV_64 0x9
-#define APIC_TDR_DIV_128 0xA
-
-#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
-
-#define MAX_IO_APICS 16
-
-/*
- * the local APIC register structure, memory mapped. Not terribly well
- * tested, but we might eventually use this one in the future - the
- * problem why we cannot use it right now is the P5 APIC, it has an
- * errata which cannot take 8-bit reads and writes, only 32-bit ones ...
- */
-#define u32 unsigned int
-
-#define lapic ((volatile struct local_apic *)APIC_BASE)
-
-struct local_apic {
-
-/*000*/ struct { u32 __reserved[4]; } __reserved_01;
-
-/*010*/ struct { u32 __reserved[4]; } __reserved_02;
-
-/*020*/ struct { /* APIC ID Register */
- u32 __reserved_1 : 24,
- phys_apic_id : 4,
- __reserved_2 : 4;
- u32 __reserved[3];
- } id;
-
-/*030*/ const
- struct { /* APIC Version Register */
- u32 version : 8,
- __reserved_1 : 8,
- max_lvt : 8,
- __reserved_2 : 8;
- u32 __reserved[3];
- } version;
-
-/*040*/ struct { u32 __reserved[4]; } __reserved_03;
-
-/*050*/ struct { u32 __reserved[4]; } __reserved_04;
-
-/*060*/ struct { u32 __reserved[4]; } __reserved_05;
-
-/*070*/ struct { u32 __reserved[4]; } __reserved_06;
-
-/*080*/ struct { /* Task Priority Register */
- u32 priority : 8,
- __reserved_1 : 24;
- u32 __reserved_2[3];
- } tpr;
-
-/*090*/ const
- struct { /* Arbitration Priority Register */
- u32 priority : 8,
- __reserved_1 : 24;
- u32 __reserved_2[3];
- } apr;
-
-/*0A0*/ const
- struct { /* Processor Priority Register */
- u32 priority : 8,
- __reserved_1 : 24;
- u32 __reserved_2[3];
- } ppr;
-
-/*0B0*/ struct { /* End Of Interrupt Register */
- u32 eoi;
- u32 __reserved[3];
- } eoi;
-
-/*0C0*/ struct { u32 __reserved[4]; } __reserved_07;
-
-/*0D0*/ struct { /* Logical Destination Register */
- u32 __reserved_1 : 24,
- logical_dest : 8;
- u32 __reserved_2[3];
- } ldr;
-
-/*0E0*/ struct { /* Destination Format Register */
- u32 __reserved_1 : 28,
- model : 4;
- u32 __reserved_2[3];
- } dfr;
-
-/*0F0*/ struct { /* Spurious Interrupt Vector Register */
- u32 spurious_vector : 8,
- apic_enabled : 1,
- focus_cpu : 1,
- __reserved_2 : 22;
- u32 __reserved_3[3];
- } svr;
-
-/*100*/ struct { /* In Service Register */
-/*170*/ u32 bitfield;
- u32 __reserved[3];
- } isr [8];
-
-/*180*/ struct { /* Trigger Mode Register */
-/*1F0*/ u32 bitfield;
- u32 __reserved[3];
- } tmr [8];
-
-/*200*/ struct { /* Interrupt Request Register */
-/*270*/ u32 bitfield;
- u32 __reserved[3];
- } irr [8];
-
-/*280*/ union { /* Error Status Register */
- struct {
- u32 send_cs_error : 1,
- receive_cs_error : 1,
- send_accept_error : 1,
- receive_accept_error : 1,
- __reserved_1 : 1,
- send_illegal_vector : 1,
- receive_illegal_vector : 1,
- illegal_register_address : 1,
- __reserved_2 : 24;
- u32 __reserved_3[3];
- } error_bits;
- struct {
- u32 errors;
- u32 __reserved_3[3];
- } all_errors;
- } esr;
-
-/*290*/ struct { u32 __reserved[4]; } __reserved_08;
-
-/*2A0*/ struct { u32 __reserved[4]; } __reserved_09;
-
-/*2B0*/ struct { u32 __reserved[4]; } __reserved_10;
-
-/*2C0*/ struct { u32 __reserved[4]; } __reserved_11;
-
-/*2D0*/ struct { u32 __reserved[4]; } __reserved_12;
-
-/*2E0*/ struct { u32 __reserved[4]; } __reserved_13;
-
-/*2F0*/ struct { u32 __reserved[4]; } __reserved_14;
-
-/*300*/ struct { /* Interrupt Command Register 1 */
- u32 vector : 8,
- delivery_mode : 3,
- destination_mode : 1,
- delivery_status : 1,
- __reserved_1 : 1,
- level : 1,
- trigger : 1,
- __reserved_2 : 2,
- shorthand : 2,
- __reserved_3 : 12;
- u32 __reserved_4[3];
- } icr1;
-
-/*310*/ struct { /* Interrupt Command Register 2 */
- union {
- u32 __reserved_1 : 24,
- phys_dest : 4,
- __reserved_2 : 4;
- u32 __reserved_3 : 24,
- logical_dest : 8;
- } dest;
- u32 __reserved_4[3];
- } icr2;
-
-/*320*/ struct { /* LVT - Timer */
- u32 vector : 8,
- __reserved_1 : 4,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- timer_mode : 1,
- __reserved_3 : 14;
- u32 __reserved_4[3];
- } lvt_timer;
-
-/*330*/ struct { u32 __reserved[4]; } __reserved_15;
-
-/*340*/ struct { /* LVT - Performance Counter */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- __reserved_3 : 15;
- u32 __reserved_4[3];
- } lvt_pc;
-
-/*350*/ struct { /* LVT - LINT0 */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- polarity : 1,
- remote_irr : 1,
- trigger : 1,
- mask : 1,
- __reserved_2 : 15;
- u32 __reserved_3[3];
- } lvt_lint0;
-
-/*360*/ struct { /* LVT - LINT1 */
- u32 vector : 8,
- delivery_mode : 3,
- __reserved_1 : 1,
- delivery_status : 1,
- polarity : 1,
- remote_irr : 1,
- trigger : 1,
- mask : 1,
- __reserved_2 : 15;
- u32 __reserved_3[3];
- } lvt_lint1;
-
-/*370*/ struct { /* LVT - Error */
- u32 vector : 8,
- __reserved_1 : 4,
- delivery_status : 1,
- __reserved_2 : 3,
- mask : 1,
- __reserved_3 : 15;
- u32 __reserved_4[3];
- } lvt_error;
-
-/*380*/ struct { /* Timer Initial Count Register */
- u32 initial_count;
- u32 __reserved_2[3];
- } timer_icr;
-
-/*390*/ const
- struct { /* Timer Current Count Register */
- u32 curr_count;
- u32 __reserved_2[3];
- } timer_ccr;
-
-/*3A0*/ struct { u32 __reserved[4]; } __reserved_16;
-
-/*3B0*/ struct { u32 __reserved[4]; } __reserved_17;
-
-/*3C0*/ struct { u32 __reserved[4]; } __reserved_18;
-
-/*3D0*/ struct { u32 __reserved[4]; } __reserved_19;
-
-/*3E0*/ struct { /* Timer Divide Configuration Register */
- u32 divisor : 4,
- __reserved_1 : 28;
- u32 __reserved_2[3];
- } timer_dcr;
-
-/*3F0*/ struct { u32 __reserved[4]; } __reserved_20;
-
-} __attribute__ ((packed));
-
-#undef u32
-
-#endif
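
For orientation, the ICR is programmed by OR-ing the mode, trigger and
destination macros above into two 32-bit words. A minimal user-space sketch
of the composition (constants repeated locally so it compiles standalone;
the target APIC ID and trampoline address are made-up examples, and no
hardware is touched):

    #include <stdio.h>
    #include <stdint.h>

    /* Constants as defined in the header above. */
    #define APIC_INT_LEVELTRIG     0x08000
    #define APIC_INT_ASSERT        0x04000
    #define APIC_DM_INIT           0x00500
    #define APIC_DM_STARTUP        0x00600
    #define SET_APIC_DEST_FIELD(x) ((x) << 24)

    int main(void)
    {
        uint32_t target     = 1;        /* physical APIC ID of target CPU (example) */
        uint32_t trampoline = 0x7000;   /* page-aligned real-mode entry (example)   */

        /* ICR2 (offset 0x310) carries the destination; ICR (0x300) carries
         * the delivery mode, level/trigger bits and, for STARTUP, a vector
         * encoding the 4K page of the entry point. */
        uint32_t icr2 = SET_APIC_DEST_FIELD(target);
        uint32_t init = APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT;
        uint32_t sipi = APIC_DM_STARTUP | ((trampoline >> 12) & 0xFF);

        printf("ICR2=%#x INIT=%#x SIPI=%#x\n", icr2, init, sipi);
        return 0;
    }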
+++ /dev/null
-#ifndef __ARCH_X86_64_ATOMIC__
-#define __ARCH_X86_64_ATOMIC__
-
-#include <xen/config.h>
-
-/*
- * Atomic operations that C can't guarantee us. Useful for
- * resource counting etc..
- */
-
-#ifdef CONFIG_SMP
-#define LOCK "lock ; "
-#else
-#define LOCK ""
-#endif
-
-/*
- * Make sure gcc doesn't try to be clever and move things around
- * on us. We need to use _exactly_ the address the user gave us,
- * not some alias that contains the same information.
- */
-typedef struct { volatile int counter; } atomic_t;
-
-#define ATOMIC_INIT(i) { (i) }
-
-/**
- * atomic_read - read atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically reads the value of @v. Note that the guaranteed
- * useful range of an atomic_t is only 24 bits.
- */
-#define atomic_read(v) ((v)->counter)
-
-/**
- * atomic_set - set atomic variable
- * @v: pointer of type atomic_t
- * @i: required value
- *
- * Atomically sets the value of @v to @i. Note that the guaranteed
- * useful range of an atomic_t is only 24 bits.
- */
-#define atomic_set(v,i) (((v)->counter) = (i))
-
-/**
- * atomic_add - add integer to atomic variable
- * @i: integer value to add
- * @v: pointer of type atomic_t
- *
- * Atomically adds @i to @v. Note that the guaranteed useful range
- * of an atomic_t is only 24 bits.
- */
-static __inline__ void atomic_add(int i, atomic_t *v)
-{
- __asm__ __volatile__(
- LOCK "addl %1,%0"
- :"=m" (v->counter)
- :"ir" (i), "m" (v->counter));
-}
-
-/**
- * atomic_sub - subtract integer from atomic variable
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v. Note that the guaranteed
- * useful range of an atomic_t is only 24 bits.
- */
-static __inline__ void atomic_sub(int i, atomic_t *v)
-{
- __asm__ __volatile__(
- LOCK "subl %1,%0"
- :"=m" (v->counter)
- :"ir" (i), "m" (v->counter));
-}
-
-/**
- * atomic_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases. Note that the guaranteed
- * useful range of an atomic_t is only 24 bits.
- */
-static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- LOCK "subl %2,%0; sete %1"
- :"=m" (v->counter), "=qm" (c)
- :"ir" (i), "m" (v->counter) : "memory");
- return c;
-}
-
-/**
- * atomic_inc - increment atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1. Note that the guaranteed
- * useful range of an atomic_t is only 24 bits.
- */
-static __inline__ void atomic_inc(atomic_t *v)
-{
- __asm__ __volatile__(
- LOCK "incl %0"
- :"=m" (v->counter)
- :"m" (v->counter));
-}
-
-/**
- * atomic_dec - decrement atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1. Note that the guaranteed
- * useful range of an atomic_t is only 24 bits.
- */
-static __inline__ void atomic_dec(atomic_t *v)
-{
- __asm__ __volatile__(
- LOCK "decl %0"
- :"=m" (v->counter)
- :"m" (v->counter));
-}
-
-/**
- * atomic_dec_and_test - decrement and test
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases. Note that the guaranteed
- * useful range of an atomic_t is only 24 bits.
- */
-static __inline__ int atomic_dec_and_test(atomic_t *v)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- LOCK "decl %0; sete %1"
- :"=m" (v->counter), "=qm" (c)
- :"m" (v->counter) : "memory");
- return c != 0;
-}
-
-/**
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases. Note that the guaranteed
- * useful range of an atomic_t is only 24 bits.
- */
-static __inline__ int atomic_inc_and_test(atomic_t *v)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- LOCK "incl %0; sete %1"
- :"=m" (v->counter), "=qm" (c)
- :"m" (v->counter) : "memory");
- return c != 0;
-}
-
-/**
- * atomic_add_negative - add and test if negative
- * @v: pointer of type atomic_t
- * @i: integer value to add
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero. Note that the guaranteed
- * useful range of an atomic_t is only 24 bits.
- */
-static __inline__ int atomic_add_negative(int i, atomic_t *v)
-{
- unsigned char c;
-
- __asm__ __volatile__(
- LOCK "addl %2,%0; sets %1"
- :"=m" (v->counter), "=qm" (c)
- :"ir" (i), "m" (v->counter) : "memory");
- return c;
-}
-
-
-/* These are x86-specific, used by some header files */
-#define atomic_clear_mask(mask, addr) \
-__asm__ __volatile__(LOCK "andl %0,%1" \
-: : "r" (~(mask)),"m" (*addr) : "memory")
-
-#define atomic_set_mask(mask, addr) \
-__asm__ __volatile__(LOCK "orl %0,%1" \
-: : "r" ((unsigned)mask),"m" (*addr) : "memory")
-
-/* Atomic operations are already serializing on x86 */
-#define smp_mb__before_atomic_dec() barrier()
-#define smp_mb__after_atomic_dec() barrier()
-#define smp_mb__before_atomic_inc() barrier()
-#define smp_mb__after_atomic_inc() barrier()
-
-#endif /* __ARCH_X86_64_ATOMIC__ */
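
As a usage note, these primitives compose directly into the standard
reference-counting idiom. A sketch only, assuming this header is in scope;
the buffer type and the free_buffer() destructor are hypothetical:

    struct buffer {
        atomic_t refcnt;        /* initialised with ATOMIC_INIT(1) */
        /* ... payload ... */
    };

    static inline void buffer_get(struct buffer *b)
    {
        atomic_inc(&b->refcnt);
    }

    static inline void buffer_put(struct buffer *b)
    {
        /* The lock-prefixed decrement and 'sete' execute as one atomic
         * step, so exactly one caller observes the 1->0 transition and
         * frees the object, however the CPUs interleave. */
        if (atomic_dec_and_test(&b->refcnt))
            free_buffer(b);     /* hypothetical destructor */
    }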
+++ /dev/null
-#ifndef _X86_64_BITOPS_H
-#define _X86_64_BITOPS_H
-
-/*
- * Copyright 1992, Linus Torvalds.
- */
-
-#include <xen/config.h>
-
-/*
- * These have to be done with inline assembly: that way the bit-setting
- * is guaranteed to be atomic. All bit operations return 0 if the bit
- * was cleared before the operation and != 0 if it was not.
- *
- * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
- */
-
-#ifdef CONFIG_SMP
-#define LOCK_PREFIX "lock ; "
-#else
-#define LOCK_PREFIX ""
-#endif
-
-#define ADDR (*(volatile long *) addr)
-
-/**
- * set_bit - Atomically set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * This function is atomic and may not be reordered. See __set_bit()
- * if you do not require the atomic guarantees.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static __inline__ void set_bit(long nr, volatile void * addr)
-{
- __asm__ __volatile__( LOCK_PREFIX
- "btsq %1,%0"
- :"=m" (ADDR)
- :"dIr" (nr));
-}
-
-/**
- * __set_bit - Set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * Unlike set_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static __inline__ void __set_bit(long nr, volatile void * addr)
-{
- __asm__(
- "btsq %1,%0"
- :"=m" (ADDR)
- :"dIr" (nr));
-}
-
-/**
- * clear_bit - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and may not be reordered. However, it does
- * not contain a memory barrier, so if it is used for locking purposes,
- * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
- * in order to ensure changes are visible on other processors.
- */
-static __inline__ void clear_bit(long nr, volatile void * addr)
-{
- __asm__ __volatile__( LOCK_PREFIX
- "btrq %1,%0"
- :"=m" (ADDR)
- :"dIr" (nr));
-}
-#define smp_mb__before_clear_bit() barrier()
-#define smp_mb__after_clear_bit() barrier()
-
-/**
- * __change_bit - Toggle a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * Unlike change_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static __inline__ void __change_bit(long nr, volatile void * addr)
-{
- __asm__ __volatile__(
- "btcq %1,%0"
- :"=m" (ADDR)
- :"dIr" (nr));
-}
-
-/**
- * change_bit - Toggle a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * change_bit() is atomic and may not be reordered.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static __inline__ void change_bit(long nr, volatile void * addr)
-{
- __asm__ __volatile__( LOCK_PREFIX
- "btcq %1,%0"
- :"=m" (ADDR)
- :"dIr" (nr));
-}
-
-/**
- * test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static __inline__ int test_and_set_bit(long nr, volatile void * addr)
-{
- long oldbit;
-
- __asm__ __volatile__( LOCK_PREFIX
- "btsq %2,%1\n\tsbbq %0,%0"
- :"=r" (oldbit),"=m" (ADDR)
- :"dIr" (nr) : "memory");
- return oldbit;
-}
-
-/**
- * __test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail. You must protect multiple accesses with a lock.
- */
-static __inline__ int __test_and_set_bit(long nr, volatile void * addr)
-{
- long oldbit;
-
- __asm__(
- "btsq %2,%1\n\tsbbq %0,%0"
- :"=r" (oldbit),"=m" (ADDR)
- :"dIr" (nr));
- return oldbit;
-}
-
-/**
- * test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static __inline__ int test_and_clear_bit(long nr, volatile void * addr)
-{
- long oldbit;
-
- __asm__ __volatile__( LOCK_PREFIX
- "btrq %2,%1\n\tsbbq %0,%0"
- :"=r" (oldbit),"=m" (ADDR)
- :"dIr" (nr) : "memory");
- return oldbit;
-}
-
-/**
- * __test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail. You must protect multiple accesses with a lock.
- */
-static __inline__ int __test_and_clear_bit(long nr, volatile void * addr)
-{
- long oldbit;
-
- __asm__(
- "btrq %2,%1\n\tsbbq %0,%0"
- :"=r" (oldbit),"=m" (ADDR)
- :"dIr" (nr));
- return oldbit;
-}
-
-/* WARNING: non-atomic and it can be reordered! */
-static __inline__ int __test_and_change_bit(long nr, volatile void * addr)
-{
- long oldbit;
-
- __asm__ __volatile__(
- "btcq %2,%1\n\tsbbq %0,%0"
- :"=r" (oldbit),"=m" (ADDR)
- :"dIr" (nr) : "memory");
- return oldbit;
-}
-
-/**
- * test_and_change_bit - Change a bit and return its new value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static __inline__ int test_and_change_bit(long nr, volatile void * addr)
-{
- long oldbit;
-
- __asm__ __volatile__( LOCK_PREFIX
- "btcq %2,%1\n\tsbbq %0,%0"
- :"=r" (oldbit),"=m" (ADDR)
- :"dIr" (nr) : "memory");
- return oldbit;
-}
-
-#if 0 /* Fool kernel-doc since it doesn't do macros yet */
-/**
- * test_bit - Determine whether a bit is set
- * @nr: bit number to test
- * @addr: Address to start counting from
- */
-static int test_bit(int nr, const volatile void * addr);
-#endif
-
-static __inline__ int constant_test_bit(long nr, const volatile void * addr)
-{
- return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
-}
-
-static __inline__ int variable_test_bit(long nr, volatile void * addr)
-{
- long oldbit;
-
- __asm__ __volatile__(
- "btq %2,%1\n\tsbbq %0,%0"
- :"=r" (oldbit)
- :"m" (ADDR),"dIr" (nr));
- return oldbit;
-}
-
-#define test_bit(nr,addr) \
-(__builtin_constant_p(nr) ? \
- constant_test_bit((nr),(addr)) : \
- variable_test_bit((nr),(addr)))
-
-/**
- * find_first_zero_bit - find the first zero bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum bitnumber to search
- *
- * Returns the bit-number of the first zero bit, not the number of the byte
- * containing a bit. If no zero bit exists, a value of at least @size is
- * returned.
- */
-static __inline__ int find_first_zero_bit(void * addr, unsigned size)
-{
- int d0, d1, d2;
- int res;
-
- if (!size)
- return 0;
- __asm__ __volatile__(
- "movl $-1,%%eax\n\t"
- "xorl %%edx,%%edx\n\t"
- "repe; scasl\n\t"
- "je 1f\n\t"
- "xorl -4(%%rdi),%%eax\n\t"
- "subq $4,%%rdi\n\t"
- "bsfl %%eax,%%edx\n"
- "1:\tsubq %%rbx,%%rdi\n\t"
- "shlq $3,%%rdi\n\t"
- "addq %%rdi,%%rdx"
- :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
- :"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory");
- return res;
-}
-
-/**
- * find_next_zero_bit - find the first zero bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
-{
- unsigned int * p = ((unsigned int *) addr) + (offset >> 5);
- int set = 0, bit = offset & 31, res;
-
- if (bit) {
- /*
- * Look for a zero bit in the first 32-bit word
- */
- __asm__("bsfl %1,%0\n\t"
- "jne 1f\n\t"
- "movl $32, %0\n"
- "1:"
- : "=r" (set)
- : "r" (~(*p >> bit)));
- if (set < (32 - bit))
- return set + offset;
- set = 32 - bit;
- p++;
- }
- /*
- * No zero yet, search the remaining full words for a zero
- */
- res = find_first_zero_bit (p, size - 32 * (p - (unsigned int *) addr));
- return (offset + set + res);
-}
-
-/*
- * Find string of zero bits in a bitmap. -1 when not found.
- */
-extern unsigned long
-find_next_zero_string(unsigned long *bitmap, long start, long nbits, int len);
-
-static inline void set_bit_string(unsigned long *bitmap, unsigned long i,
- int len)
-{
- unsigned long end = i + len;
- while (i < end) {
- __set_bit(i, bitmap);
- i++;
- }
-}
-
-static inline void clear_bit_string(unsigned long *bitmap, unsigned long i,
- int len)
-{
- unsigned long end = i + len;
- while (i < end) {
- clear_bit(i, bitmap);
- i++;
- }
-}
-
-/**
- * ffz - find first zero in word.
- * @word: The word to search
- *
- * Undefined if no zero exists, so code should check against ~0UL first.
- */
-static __inline__ unsigned long ffz(unsigned long word)
-{
- __asm__("bsfq %1,%0"
- :"=r" (word)
- :"r" (~word));
- return word;
-}
-
-
-/**
- * ffs - find first bit set
- * @x: the word to search
- *
- * This is defined the same way as
- * the libc and compiler builtin ffs routines, therefore
- * differs in spirit from the above ffz (man ffs).
- */
-static __inline__ int ffs(int x)
-{
- int r;
-
- __asm__("bsfl %1,%0\n\t"
- "jnz 1f\n\t"
- "movl $-1,%0\n"
- "1:" : "=r" (r) : "g" (x));
- return r+1;
-}
-
-/**
- * hweightN - returns the hamming weight of a N-bit word
- * @x: the word to weigh
- *
- * The Hamming Weight of a number is the total number of bits set in it.
- */
-
-#define hweight32(x) generic_hweight32(x)
-#define hweight16(x) generic_hweight16(x)
-#define hweight8(x) generic_hweight8(x)
-
-
-
-#define ext2_set_bit __test_and_set_bit
-#define ext2_clear_bit __test_and_clear_bit
-#define ext2_test_bit test_bit
-#define ext2_find_first_zero_bit find_first_zero_bit
-#define ext2_find_next_zero_bit find_next_zero_bit
-
-/* Bitmap functions for the minix filesystem. */
-#define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,addr)
-#define minix_set_bit(nr,addr) __set_bit(nr,addr)
-#define minix_test_and_clear_bit(nr,addr) __test_and_clear_bit(nr,addr)
-#define minix_test_bit(nr,addr) test_bit(nr,addr)
-#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
-
-
-#endif /* _X86_64_BITOPS_H */
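
To make the intended use concrete, here is the allocate-an-ID idiom the
operations above support, written with plain C stand-ins so it runs in user
space; inside Xen the two marked lines would be the find_first_zero_bit()/
set_bit() pair instead. NR_IDS and the single-threaded setting are
assumptions of the sketch:

    #include <stdio.h>

    #define NR_IDS        128
    #define BITS_PER_LONG (8 * sizeof(unsigned long))
    static unsigned long idmap[NR_IDS / (8 * sizeof(unsigned long))];

    static int alloc_id(void)
    {
        for (int i = 0; i < NR_IDS; i++) {
            unsigned long *w = &idmap[i / BITS_PER_LONG];
            unsigned long  m = 1UL << (i % BITS_PER_LONG);
            if (!(*w & m)) {    /* cf. find_first_zero_bit(idmap, NR_IDS) */
                *w |= m;        /* cf. set_bit(i, idmap)                  */
                return i;
            }
        }
        return -1;              /* map exhausted */
    }

    int main(void)
    {
        printf("%d %d %d\n", alloc_id(), alloc_id(), alloc_id()); /* 0 1 2 */
        return 0;
    }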
+++ /dev/null
-/*
- * include/asm-x8664/cache.h
- */
-#ifndef __ARCH_X8664_CACHE_H
-#define __ARCH_X8664_CACHE_H
-
-#include <xen/config.h>
-
-/* L1 cache line size */
-#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
-#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
-
-#endif
+++ /dev/null
-/******************************************************************************
- * config.h
- *
- * A Linux-style configuration list.
- *
- */
-
-#ifndef __XEN_X86_64_CONFIG_H__
-#define __XEN_X86_64_CONFIG_H__
-
-#define CONFIG_X86 1
-#define CONFIG_X86_64BITMODE 1
-
-#define CONFIG_SMP 1
-#define CONFIG_X86_LOCAL_APIC 1
-#define CONFIG_X86_IO_APIC 1
-#define CONFIG_X86_L1_CACHE_SHIFT 5
-
-#define CONFIG_PCI 1
-#define CONFIG_PCI_BIOS 1
-#define CONFIG_PCI_DIRECT 1
-
-#define CONFIG_IDE 1
-#define CONFIG_BLK_DEV_IDE 1
-#define CONFIG_BLK_DEV_IDEDMA 1
-#define CONFIG_BLK_DEV_IDEPCI 1
-#define CONFIG_IDEDISK_MULTI_MODE 1
-#define CONFIG_IDEDISK_STROKE 1
-#define CONFIG_IDEPCI_SHARE_IRQ 1
-#define CONFIG_BLK_DEV_IDEDMA_PCI 1
-#define CONFIG_IDEDMA_PCI_AUTO 1
-#define CONFIG_IDEDMA_AUTO 1
-#define CONFIG_IDEDMA_ONLYDISK 1
-#define CONFIG_BLK_DEV_IDE_MODES 1
-#define CONFIG_BLK_DEV_PIIX 1
-
-#define CONFIG_SCSI 1
-#define CONFIG_SCSI_LOGGING 1
-#define CONFIG_BLK_DEV_SD 1
-#define CONFIG_SD_EXTRA_DEVS 40
-#define CONFIG_SCSI_MULTI_LUN 1
-
-#define CONFIG_XEN_ATTENTION_KEY 1
-
-#define HZ 100
-
-/*
- * Just to keep compiler happy.
- * NB. DO NOT CHANGE SMP_CACHE_BYTES WITHOUT FIXING arch/i386/entry.S!!!
- * It depends on size of irq_cpustat_t, for example, being 64 bytes. :-)
- * Mmmm... so niiiiiice....
- */
-#define SMP_CACHE_BYTES 64
-#define NR_CPUS 16
-#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
-#define ____cacheline_aligned __cacheline_aligned
-
-#define PHYSICAL_ADDRESS_BITS 52
-#define MAX_PHYSICAL_ADDRESS (1ULL << PHYSICAL_ADDRESS_BITS)
-#define VIRTUAL_ADDRESS_BITS 48
-#define XEN_PAGE_SIZE 4096
-
-#define PTE_SIZE 8
-#define TOTAL_PTES (512ULL * 512 * 512 * 512)
-
-/* next PML4 from an _END address */
-#define PML4_BITS 39
-#define PML4_SPACE (1ULL << PML4_BITS)
-
-/*
- * Memory layout
- *
- * 0x0000000000000000 - 0x00007fffffffffff Guest & user apps (128TB)
- * (Only for 32-bit guests)
- * 0x00000000fc000000 - 0x00000000fc3fffff Machine/Physical 32-bit shadow (4MB)
- * 0x00000000fc400000 - 0x00000000feffffff IO remap for 32-bit guests (44MB)
- * 0x00000000ff000000 - 0x00000000ff3fffff 32-bit PTE shadow (4MB)
- *
- * 0xffff800000000000 - 0xffff807fffffffff Linear page table (512GB)
- * 0xffff808000000000 - 0xffff80ffffffffff Reserved for shadow page table (512GB)
- *
- * 0xffff810000000000 - 0xffff82ffffffffff Xen PML4 slots
- * 0xffff810000000000 - 0xffff81003fffffff Xen hypervisor virtual space (1GB)
- * 0xffff810040000000 - 0xffff81807fffffff Per-domain mappings (1GB)
- * 0xffff810080000000 - 0xffff81387fffffff R/O physical map (224GB)
- * 0xffff813880000000 - 0xffff81707fffffff R/W physical map (224GB)
- * 0xffff817080000000 - 0xffff82c07fffffff Frame table (1344GB)
- * 0xffff82c080000000 - 0xffff82c0bfffffff I/O remap space (1GB)
- * 0xffff82c0c0000000 - 0xffff82ffffffffff (253GB)
- *
- * 0xffff830000000000 - 0xffff87ffffffffff RESERVED (5TB)
- *
- * 0xffff880000000000 - ... Physical 1:1 direct mapping (112TB max)
- * 0xffff880000000000 - 0xffff880001000000 Low memory DMA region (16M)
- *
- * 0xfffff80000000000 - 0xffffffffffffffff Reserved for guest (8TB)
- *
- * The requirement that we have a 1:1 map of physical memory limits
- * the maximum memory size we can support. With only 48 virtual address
- * bits, and the assumption that guests will run users in positive address
- * space, a contiguous 1:1 map can only live in the negative address space.
- * Since we don't want to bump guests out of the very top of memory and
- * force relocation, we can't use this entire space, and Xen has several
- * heavy mappings that require PML4 slices. Just to be safe, we reserve
- * 16 PML4s each for Xen and the guest. 224 PML4s give us 112 terabytes
- * of addressable memory. Any high device physical addresses beyond this
- * region can be mapped into the IO remap space or some of the reserved
- * 5TB region.
- *
- * 112 TB is just 16 TB shy of the maximum physical memory supported
- * on Linux 2.6.0, and should be enough for anybody.
- *
- * There are some additional constraints in the memory layout that require
- * several changes from the i386 architecture.
- *
- * ACPI data and ACPI non-volatile storage must be placed in some region
- * of memory below the 4GB mark. Depending on the BIOS and system, we
- * may have this located as low as 1GB. This means allocating large
- * chunks of physically contiguous memory from the direct mapping may not
- * be possible.
- *
- * The full frame table for 112TB of physical memory currently occupies
- * 1344GB of space. This clearly cannot be allocated in physically contiguous
- * space, so it must be moved to a virtual address.
- *
- * Both copies of the machine->physical table must also be relocated.
- * (112 TB / 4k) * 8 bytes means that each copy of the physical map requires
- * 224GB of space, thus it also must move to VM space.
- *
- * The physical pages used to allocate the page tables for the direct 1:1
- * map may occupy (112TB / 2M) * 8 bytes = 448MB. This is almost guaranteed
- * to fit in contiguous physical memory, but these pages used to be allocated
- * in the Xen monitor address space. This means the Xen address space must
- * accommodate up to ~500 MB, which means it also must move out of the
- * direct mapped region.
- *
- * Since both copies of the MPT, the frame table, and Xen now exist in
- * purely virtual space, we have the added advantage of being able to
- * map them to local pages on NUMA machines, or use NUMA aware memory
- * allocation within Xen itself.
- *
- * Additionally, the 1:1 page table now exists contiguously in virtual
- * space, but may be mapped to physically separated pages, allowing
- * each node to contain the page tables for its own local memory. Setting
- * up this mapping presents a bit of a chicken-egg problem, but is possible
- * as a future enhancement.
- *
- * Zachary Amsden (zamsden@cisco.com)
- *
- */
-
-/* Guest and user space */
-#define NSPACE_VIRT_START 0
-#define NSPACE_VIRT_END (1ULL << (VIRTUAL_ADDRESS_BITS - 1))
-
-/* Privileged space */
-#define ESPACE_VIRT_END 0
-#define ESPACE_VIRT_START (ESPACE_VIRT_END-(1ULL << (VIRTUAL_ADDRESS_BITS-1)))
-
-/* reservations in e-space */
-#define GUEST_RESERVED_PML4S 16
-#define XEN_RESERVED_PML4S 16
-
-#define MAX_MEMORY_SIZE ((1ULL << (VIRTUAL_ADDRESS_BITS-1)) \
- -((GUEST_RESERVED_PML4S + XEN_RESERVED_PML4S) * PML4_SPACE))
-#define MAX_MEMORY_FRAMES (MAX_MEMORY_SIZE / XEN_PAGE_SIZE)
-
-/*
- * Virtual addresses beyond this are not modifiable by guest OSes.
- */
-#define HYPERVISOR_VIRT_START ESPACE_VIRT_START
-#define HYPERVISOR_VIRT_END (ESPACE_VIRT_END-(GUEST_RESERVED_PML4S * PML4_SPACE))
-
-/* First 512GB of virtual address space is used as a linear p.t. mapping. */
-#define LINEAR_PT_VIRT_START (HYPERVISOR_VIRT_START)
-#define LINEAR_PT_VIRT_END (LINEAR_PT_VIRT_START + (PTE_SIZE * TOTAL_PTES))
-
-/* Reserve some space for a shadow PT mapping */
-#define SHADOW_PT_VIRT_START (LINEAR_PT_VIRT_END)
-#define SHADOW_PT_VIRT_END (SHADOW_PT_VIRT_START + (PTE_SIZE * TOTAL_PTES))
-
-/* Xen exists in the first 1GB of the next PML4 space */
-#define MAX_MONITOR_ADDRESS (1 * 1024 * 1024 * 1024)
-#define MONITOR_VIRT_START (SHADOW_PT_VIRT_END)
-#define MONITOR_VIRT_END (MONITOR_VIRT_START + MAX_MONITOR_ADDRESS)
-
-/* Next 1GB of virtual address space used for per-domain mappings (eg. GDT). */
-#define PERDOMAIN_VIRT_START (MONITOR_VIRT_END)
-#define PERDOMAIN_VIRT_END (PERDOMAIN_VIRT_START + (512 * 512 * 4096))
-#define GDT_VIRT_START (PERDOMAIN_VIRT_START)
-#define GDT_VIRT_END (GDT_VIRT_START + (128*1024))
-#define LDT_VIRT_START (GDT_VIRT_END)
-#define LDT_VIRT_END (LDT_VIRT_START + (128*1024))
-
-/*
- * The first copy of the machine->physical mapping table (MPT table) is
- * mapped read-only for all domains. The following are virtual addresses.
- */
-#define READONLY_MPT_VIRT_START (PERDOMAIN_VIRT_END)
-#define READONLY_MPT_VIRT_END (READONLY_MPT_VIRT_START + (PTE_SIZE * MAX_MEMORY_FRAMES))
-
-/* R/W machine->physical table */
-#define RDWR_MPT_VIRT_START (READONLY_MPT_VIRT_END)
-#define RDWR_MPT_VIRT_END (RDWR_MPT_VIRT_START + (PTE_SIZE * MAX_MEMORY_FRAMES))
-
-/* Frame table */
-#define FRAMETABLE_ENTRY_SIZE (48)
-#define FRAMETABLE_VIRT_START (RDWR_MPT_VIRT_END)
-#define FRAMETABLE_VIRT_END (FRAMETABLE_VIRT_START + (FRAMETABLE_ENTRY_SIZE * MAX_MEMORY_FRAMES))
-
-/* Next 1GB of virtual address space used for ioremap(). */
-#define IOREMAP_VIRT_START (FRAMETABLE_VIRT_END)
-#define IOREMAP_VIRT_END (IOREMAP_VIRT_START + (512 * 512 * 4096))
-
-/* And the virtual addresses for the direct-map region... */
-#define DIRECTMAP_VIRT_START (ESPACE_VIRT_START + (XEN_RESERVED_PML4S * PML4_SPACE))
-#define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + MAX_DIRECTMAP_ADDRESS)
-
-/*
- * Next is the direct-mapped memory region. The following are machine addresses.
- */
-#define MAX_DMA_ADDRESS (16*1024*1024)
-#define MAX_DIRECTMAP_ADDRESS MAX_MEMORY_SIZE
-
-
-
-/*
- * Amount of slack domain memory to leave in system, in kilobytes.
- * Prevents a hard out-of-memory crunch for things like network receive.
- */
-#define SLACK_DOMAIN_MEM_KILOBYTES 2048
-
-
-/*
- * These will probably change in the future..
- * locations for 32-bit guest compatibility mappings
- */
-
-/* 4M of 32-bit machine-physical shadow in low 4G of VM space */
-#define SHADOW_MPT32_VIRT_START (0xfc000000)
-#define SHADOW_MPT32_VIRT_END (SHADOW_MPT32_VIRT_START + (4 * 1024 * 1024))
-
-/* 44M of I/O remap for 32-bit drivers */
-#define IOREMAP_LOW_VIRT_START (SHADOW_MPT32_VIRT_END)
-#define IOREMAP_LOW_VIRT_END (IOREMAP_LOW_VIRT_START + (44 * 1024 * 1024))
-
-/* 4M of 32-bit page table */
-#define SHADOW_PT32_VIRT_START (IOREMAP_LOW_VIRT_END)
-#define SHADOW_PT32_VIRT_END (SHADOW_PT32_VIRT_START + (4 * 1024 * 1024))
-
-
-/* Linkage for x86 */
-#define FASTCALL(x) x __attribute__((regparm(3)))
-#define asmlinkage __attribute__((regparm(0)))
-#define __ALIGN .align 16,0x90
-#define __ALIGN_STR ".align 16,0x90"
-#define SYMBOL_NAME_STR(X) #X
-#define SYMBOL_NAME(X) X
-#define SYMBOL_NAME_LABEL(X) X##:
-#ifdef __ASSEMBLY__
-#define ALIGN __ALIGN
-#define ALIGN_STR __ALIGN_STR
-#define ENTRY(name) \
- .globl SYMBOL_NAME(name); \
- ALIGN; \
- SYMBOL_NAME_LABEL(name)
-#endif
-
-#define PGT_base_page_table PGT_l4_page_table
-
-#define barrier() __asm__ __volatile__("": : :"memory")
-
-/*
- * Hypervisor segment selectors
- */
-#define __HYPERVISOR_CS64 0x0810
-#define __HYPERVISOR_CS32 0x0808
-#define __HYPERVISOR_DS 0x0818
-
-#define NR_syscalls 256
-
-#ifndef NDEBUG
-#define MEMORY_GUARD
-#endif
-
-#ifndef __ASSEMBLY__
-extern unsigned long _end; /* standard ELF symbol */
-extern void __out_of_line_bug(int line) __attribute__((noreturn));
-#define out_of_line_bug() __out_of_line_bug(__LINE__)
-#endif /* __ASSEMBLY__ */
-
-#endif /* __XEN_X86_64_CONFIG_H__ */
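
The headline numbers in the layout comment (112TB of addressable memory, a
1344GB frame table, 224GB per MPT copy) follow directly from the constants
above. A small self-contained program re-deriving them, offered only as a
sanity check, not build code:

    #include <stdio.h>

    #define TB (1ULL << 40)
    #define GB (1ULL << 30)

    int main(void)
    {
        unsigned long long pml4_space  = 1ULL << 39;             /* 512GB per slot */
        unsigned long long vspace_half = 1ULL << 47;             /* negative half  */
        unsigned long long reserved    = (16 + 16) * pml4_space; /* Xen + guest    */
        unsigned long long max_mem     = vspace_half - reserved;
        unsigned long long frames      = max_mem / 4096;         /* 4K frames      */

        printf("max memory : %llu TB\n", max_mem / TB);          /* 112  */
        printf("frame table: %llu GB\n", frames * 48 / GB);      /* 1344 */
        printf("MPT copy   : %llu GB\n", frames * 8 / GB);       /* 224  */
        return 0;
    }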
+++ /dev/null
-/*
- * cpufeature.h
- *
- * Defines x86 CPU feature bits
- */
-
-#ifndef __ASM_X8664_CPUFEATURE_H
-#define __ASM_X8664_CPUFEATURE_H
-
-/* Sample usage: CPU_FEATURE_P(cpu.x86_capability, FPU) */
-#define CPU_FEATURE_P(CAP, FEATURE) test_bit(X86_FEATURE_##FEATURE, CAP)
-
-#define NCAPINTS 4 /* Currently we have 4 32-bit words worth of info */
-
-/* Intel-defined CPU features, CPUID level 0x00000001, word 0 */
-#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */
-#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */
-#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */
-#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN (0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */
-#define X86_FEATURE_DTES (0*32+21) /* Debug Trace Store */
-#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */
- /* of FPU context), and CR4.OSFXSR available */
-#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */
-#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */
-#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */
-#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */
-#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */
-
-/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
-/* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */
-#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */
-#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */
-
-/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */
-
-/* Other features, Linux-defined mapping, word 3 */
-/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */
-
-#endif /* __ASM_X8664_CPUFEATURE_H */
-
-/*
- * Local Variables:
- * mode:c
- * comment-column:42
- * End:
- */
+++ /dev/null
-#ifndef _X86_64_CURRENT_H
-#define _X86_64_CURRENT_H
-
-#if !defined(__ASSEMBLY__)
-struct task_struct;
-
-#include <asm/pda.h>
-
-#define STACK_RESERVED \
- (sizeof(execution_context_t))
-
-static inline struct task_struct * get_current(void)
-{
- struct task_struct *current;
- current = read_pda(pcurrent);
- return current;
-}
-
-#define current get_current()
-
-static inline void set_current(struct task_struct *p)
-{
- write_pda(pcurrent,p);
-}
-
-static inline execution_context_t *get_execution_context(void)
-{
- execution_context_t *execution_context;
- __asm__( "andq %%rsp,%0; addq %2,%0"
- : "=r" (execution_context)
- : "0" (~(STACK_SIZE-1)), "i" (STACK_SIZE-STACK_RESERVED) );
- return execution_context;
-}
-
-static inline unsigned long get_stack_top(void)
-{
- unsigned long p;
- __asm__ ( "orq %%rsp,%0; andq $~7,%0"
- : "=r" (p) : "0" (STACK_SIZE-8) );
- return p;
-}
-
-#define schedule_tail(_p) \
- __asm__ __volatile__ ( \
- "andq %%rsp,%0; addq %2,%0; movq %0,%%rsp; jmp *%1" \
- : : "r" (~(STACK_SIZE-1)), \
- "r" (unlikely(is_idle_task((_p))) ? \
- continue_cpu_idle_loop : \
- continue_nonidle_task), \
- "i" (STACK_SIZE-STACK_RESERVED) )
-
-
-#else
-
-#ifndef ASM_OFFSET_H
-#include <asm/offset.h>
-#endif
-
-#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg
-
-#endif
-
-#endif /* !(_X86_64_CURRENT_H) */
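
The inline assembly in get_execution_context() is plain mask-and-add
arithmetic on the stack pointer. A user-space restatement with illustrative
values (STACK_SIZE and the context size are stand-ins, and the sample %rsp
is made up):

    #include <stdio.h>

    #define STACK_SIZE     8192UL  /* assumed: power-of-two per-task stack       */
    #define STACK_RESERVED 200UL   /* stands in for sizeof(execution_context_t)  */

    int main(void)
    {
        unsigned long rsp  = 0xffff810000105a30UL;            /* example %rsp */
        unsigned long base = rsp & ~(STACK_SIZE - 1);         /* andq %%rsp,%0 */
        unsigned long ctx  = base + STACK_SIZE - STACK_RESERVED; /* addq %2,%0 */

        /* The context area sits at the top of the aligned stack. */
        printf("stack base %#lx, context at %#lx\n", base, ctx);
        return 0;
    }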
+++ /dev/null
-#ifndef _X86_64_DEBUGREG_H
-#define _X86_64_DEBUGREG_H
-
-
-/* Indicate the register numbers for a number of the specific
- debug registers. Registers 0-3 contain the addresses we wish to trap on */
-#define DR_FIRSTADDR 0 /* u_debugreg[DR_FIRSTADDR] */
-#define DR_LASTADDR 3 /* u_debugreg[DR_LASTADDR] */
-
-#define DR_STATUS 6 /* u_debugreg[DR_STATUS] */
-#define DR_CONTROL 7 /* u_debugreg[DR_CONTROL] */
-
-/* Define a few things for the status register. We can use this to determine
- which debugging register was responsible for the trap. The other bits
- are either reserved or not of interest to us. */
-
-#define DR_TRAP0 (0x1) /* db0 */
-#define DR_TRAP1 (0x2) /* db1 */
-#define DR_TRAP2 (0x4) /* db2 */
-#define DR_TRAP3 (0x8) /* db3 */
-
-#define DR_STEP (0x4000) /* single-step */
-#define DR_SWITCH (0x8000) /* task switch */
-
-/* Now define a bunch of things for manipulating the control register.
- The top two bytes of the control register consist of 4 fields of 4
- bits - each field corresponds to one of the four debug registers,
- and indicates what types of access we trap on, and how large the data
- field is that we are looking at */
-
-#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */
-#define DR_CONTROL_SIZE 4 /* 4 control bits per register */
-
-#define DR_RW_EXECUTE (0x0) /* Settings for the access types to trap on */
-#define DR_RW_WRITE (0x1)
-#define DR_RW_READ (0x3)
-
-#define DR_LEN_1 (0x0) /* Settings for data length to trap on */
-#define DR_LEN_2 (0x4)
-#define DR_LEN_4 (0xC)
-#define DR_LEN_8 (0x8)
-
-/* The low byte of the control register determines which registers are
- enabled. There are 4 fields of two bits. One bit is "local", meaning
- that the processor will reset the bit after a task switch and the other
- is global meaning that we have to explicitly reset the bit. With linux,
- you can use either one, since we explicitly zero the register when we enter
- kernel mode. */
-
-#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
-#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
-#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
-
-#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
-#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */
-
-/* The second byte of the control register has a few special things.
- We can slow the instruction pipeline for instructions coming via the
- gdt or the ldt if we want to. I am not sure why this is an advantage */
-
-#define DR_CONTROL_RESERVED (0xFFFFFFFF0000FC00UL) /* Reserved */
-#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
-#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
-
-#endif
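
To show how the control-register fields above compose, a small
self-contained program building the DR7 value that arms debug register 0 as
a 4-byte write watchpoint (the constants are repeated locally; nothing is
written to the real register):

    #include <stdio.h>

    /* Constants as defined in the header above. */
    #define DR_CONTROL_SHIFT      16
    #define DR_CONTROL_SIZE        4
    #define DR_RW_WRITE          0x1
    #define DR_LEN_4             0xC
    #define DR_LOCAL_ENABLE_SHIFT  0
    #define DR_ENABLE_SIZE         2

    /* Type/length go in the 4-bit field for register 'reg'; one local
     * enable bit sits in the low byte, two bits per register. */
    static unsigned long dr7_write_watch(int reg)
    {
        unsigned long type = (unsigned long)(DR_RW_WRITE | DR_LEN_4)
                             << (DR_CONTROL_SHIFT + reg * DR_CONTROL_SIZE);
        unsigned long ena  = 1UL << (DR_LOCAL_ENABLE_SHIFT + reg * DR_ENABLE_SIZE);
        return type | ena;
    }

    int main(void)
    {
        printf("DR7 for reg0 = %#lx\n", dr7_write_watch(0)); /* 0xd0001 */
        return 0;
    }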
+++ /dev/null
-#ifndef _X86_64_DELAY_H
-#define _X86_64_DELAY_H
-
-/*
- * Copyright (C) 1993 Linus Torvalds
- *
- * Delay routines calling functions in arch/i386/lib/delay.c
- */
-
-extern unsigned long ticks_per_usec;
-extern void __udelay(unsigned long usecs);
-#define udelay(n) __udelay(n)
-
-#endif /* defined(_X86_64_DELAY_H) */
+++ /dev/null
-#ifndef __ARCH_DESC_H
-#define __ARCH_DESC_H
-
-#define LDT_ENTRY_SIZE 16
-
-#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY
-
-#define __FIRST_PER_CPU_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
-
-#define __CPU_DESC_INDEX(x,field) \
- ((x) * sizeof(struct per_cpu_gdt) + offsetof(struct per_cpu_gdt, field) + (__FIRST_PER_CPU_ENTRY*8))
-#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
-
-#define load_TR(cpu) asm volatile("ltr %w0"::"r" (__CPU_DESC_INDEX(cpu, tss)));
-#define __load_LDT(cpu) asm volatile("lldt %w0"::"r" (__CPU_DESC_INDEX(cpu, ldt)));
-#define clear_LDT(n) asm volatile("lldt %w0"::"r" (0))
-
-/*
- * Guest OS must provide its own code selectors, or use the one we provide. The
- * RPL must be 1, as we only create bounce frames to ring 1. Any LDT selector
- * value is okay. Note that checking only the RPL is insufficient: if the
- * selector is poked into an interrupt, trap or call gate then the RPL is
- * ignored when the gate is accessed.
- */
-#define VALID_SEL(_s) \
- (((((_s)>>3) < FIRST_RESERVED_GDT_ENTRY) || \
- (((_s)>>3) > LAST_RESERVED_GDT_ENTRY) || \
- ((_s)&4)) && \
- (((_s)&3) == 0))
-#define VALID_CODESEL(_s) ((_s) == FLAT_RING3_CS || VALID_SEL(_s))
-
-/* These are bitmasks for the first 32 bits of a descriptor table entry. */
-#define _SEGMENT_TYPE (15<< 8)
-#define _SEGMENT_S ( 1<<12) /* System descriptor (yes iff S==0) */
-#define _SEGMENT_DPL ( 3<<13) /* Descriptor Privilege Level */
-#define _SEGMENT_P ( 1<<15) /* Segment Present */
-#define _SEGMENT_G ( 1<<23) /* Granularity */
-
-#ifndef __ASSEMBLY__
-
-enum {
- GATE_INTERRUPT = 0xE,
- GATE_TRAP = 0xF,
- GATE_CALL = 0xC,
-};
-
-// 16-byte gate
-struct gate_struct {
- u16 offset_low;
- u16 segment;
- unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
- u16 offset_middle;
- u32 offset_high;
- u32 zero1;
-} __attribute__((packed));
-
-// 8-byte segment descriptor
-struct desc_struct {
- u16 limit0;
- u16 base0;
- unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1;
- unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8;
-} __attribute__((packed));
-
-// LDT or TSS descriptor in the GDT. 16 bytes.
-struct ldttss_desc {
- u16 limit0;
- u16 base0;
- unsigned base1 : 8, type : 5, dpl : 2, p : 1;
- unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8;
- u32 base3;
- u32 zero1;
-} __attribute__((packed));
-
-// Union of above structures
-union desc_union {
- struct desc_struct seg;
- struct ldttss_desc ldttss;
- struct gate_struct gate;
-};
-
-struct per_cpu_gdt {
- struct ldttss_desc tss;
- struct ldttss_desc ldt;
-} ____cacheline_aligned;
-
-
-struct Xgt_desc_struct {
- unsigned short size;
- unsigned long address;
-} __attribute__((packed));
-
-extern __u8 gdt_table[];
-extern __u8 gdt_end[];
-extern union desc_union *gdt;
-
-extern struct per_cpu_gdt gdt_cpu_table[];
-
-#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF)
-#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF)
-#define PTR_HIGH(x) ((unsigned long)(x) >> 32)
-
-enum {
- DESC_TSS = 0x9,
- DESC_LDT = 0x2,
-};
-
-extern struct gate_struct *idt;
-
-#define idt_descr (*(struct Xgt_desc_struct *)((char *)&idt - 2))
-#define gdt_descr (*(struct Xgt_desc_struct *)((char *)&gdt - 2))
-
-extern void set_intr_gate(unsigned int irq, void * addr);
-extern void set_tss_desc(unsigned int n, void *addr);
-
-#endif /* !__ASSEMBLY__ */
-
-#endif
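
To illustrate the gate layout, a user-space sketch that mirrors the 16-byte
gate structure above and splits a made-up handler address with the
PTR_LOW/PTR_MIDDLE/PTR_HIGH macros; the selector is __HYPERVISOR_CS64 from
the config header earlier in this changeset:

    #include <stdio.h>
    #include <stdint.h>

    /* Mirror of the gate_struct layout above, for illustration only. */
    struct gate {
        uint16_t offset_low, segment;
        uint16_t ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
        uint16_t offset_middle;
        uint32_t offset_high, zero1;
    } __attribute__((packed));

    #define PTR_LOW(x)    ((unsigned long)(x) & 0xFFFF)
    #define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF)
    #define PTR_HIGH(x)   ((unsigned long)(x) >> 32)

    int main(void)
    {
        unsigned long handler = 0xffff810000234560UL;  /* example address */
        struct gate g = {
            .offset_low    = PTR_LOW(handler),
            .segment       = 0x0810,                   /* __HYPERVISOR_CS64 */
            .type          = 0xE,                      /* GATE_INTERRUPT    */
            .p             = 1,
            .offset_middle = PTR_MIDDLE(handler),
            .offset_high   = PTR_HIGH(handler),
        };
        printf("gate is %zu bytes\n", sizeof(g));      /* 16 */
        return 0;
    }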
+++ /dev/null
-/* $Id: dma.h,v 1.7 1992/12/14 00:29:34 root Exp root $
- * linux/include/asm/dma.h: Defines for using and allocating dma channels.
- * Written by Hennus Bergman, 1992.
- * High DMA channel support & info by Hannu Savolainen
- * and John Boyd, Nov. 1992.
- */
-
-#ifndef _ASM_DMA_H
-#define _ASM_DMA_H
-
-#include <xen/config.h>
-#include <xen/spinlock.h> /* And spinlocks */
-#include <asm/io.h> /* need byte IO */
-#include <xen/delay.h>
-
-
-#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER
-#define dma_outb outb_p
-#else
-#define dma_outb outb
-#endif
-
-#define dma_inb inb
-
-/*
- * NOTES about DMA transfers:
- *
- * controller 1: channels 0-3, byte operations, ports 00-1F
- * controller 2: channels 4-7, word operations, ports C0-DF
- *
- * - ALL registers are 8 bits only, regardless of transfer size
- * - channel 4 is not used - cascades 1 into 2.
- * - channels 0-3 are byte - addresses/counts are for physical bytes
- * - channels 5-7 are word - addresses/counts are for physical words
- * - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries
- * - transfer count loaded to registers is 1 less than actual count
- * - controller 2 offsets are all even (2x offsets for controller 1)
- * - page registers for 5-7 don't use data bit 0, represent 128K pages
- * - page registers for 0-3 use bit 0, represent 64K pages
- *
- * DMA transfers are limited to the lower 16MB of _physical_ memory.
- * Note that addresses loaded into registers must be _physical_ addresses,
- * not logical addresses (which may differ if paging is active).
- *
- * Address mapping for channels 0-3:
- *
- * A23 ... A16 A15 ... A8 A7 ... A0 (Physical addresses)
- * | ... | | ... | | ... |
- * | ... | | ... | | ... |
- * | ... | | ... | | ... |
- * P7 ... P0 A7 ... A0 A7 ... A0
- * | Page | Addr MSB | Addr LSB | (DMA registers)
- *
- * Address mapping for channels 5-7:
- *
- * A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0 (Physical addresses)
- * | ... | \ \ ... \ \ \ ... \ \
- * | ... | \ \ ... \ \ \ ... \ (not used)
- * | ... | \ \ ... \ \ \ ... \
- * P7 ... P1 (0) A7 A6 ... A0 A7 A6 ... A0
- * | Page | Addr MSB | Addr LSB | (DMA registers)
- *
- * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses
- * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at
- * the hardware level, so odd-byte transfers aren't possible).
- *
- * Transfer count (_not # bytes_) is limited to 64K, represented as actual
- * count - 1 : 64K => 0xFFFF, 1 => 0x0000. Thus, count is always 1 or more,
- * and up to 128K bytes may be transferred on channels 5-7 in one operation.
- *
- */
-
-#define MAX_DMA_CHANNELS 8
-
-#if 0
-/* The maximum address that we can perform a DMA transfer to on this platform */
-#define MAX_DMA_ADDRESS (PAGE_OFFSET+0x1000000)
-#endif
-
-
-/* 8237 DMA controllers */
-#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */
-#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */
-
-/* DMA controller registers */
-#define DMA1_CMD_REG 0x08 /* command register (w) */
-#define DMA1_STAT_REG 0x08 /* status register (r) */
-#define DMA1_REQ_REG 0x09 /* request register (w) */
-#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */
-#define DMA1_MODE_REG 0x0B /* mode register (w) */
-#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */
-#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */
-#define DMA1_RESET_REG 0x0D /* Master Clear (w) */
-#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */
-#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */
-
-#define DMA2_CMD_REG 0xD0 /* command register (w) */
-#define DMA2_STAT_REG 0xD0 /* status register (r) */
-#define DMA2_REQ_REG 0xD2 /* request register (w) */
-#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */
-#define DMA2_MODE_REG 0xD6 /* mode register (w) */
-#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */
-#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */
-#define DMA2_RESET_REG 0xDA /* Master Clear (w) */
-#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */
-#define DMA2_MASK_ALL_REG 0xDE /* all-channels mask (w) */
-
-#define DMA_ADDR_0 0x00 /* DMA address registers */
-#define DMA_ADDR_1 0x02
-#define DMA_ADDR_2 0x04
-#define DMA_ADDR_3 0x06
-#define DMA_ADDR_4 0xC0
-#define DMA_ADDR_5 0xC4
-#define DMA_ADDR_6 0xC8
-#define DMA_ADDR_7 0xCC
-
-#define DMA_CNT_0 0x01 /* DMA count registers */
-#define DMA_CNT_1 0x03
-#define DMA_CNT_2 0x05
-#define DMA_CNT_3 0x07
-#define DMA_CNT_4 0xC2
-#define DMA_CNT_5 0xC6
-#define DMA_CNT_6 0xCA
-#define DMA_CNT_7 0xCE
-
-#define DMA_PAGE_0 0x87 /* DMA page registers */
-#define DMA_PAGE_1 0x83
-#define DMA_PAGE_2 0x81
-#define DMA_PAGE_3 0x82
-#define DMA_PAGE_5 0x8B
-#define DMA_PAGE_6 0x89
-#define DMA_PAGE_7 0x8A
-
-#define DMA_MODE_READ 0x44 /* I/O to memory, no autoinit, increment, single mode */
-#define DMA_MODE_WRITE 0x48 /* memory to I/O, no autoinit, increment, single mode */
-#define DMA_MODE_CASCADE 0xC0 /* pass thru DREQ->HRQ, DACK<-HLDA only */
-
-#define DMA_AUTOINIT 0x10
-
-
-extern spinlock_t dma_spin_lock;
-
-static __inline__ unsigned long claim_dma_lock(void)
-{
- unsigned long flags;
- spin_lock_irqsave(&dma_spin_lock, flags);
- return flags;
-}
-
-static __inline__ void release_dma_lock(unsigned long flags)
-{
- spin_unlock_irqrestore(&dma_spin_lock, flags);
-}
-
-/* enable/disable a specific DMA channel */
-static __inline__ void enable_dma(unsigned int dmanr)
-{
- if (dmanr<=3)
- dma_outb(dmanr, DMA1_MASK_REG);
- else
- dma_outb(dmanr & 3, DMA2_MASK_REG);
-}
-
-static __inline__ void disable_dma(unsigned int dmanr)
-{
- if (dmanr<=3)
- dma_outb(dmanr | 4, DMA1_MASK_REG);
- else
- dma_outb((dmanr & 3) | 4, DMA2_MASK_REG);
-}
-
-/* Clear the 'DMA Pointer Flip Flop'.
- * Write 0 for LSB/MSB, 1 for MSB/LSB access.
- * Use this once to initialize the FF to a known state.
- * After that, keep track of it. :-)
- * --- In order to do that, the DMA routines below should ---
- * --- only be used while holding the DMA lock ! ---
- */
-static __inline__ void clear_dma_ff(unsigned int dmanr)
-{
- if (dmanr<=3)
- dma_outb(0, DMA1_CLEAR_FF_REG);
- else
- dma_outb(0, DMA2_CLEAR_FF_REG);
-}
-
-/* set mode (above) for a specific DMA channel */
-static __inline__ void set_dma_mode(unsigned int dmanr, char mode)
-{
- if (dmanr<=3)
- dma_outb(mode | dmanr, DMA1_MODE_REG);
- else
- dma_outb(mode | (dmanr&3), DMA2_MODE_REG);
-}
-
-/* Set only the page register bits of the transfer address.
- * This is used for successive transfers when we know the contents of
- * the lower 16 bits of the DMA current address register, but a 64k boundary
- * may have been crossed.
- */
-static __inline__ void set_dma_page(unsigned int dmanr, char pagenr)
-{
- switch(dmanr) {
- case 0:
- dma_outb(pagenr, DMA_PAGE_0);
- break;
- case 1:
- dma_outb(pagenr, DMA_PAGE_1);
- break;
- case 2:
- dma_outb(pagenr, DMA_PAGE_2);
- break;
- case 3:
- dma_outb(pagenr, DMA_PAGE_3);
- break;
- case 5:
- dma_outb(pagenr & 0xfe, DMA_PAGE_5);
- break;
- case 6:
- dma_outb(pagenr & 0xfe, DMA_PAGE_6);
- break;
- case 7:
- dma_outb(pagenr & 0xfe, DMA_PAGE_7);
- break;
- }
-}
-
-
-/* Set transfer address & page bits for specific DMA channel.
- * Assumes dma flipflop is clear.
- */
-static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a)
-{
- set_dma_page(dmanr, a>>16);
- if (dmanr <= 3) {
- dma_outb( a & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
- dma_outb( (a>>8) & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
- } else {
- dma_outb( (a>>1) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
- dma_outb( (a>>9) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
- }
-}
-
-
-/* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for
- * a specific DMA channel.
- * You must ensure the parameters are valid.
- * NOTE: from a manual: "the number of transfers is one more
- * than the initial word count"! This is taken into account.
- * Assumes dma flip-flop is clear.
- * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7.
- */
-static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count)
-{
- count--;
- if (dmanr <= 3) {
- dma_outb( count & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
- dma_outb( (count>>8) & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
- } else {
- dma_outb( (count>>1) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
- dma_outb( (count>>9) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
- }
-}
-
-
-/* Get DMA residue count. After a DMA transfer, this
- * should return zero. Reading this while a DMA transfer is
- * still in progress will return unpredictable results.
- * If called before the channel has been used, it may return 1.
- * Otherwise, it returns the number of _bytes_ left to transfer.
- *
- * Assumes DMA flip-flop is clear.
- */
-static __inline__ int get_dma_residue(unsigned int dmanr)
-{
- unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE
- : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE;
-
- /* using short to get 16-bit wrap around */
- unsigned short count;
-
- count = 1 + dma_inb(io_port);
- count += dma_inb(io_port) << 8;
-
- return (dmanr<=3)? count : (count<<1);
-}
-
-
-/* These are in kernel/dma.c: */
-extern int request_dma(unsigned int dmanr, const char * device_id); /* reserve a DMA channel */
-extern void free_dma(unsigned int dmanr); /* release it again */
-
-/* From PCI */
-
-#ifdef CONFIG_PCI
-extern int isa_dma_bridge_buggy;
-#else
-#define isa_dma_bridge_buggy (0)
-#endif
-
-#endif /* _ASM_DMA_H */
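
Tying the helpers together, the canonical single-transfer setup looks as
follows. A sketch that assumes this header is in scope; it programs real
I/O ports, so it cannot run in user space, and 'buf_phys' must be a
physical address below 16MB that does not cross a 64K boundary:

    /* Program channel 'chan' for one device-to-memory transfer, under
     * the DMA lock and with the channel masked while its registers
     * change. */
    static void setup_dma_read(unsigned int chan, unsigned int buf_phys,
                               unsigned int len)
    {
        unsigned long flags = claim_dma_lock();

        disable_dma(chan);              /* mask the channel            */
        clear_dma_ff(chan);             /* reset LSB/MSB flip-flop     */
        set_dma_mode(chan, DMA_MODE_READ);
        set_dma_addr(chan, buf_phys);   /* also programs the page reg  */
        set_dma_count(chan, len);       /* in bytes; hardware sees n-1 */
        enable_dma(chan);               /* unmask: transfer may begin  */

        release_dma_lock(flags);
    }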
+++ /dev/null
-/******************************************************************************
- * domain_page.h
- *
- * Allow temporary mapping of domain page frames into Xen space.
- */
-
-#ifndef __ASM_DOMAIN_PAGE_H__
-#define __ASM_DOMAIN_PAGE_H__
-
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <asm/page.h>
-
-/*
- * Maps a given physical address, returning corresponding virtual address.
- * The entire page containing that VA is now accessible until a
- * corresponding call to unmap_domain_mem().
- */
-#define map_domain_mem(pa) __va(pa)
-
-/*
- * Pass a VA within a page previously mapped with map_domain_mem().
- * That page will then be removed from the mapping lists.
- */
-#define unmap_domain_mem(va) {}
-
-#endif /* __ASM_DOMAIN_PAGE_H__ */
+++ /dev/null
-/*
- * fixmap.h: compile-time virtual memory allocation
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1998 Ingo Molnar
- *
- * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
- */
-
-#ifndef _ASM_FIXMAP_H
-#define _ASM_FIXMAP_H
-
-#include <xen/config.h>
-#include <asm/apicdef.h>
-#include <asm/page.h>
-
-/*
- * Here we define all the compile-time 'special' virtual
- * addresses. The point is to have a constant address at
- * compile time, but to set the physical address only
- * in the boot process. We allocate these special addresses
- * from the end of virtual memory (0xfffff000) backwards.
- * Also this lets us do fail-safe vmalloc(): we
- * can guarantee that these special addresses and
- * vmalloc()-ed addresses never overlap.
- *
- * these 'compile-time allocated' memory buffers are
- * fixed-size 4k pages. (or larger if used with an increment
- * higher than 1) use fixmap_set(idx,phys) to associate
- * physical memory with fixmap indices.
- *
- * TLB entries of such buffers will not be flushed across
- * task switches.
- */
-
-/*
- * on UP currently we will have no trace of the fixmap mechanism,
- * no page table allocations, etc. This might change in the
- * future, say framebuffers for the console driver(s) could be
- * fix-mapped?
- */
-enum fixed_addresses {
-#ifdef CONFIG_X86_LOCAL_APIC
- FIX_APIC_BASE, /* local (CPU) APIC -- required for SMP or not */
-#endif
-#ifdef CONFIG_X86_IO_APIC
- FIX_IO_APIC_BASE_0,
- FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
-#endif
-#ifdef CONFIG_HIGHMEM
- FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
- FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
-#endif
- __end_of_fixed_addresses
-};
-
-extern void __set_fixmap (enum fixed_addresses idx,
- l1_pgentry_t entry);
-
-#define set_fixmap(idx, phys) \
- __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR))
-/*
- * Some hardware wants to get fixmapped without caching.
- */
-#define set_fixmap_nocache(idx, phys) \
- __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR_NOCACHE))
-/*
- * used by vmalloc.c.
- *
- * Leave one empty page between vmalloc'ed areas and
- * the start of the fixmap, and leave one page empty
- * at the top of mem..
- */
-#define FIXADDR_TOP (0xffffffffffffe000UL)
-#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
-#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
-
-#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
-
-extern void __this_fixmap_does_not_exist(void);
-
-/*
- * 'index to address' translation. If anyone tries to use the idx
- * directly without translation, we catch the bug with a NULL-dereference
- * kernel oops. Illegal ranges of incoming indices are caught too.
- */
-static inline unsigned long fix_to_virt(const unsigned int idx)
-{
- /*
- * this branch gets completely eliminated after inlining,
- * except when someone tries to use fixaddr indices in an
- * illegal way. (such as mixing up address types or using
- * out-of-range indices).
- *
- * If it doesn't get removed, the linker will complain
- * loudly with a reasonably clear error message..
- */
- if (idx >= __end_of_fixed_addresses)
- __this_fixmap_does_not_exist();
-
- return __fix_to_virt(idx);
-}
-
-#endif
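
The index-to-address arithmetic is simple enough to restate in user space:
slots are handed out downward from FIXADDR_TOP, one page per index. With
CONFIG_X86_LOCAL_APIC set (as in the config header earlier), FIX_APIC_BASE
is index 0, so APIC_BASE resolves to FIXADDR_TOP itself:

    #include <stdio.h>

    #define PAGE_SHIFT  12
    #define FIXADDR_TOP 0xffffffffffffe000UL

    static unsigned long fix_to_virt(unsigned int idx)
    {
        return FIXADDR_TOP - ((unsigned long)idx << PAGE_SHIFT);
    }

    int main(void)
    {
        printf("FIX_APIC_BASE      -> %#lx\n", fix_to_virt(0));
        printf("FIX_IO_APIC_BASE_0 -> %#lx\n", fix_to_virt(1));
        return 0;
    }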
+++ /dev/null
-/******************************************************************************
- * flushtlb.h
- *
- * TLB flushes are timestamped using a global virtual 'clock' which ticks
- * on any TLB flush on any processor.
- *
- * Copyright (c) 2003, K A Fraser
- */
-
-#ifndef __FLUSHTLB_H__
-#define __FLUSHTLB_H__
-
-#include <xen/smp.h>
-
-/*
- * Every time the TLB clock passes an "epoch", every CPU's TLB is flushed.
- * Therefore, if the current TLB time and a previously-read timestamp differ
- * in their significant bits (i.e., ~TLBCLOCK_EPOCH_MASK), then the TLB clock
- * has wrapped at least once and every CPU's TLB is guaranteed to have been
- * flushed meanwhile.
- * This allows us to deal gracefully with a bounded (a.k.a. wrapping) clock.
- */
-#define TLBCLOCK_EPOCH_MASK ((1U<<16)-1)
-
-/*
- * 'cpu_stamp' is the current timestamp for the CPU we are testing.
- * 'lastuse_stamp' is a timestamp taken when the PFN we are testing was last
- * used for a purpose that may have caused the CPU's TLB to become tainted.
- */
-static inline int NEED_FLUSH(u32 cpu_stamp, u32 lastuse_stamp)
-{
- /*
- * Why does this work?
- * 1. XOR sets high-order bits if the stamps are from differing epochs.
- * 2. Subtraction sets high-order bits if 'cpu_stamp > lastuse_stamp'.
- * In either case a flush is unnecessary: we therefore OR the results from
- * (1) and (2), mask the high-order bits, and return the inverse.
- */
- return !(((lastuse_stamp^cpu_stamp)|(lastuse_stamp-cpu_stamp)) &
- ~TLBCLOCK_EPOCH_MASK);
-}
-
-extern u32 tlbflush_clock;
-extern u32 tlbflush_time[NR_CPUS];
-
-extern void tlb_clocktick(void);
-extern void new_tlbflush_clock_period(void);
-
-#endif /* __FLUSHTLB_H__ */
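/*
 * A runnable sketch of the NEED_FLUSH() arithmetic above, with the
 * same 16-bit epoch split. A flush is needed only when both stamps lie
 * in the same epoch and the last use is not older than the CPU's last
 * flush. Names are local to this example.
 */
#include <stdio.h>

#define SKETCH_EPOCH_MASK ((1U << 16) - 1)

static int sketch_need_flush(unsigned int cpu_stamp, unsigned int lastuse_stamp)
{
    return !(((lastuse_stamp ^ cpu_stamp) | (lastuse_stamp - cpu_stamp)) &
             ~SKETCH_EPOCH_MASK);
}

int main(void)
{
    printf("%d\n", sketch_need_flush(0x00010005, 0x00010009)); /* 1: same epoch, used since flush */
    printf("%d\n", sketch_need_flush(0x00010009, 0x00010005)); /* 0: CPU flushed after last use   */
    printf("%d\n", sketch_need_flush(0x00020001, 0x00010009)); /* 0: stamps from differing epochs */
    return 0;
}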
+++ /dev/null
-#ifndef __ASM_HARDIRQ_H
-#define __ASM_HARDIRQ_H
-
-#include <xen/config.h>
-#include <xen/irq.h>
-
-/* assembly code in softirq.h is sensitive to the offsets of these fields */
-typedef struct {
- unsigned int __softirq_pending;
- unsigned int __local_irq_count;
- unsigned int __local_bh_count;
- unsigned int __syscall_count;
- unsigned int __nmi_count;
- unsigned long idle_timestamp;
-} ____cacheline_aligned irq_cpustat_t;
-
-#include <xen/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
-
-/*
- * Are we in an interrupt context? Either doing bottom half
- * or hardware interrupt processing?
- */
-#define in_interrupt() ({ int __cpu = smp_processor_id(); \
- (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); })
-
-#define in_irq() (local_irq_count(smp_processor_id()) != 0)
-
-#ifndef CONFIG_SMP
-
-#define hardirq_trylock(cpu) (local_irq_count(cpu) == 0)
-#define hardirq_endlock(cpu) do { } while (0)
-
-#define irq_enter(cpu, irq) (local_irq_count(cpu)++)
-#define irq_exit(cpu, irq) (local_irq_count(cpu)--)
-
-#define synchronize_irq() barrier()
-
-#else
-
-#include <asm/atomic.h>
-#include <asm/smp.h>
-
-extern unsigned char global_irq_holder;
-extern unsigned volatile long global_irq_lock; /* long for set_bit -RR */
-
-static inline int irqs_running (void)
-{
- int i;
-
- for (i = 0; i < smp_num_cpus; i++)
- if (local_irq_count(i))
- return 1;
- return 0;
-}
-
-static inline void release_irqlock(int cpu)
-{
- /* if we didn't own the irq lock, just ignore.. */
- if (global_irq_holder == (unsigned char) cpu) {
- global_irq_holder = 0xff;
- clear_bit(0,&global_irq_lock);
- }
-}
-
-static inline void irq_enter(int cpu, int irq)
-{
- ++local_irq_count(cpu);
-
- while (test_bit(0,&global_irq_lock)) {
- cpu_relax();
- }
-}
-
-static inline void irq_exit(int cpu, int irq)
-{
- --local_irq_count(cpu);
-}
-
-static inline int hardirq_trylock(int cpu)
-{
- return !local_irq_count(cpu) && !test_bit(0,&global_irq_lock);
-}
-
-#define hardirq_endlock(cpu) do { } while (0)
-
-extern void synchronize_irq(void);
-
-#endif /* CONFIG_SMP */
-
-#endif /* __ASM_HARDIRQ_H */
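/*
 * A single-CPU toy model of the nesting logic above: irq_enter() and
 * irq_exit() bracket handler execution, and in_interrupt() is nothing
 * more than a test on the per-CPU counters. All names here are local
 * to the sketch.
 */
#include <assert.h>

static unsigned int sketch_irq_count, sketch_bh_count;

static void sketch_irq_enter(void) { sketch_irq_count++; }
static void sketch_irq_exit(void)  { sketch_irq_count--; }
static int  sketch_in_interrupt(void)
{
    return (sketch_irq_count + sketch_bh_count) != 0;
}

int main(void)
{
    assert(!sketch_in_interrupt());
    sketch_irq_enter();              /* handler entry (may nest) */
    assert(sketch_in_interrupt());
    sketch_irq_exit();               /* handler exit */
    assert(!sketch_in_interrupt());
    return 0;
}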
+++ /dev/null
-/*
- * linux/include/asm-x86_64/hdreg.h
- *
- * Copyright (C) 1994-1996 Linus Torvalds & authors
- */
-
-#ifndef __ASMx86_64_HDREG_H
-#define __ASMx86_64_HDREG_H
-
-//typedef unsigned short ide_ioreg_t;
-typedef unsigned long ide_ioreg_t;
-
-#endif /* __ASMx86_64_HDREG_H */
+++ /dev/null
-/*
- * include/asm-i386/i387.h
- *
- * Copyright (C) 1994 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * General FPU state handling cleanups
- * Gareth Hughes <gareth@valinux.com>, May 2000
- */
-
-#ifndef __ASM_I386_I387_H
-#define __ASM_I386_I387_H
-
-#include <xen/sched.h>
-#include <asm/processor.h>
-
-extern void init_fpu(void);
-extern void save_init_fpu( struct task_struct *tsk );
-extern void restore_fpu( struct task_struct *tsk );
-
-#define unlazy_fpu( tsk ) do { \
- if ( test_bit(PF_USEDFPU, &tsk->flags) ) \
- save_init_fpu( tsk ); \
-} while (0)
-
-#define clear_fpu( tsk ) do { \
- if ( test_and_clear_bit(PF_USEDFPU, &tsk->flags) ) { \
- asm volatile("fwait"); \
- stts(); \
- } \
-} while (0)
-
-#define load_mxcsr( val ) do { \
- unsigned long __mxcsr = ((unsigned long)(val) & 0xffbf); \
- asm volatile( "ldmxcsr %0" : : "m" (__mxcsr) ); \
-} while (0)
-
-#endif /* __ASM_I386_I387_H */
+++ /dev/null
-/*
- * linux/include/asm-x86_64/ide.h
- *
- * Copyright (C) 1994-1996 Linus Torvalds & authors
- */
-
-/*
- * This file contains the x86_64 architecture specific IDE code.
- */
-
-#ifndef __ASMx86_64_IDE_H
-#define __ASMx86_64_IDE_H
-
-#ifdef __KERNEL__
-
-#include <xen/config.h>
-
-#ifndef MAX_HWIFS
-# ifdef CONFIG_BLK_DEV_IDEPCI
-#define MAX_HWIFS 10
-# else
-#define MAX_HWIFS 6
-# endif
-#endif
-
-static __inline__ int ide_default_irq(ide_ioreg_t base)
-{
- switch (base) {
- case 0x1f0: return 14;
- case 0x170: return 15;
- case 0x1e8: return 11;
- case 0x168: return 10;
- case 0x1e0: return 8;
- case 0x160: return 12;
- default:
- return 0;
- }
-}
-
-static __inline__ ide_ioreg_t ide_default_io_base(int index)
-{
- switch (index) {
- case 0: return 0x1f0;
- case 1: return 0x170;
- case 2: return 0x1e8;
- case 3: return 0x168;
- case 4: return 0x1e0;
- case 5: return 0x160;
- default:
- return 0;
- }
-}
-
-static __inline__ void ide_init_hwif_ports(hw_regs_t *hw, ide_ioreg_t data_port, ide_ioreg_t ctrl_port, int *irq)
-{
- ide_ioreg_t reg = data_port;
- int i;
-
- for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
- hw->io_ports[i] = reg;
- reg += 1;
- }
- if (ctrl_port) {
- hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port;
- } else {
- hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206;
- }
- if (irq != NULL)
- *irq = 0;
- hw->io_ports[IDE_IRQ_OFFSET] = 0;
-}
-
-static __inline__ void ide_init_default_hwifs(void)
-{
-#ifndef CONFIG_BLK_DEV_IDEPCI
- hw_regs_t hw;
- int index;
-
- for(index = 0; index < MAX_HWIFS; index++) {
- memset(&hw, 0, sizeof hw);
- ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL);
- hw.irq = ide_default_irq(ide_default_io_base(index));
- ide_register_hw(&hw, NULL);
- }
-#endif /* CONFIG_BLK_DEV_IDEPCI */
-}
-
-typedef union {
- unsigned all : 8; /* all of the bits together */
- struct {
- unsigned head : 4; /* always zeros here */
- unsigned unit : 1; /* drive select number, 0 or 1 */
- unsigned bit5 : 1; /* always 1 */
- unsigned lba : 1; /* using LBA instead of CHS */
- unsigned bit7 : 1; /* always 1 */
- } b;
-} select_t;
-
-typedef union {
- unsigned all : 8; /* all of the bits together */
- struct {
- unsigned bit0 : 1;
- unsigned nIEN : 1; /* device INTRQ to host */
- unsigned SRST : 1; /* host soft reset bit */
- unsigned bit3 : 1; /* ATA-2 thingy */
- unsigned reserved456 : 3;
- unsigned HOB : 1; /* 48-bit address ordering */
- } b;
-} control_t;
-
-#define ide_request_irq(irq,hand,flg,dev,id) request_irq((irq),(hand),(flg),(dev),(id))
-#define ide_free_irq(irq,dev_id) free_irq((irq), (dev_id))
-#define ide_check_region(from,extent) check_region((from), (extent))
-#define ide_request_region(from,extent,name) request_region((from), (extent), (name))
-#define ide_release_region(from,extent) release_region((from), (extent))
-
-/*
- * The following are not needed for the non-m68k ports
- */
-#define ide_ack_intr(hwif) (1)
-#define ide_fix_driveid(id) do {} while (0)
-#define ide_release_lock(lock) do {} while (0)
-#define ide_get_lock(lock, hdlr, data) do {} while (0)
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASMx86_64_IDE_H */
+++ /dev/null
-#ifndef _ASM_IO_H
-#define _ASM_IO_H
-
-#include <xen/config.h>
-#include <asm/page.h>
-
-/*
- * This file contains the definitions for the x86 IO instructions
- * inb/inw/inl/outb/outw/outl and the "string versions" of the same
- * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing"
- * versions of the single-IO instructions (inb_p/inw_p/..).
- *
- * This file is not meant to be obfuscating: it's just complicated
- * to (a) handle it all in a way that makes gcc able to optimize it
- * as well as possible and (b) to avoid writing the same thing
- * over and over again with slight variations and possibly making a
- * mistake somewhere.
- */
-
-/*
- * Thanks to James van Artsdalen for a better timing-fix than
- * the two short jumps: using outb's to a nonexistent port seems
- * to guarantee better timings even on fast machines.
- *
- * On the other hand, I'd like to be sure of a non-existent port:
- * I feel a bit unsafe about using 0x80 (should be safe, though)
- *
- * Linus
- */
-
- /*
- * Bit simplified and optimized by Jan Hubicka
- * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999.
- *
- * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added,
- * isa_read[wl] and isa_write[wl] fixed
- * - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- */
-
-#ifdef SLOW_IO_BY_JUMPING
-#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:"
-#else
-#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
-#endif
-
-#ifdef REALLY_SLOW_IO
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
-#else
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
-#endif
-
-/*
- * Talk about misusing macros..
- */
-#define __OUT1(s,x) \
-extern inline void out##s(unsigned x value, unsigned short port) {
-
-#define __OUT2(s,s1,s2) \
-__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
-
-#define __OUT(s,s1,x) \
-__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
-
-#define __IN1(s) \
-extern inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
-
-#define __IN2(s,s1,s2) \
-__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
-
-#define __IN(s,s1,i...) \
-__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-
-#define __INS(s) \
-extern inline void ins##s(unsigned short port, void * addr, unsigned long count) \
-{ __asm__ __volatile__ ("rep ; ins" #s \
-: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
-
-#define __OUTS(s) \
-extern inline void outs##s(unsigned short port, const void * addr, unsigned long count) \
-{ __asm__ __volatile__ ("rep ; outs" #s \
-: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
-
-#define RETURN_TYPE unsigned char
-__IN(b,"")
-#undef RETURN_TYPE
-#define RETURN_TYPE unsigned short
-__IN(w,"")
-#undef RETURN_TYPE
-#define RETURN_TYPE unsigned int
-__IN(l,"")
-#undef RETURN_TYPE
-
-__OUT(b,"b",char)
-__OUT(w,"w",short)
-__OUT(l,,int)
-
-__INS(b)
-__INS(w)
-__INS(l)
-
-__OUTS(b)
-__OUTS(w)
-__OUTS(l)
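/*
 * For orientation, this is roughly what the macro machinery above
 * generates for the byte variants once expanded by hand (a sketch of
 * the expansion, not the literal preprocessor output):
 *
 *   extern inline void outb(unsigned char value, unsigned short port)
 *   {
 *       __asm__ __volatile__ ("outb %b0,%w1" : : "a" (value), "Nd" (port));
 *   }
 *
 *   extern inline unsigned char inb(unsigned short port)
 *   {
 *       unsigned char _v;
 *       __asm__ __volatile__ ("inb %w1,%0" : "=a" (_v) : "Nd" (port));
 *       return _v;
 *   }
 *
 * The _p variants are identical but append __FULL_SLOW_DOWN_IO to the
 * instruction string.
 */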
-
-#define IO_SPACE_LIMIT 0xffff
-
-/*
- * Temporary debugging check to catch old code using
- * unmapped ISA addresses. Will be removed in 2.4.
- */
-#ifdef CONFIG_IO_DEBUG
- extern void *__io_virt_debug(unsigned long x, const char *file, int line);
- extern unsigned long __io_phys_debug(unsigned long x, const char *file, int line);
- #define __io_virt(x) __io_virt_debug((unsigned long)(x), __FILE__, __LINE__)
-//#define __io_phys(x) __io_phys_debug((unsigned long)(x), __FILE__, __LINE__)
-#else
- #define __io_virt(x) ((void *)(x))
-//#define __io_phys(x) __pa(x)
-#endif
-
-/*
- * Change virtual addresses to physical addresses and vv.
- * These are pretty trivial
- */
-extern inline unsigned long virt_to_phys(volatile void * address)
-{
- return __pa(address);
-}
-
-extern inline void * phys_to_virt(unsigned long address)
-{
- return __va(address);
-}
-
-/*
- * Change "struct page" to physical address.
- */
-#ifdef CONFIG_DISCONTIGMEM
-#include <asm/mmzone.h>
-#else
-#define page_to_phys(page) (((page) - frame_table) << PAGE_SHIFT)
-#endif
-
-#define page_to_pfn(page) ((unsigned long)((page) - frame_table))
-#define page_to_virt(page) (phys_to_virt(page_to_phys(page)))
-
-extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
-
-extern inline void * ioremap (unsigned long offset, unsigned long size)
-{
- return __ioremap(offset, size, 0);
-}
-
-/*
- * This one maps high address device memory and turns off caching for that area.
- * It's useful if some control registers are in such an area and write combining
- * or read caching is not desirable:
- */
-extern inline void * ioremap_nocache (unsigned long offset, unsigned long size)
-{
- return __ioremap(offset, size, _PAGE_PCD);
-}
-
-extern void iounmap(void *addr);
-
-/*
- * IO bus memory addresses are also 1:1 with the physical address
- */
-#define virt_to_bus virt_to_phys
-#define bus_to_virt phys_to_virt
-#define page_to_bus page_to_phys
-
-/*
- * readX/writeX() are used to access memory mapped devices. On some
- * architectures the memory mapped IO stuff needs to be accessed
- * differently. On the x86 architecture, we just read/write the
- * memory location directly.
- */
-
-#define readb(addr) (*(volatile unsigned char *) __io_virt(addr))
-#define readw(addr) (*(volatile unsigned short *) __io_virt(addr))
-#define readl(addr) (*(volatile unsigned int *) __io_virt(addr))
-#define readq(addr) (*(volatile unsigned long *) __io_virt(addr))
-#define __raw_readb readb
-#define __raw_readw readw
-#define __raw_readl readl
-#define __raw_readq readq
-
-#define writeb(b,addr) (*(volatile unsigned char *) __io_virt(addr) = (b))
-#define writew(b,addr) (*(volatile unsigned short *) __io_virt(addr) = (b))
-#define writel(b,addr) (*(volatile unsigned int *) __io_virt(addr) = (b))
-#define writeq(b,addr) (*(volatile unsigned long *) __io_virt(addr) = (b))
-#define __raw_writeb writeb
-#define __raw_writew writew
-#define __raw_writel writel
-#define __raw_writeq writeq
-
-void *memcpy_fromio(void*,const void*,unsigned);
-void *memcpy_toio(void*,const void*,unsigned);
-
-#define memset_io(a,b,c) memset(__io_virt(a),(b),(c))
-
-/*
- * ISA space is 'always mapped' on a typical x86 system, no need to
- * explicitly ioremap() it. The fact that the ISA IO space is mapped
- * to PAGE_OFFSET is pure coincidence - it does not mean ISA values
- * are physical addresses. The following constant pointer can be
- * used as the IO-area pointer (it can be iounmapped as well, so the
- * analogy with PCI is quite large):
- */
-#define __ISA_IO_base ((char *)(PAGE_OFFSET))
-
-#define isa_readb(a) readb(__ISA_IO_base + (a))
-#define isa_readw(a) readw(__ISA_IO_base + (a))
-#define isa_readl(a) readl(__ISA_IO_base + (a))
-#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a))
-#define isa_writew(w,a) writew(w,__ISA_IO_base + (a))
-#define isa_writel(l,a) writel(l,__ISA_IO_base + (a))
-#define isa_memset_io(a,b,c) memset_io(__ISA_IO_base + (a),(b),(c))
-#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),__ISA_IO_base + (b),(c))
-#define isa_memcpy_toio(a,b,c) memcpy_toio(__ISA_IO_base + (a),(b),(c))
-
-
-/*
- * Again, x86-64 does not require mem IO specific functions.
- */
-
-#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(b),(c),(d))
-#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(__ISA_IO_base + (b)),(c),(d))
-
-static inline int check_signature(unsigned long io_addr,
- const unsigned char *signature, int length)
-{
- int retval = 0;
- do {
- if (readb(io_addr) != *signature)
- goto out;
- io_addr++;
- signature++;
- length--;
- } while (length);
- retval = 1;
-out:
- return retval;
-}
-
-static inline int isa_check_signature(unsigned long io_addr,
- const unsigned char *signature, int length)
-{
- int retval = 0;
- do {
- if (isa_readb(io_addr) != *signature)
- goto out;
- io_addr++;
- signature++;
- length--;
- } while (length);
- retval = 1;
-out:
- return retval;
-}
-
-/* Nothing to do */
-
-#define dma_cache_inv(_start,_size) do { } while (0)
-#define dma_cache_wback(_start,_size) do { } while (0)
-#define dma_cache_wback_inv(_start,_size) do { } while (0)
-
-#define flush_write_buffers()
-
-#endif
+++ /dev/null
-#ifndef __ASM_IO_APIC_H
-#define __ASM_IO_APIC_H
-
-#include <xen/config.h>
-#include <xen/types.h>
-
-/*
- * Intel IO-APIC support for SMP and UP systems.
- *
- * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
- */
-
-#ifdef CONFIG_X86_IO_APIC
-
-#define APIC_MISMATCH_DEBUG
-
-#define IO_APIC_BASE(idx) \
- ((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \
- + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK)))
-
-/*
- * The structure of the IO-APIC:
- */
-struct IO_APIC_reg_00 {
- __u32 __reserved_2 : 24,
- ID : 4,
- __reserved_1 : 4;
-} __attribute__ ((packed));
-
-struct IO_APIC_reg_01 {
- __u32 version : 8,
- __reserved_2 : 7,
- PRQ : 1,
- entries : 8,
- __reserved_1 : 8;
-} __attribute__ ((packed));
-
-struct IO_APIC_reg_02 {
- __u32 __reserved_2 : 24,
- arbitration : 4,
- __reserved_1 : 4;
-} __attribute__ ((packed));
-
-/*
- * # of IO-APICs and # of IRQ routing registers
- */
-extern int nr_ioapics;
-extern int nr_ioapic_registers[MAX_IO_APICS];
-
-enum ioapic_irq_destination_types {
- dest_Fixed = 0,
- dest_LowestPrio = 1,
- dest_SMI = 2,
- dest__reserved_1 = 3,
- dest_NMI = 4,
- dest_INIT = 5,
- dest__reserved_2 = 6,
- dest_ExtINT = 7
-};
-
-struct IO_APIC_route_entry {
- __u32 vector : 8,
- delivery_mode : 3, /* 000: FIXED
- * 001: lowest prio
- * 111: ExtINT
- */
- dest_mode : 1, /* 0: physical, 1: logical */
- delivery_status : 1,
- polarity : 1,
- irr : 1,
- trigger : 1, /* 0: edge, 1: level */
- mask : 1, /* 0: enabled, 1: disabled */
- __reserved_2 : 15;
-
- union { struct { __u32
- __reserved_1 : 24,
- physical_dest : 4,
- __reserved_2 : 4;
- } physical;
-
- struct { __u32
- __reserved_1 : 24,
- logical_dest : 8;
- } logical;
- } dest;
-
-} __attribute__ ((packed));
-
-/*
- * MP-BIOS irq configuration table structures:
- */
-
-/* I/O APIC entries */
-extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
-
-/* # of MP IRQ source entries */
-extern int mp_irq_entries;
-
-/* MP IRQ source entries */
-extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
-
-/* non-0 if default (table-less) MP configuration */
-extern int mpc_default_type;
-
-static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
-{
- *IO_APIC_BASE(apic) = reg;
- return *(IO_APIC_BASE(apic)+4);
-}
-
-static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
-{
- *IO_APIC_BASE(apic) = reg;
- *(IO_APIC_BASE(apic)+4) = value;
-}
-
-/*
- * Re-write a value: to be used for read-modify-write
- * cycles where the read already set up the index register.
- */
-static inline void io_apic_modify(unsigned int apic, unsigned int value)
-{
- *(IO_APIC_BASE(apic)+4) = value;
-}
-
-/*
- * Synchronize the IO-APIC and the CPU by doing
- * a dummy read from the IO-APIC
- */
-static inline void io_apic_sync(unsigned int apic)
-{
- (void) *(IO_APIC_BASE(apic)+4);
-}
-
-/* 1 if "noapic" boot option passed */
-extern int skip_ioapic_setup;
-
-/*
- * If we use the IO-APIC for IRQ routing, disable automatic
- * assignment of PCI IRQ's.
- */
-#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup)
-
-#else /* !CONFIG_X86_IO_APIC */
-#define io_apic_assign_pci_irqs 0
-#endif
-
-#endif
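/*
 * A sketch of how the indirect register pair above is used: register
 * 0x01 holds the version and entry count laid out as struct
 * IO_APIC_reg_01. Illustrative only -- it touches the real IO-APIC
 * mapping, so it cannot run outside the hypervisor.
 */
static inline int sketch_ioapic_nr_entries(unsigned int apic)
{
    struct IO_APIC_reg_01 reg_01;

    *(int *)&reg_01 = io_apic_read(apic, 0x01); /* write index, read data */
    return reg_01.entries + 1;                  /* field holds count - 1  */
}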
+++ /dev/null
-#ifndef _ASM_HW_IRQ_H
-#define _ASM_HW_IRQ_H
-
-/* (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar */
-
-#include <xen/config.h>
-#include <asm/atomic.h>
-
-#define SA_INTERRUPT 0x20000000
-#define SA_SHIRQ 0x04000000
-#define SA_NOPROFILE 0x02000000
-
-#define SA_SAMPLE_RANDOM 0 /* Linux driver compatibility */
-
-#define TIMER_IRQ 0
-
-extern void disable_irq(unsigned int);
-extern void disable_irq_nosync(unsigned int);
-extern void enable_irq(unsigned int);
-
-/*
- * IDT vectors usable for external interrupt sources start
- * at 0x20:
- */
-#define NR_VECTORS 256
-#define FIRST_EXTERNAL_VECTOR 0x30
-
-#ifdef CONFIG_X86_IO_APIC
-#define NR_IRQS 224
-#else
-#define NR_IRQS 16
-#endif
-
-#define HYPERVISOR_CALL_VECTOR 0x82
-
-/*
- * Vectors 0x30-0x3f are used for ISA interrupts.
- */
-
-/*
- * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
- *
- * some of the following vectors are 'rare', they are merged
- * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
- * TLB, reschedule and local APIC vectors are performance-critical.
- *
- * Vectors 0xf0-0xfa are free (reserved for future Linux use).
- */
-#define SPURIOUS_APIC_VECTOR 0xff
-#define ERROR_APIC_VECTOR 0xfe
-#define INVALIDATE_TLB_VECTOR 0xfd
-#define EVENT_CHECK_VECTOR 0xfc
-#define CALL_FUNCTION_VECTOR 0xfb
-#define KDB_VECTOR 0xfa
-#define TASK_MIGRATION_VECTOR 0xf9
-
-/*
- * Local APIC timer IRQ vector is on a different priority level,
- * to work around the 'lost local interrupt if more than 2 IRQ
- * sources per level' errata.
- */
-#define LOCAL_TIMER_VECTOR 0xef
-
-/*
- * First APIC vector available to drivers: (vectors 0x40-0xee)
- * we start at 0x41 to spread out vectors evenly between priority
- * levels. (0x82 is the syscall vector)
- */
-#define FIRST_DEVICE_VECTOR 0x41
-#define FIRST_SYSTEM_VECTOR 0xef
-
-extern int irq_vector[NR_IRQS];
-#define IO_APIC_VECTOR(irq) irq_vector[irq]
-
-/*
- * Various low-level irq details needed by irq.c, process.c,
- * time.c, io_apic.c and smp.c
- *
- * Interrupt entry/exit code at both C and assembly level
- */
-
-extern void mask_irq(unsigned int irq);
-extern void unmask_irq(unsigned int irq);
-extern void disable_8259A_irq(unsigned int irq);
-extern void enable_8259A_irq(unsigned int irq);
-extern int i8259A_irq_pending(unsigned int irq);
-extern void make_8259A_irq(unsigned int irq);
-extern void init_8259A(int aeoi);
-extern void FASTCALL(send_IPI_self(int vector));
-extern void init_VISWS_APIC_irqs(void);
-extern void setup_IO_APIC(void);
-extern void disable_IO_APIC(void);
-extern void print_IO_APIC(void);
-extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
-extern void send_IPI(int dest, int vector);
-
-extern unsigned long io_apic_irqs;
-
-extern atomic_t irq_err_count;
-extern atomic_t irq_mis_count;
-
-extern char _stext, _etext;
-
-#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
-
-#define __STR(x) #x
-#define STR(x) __STR(x)
-
-#define IRQ_NAME2(nr) nr##_interrupt(void)
-#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
-
-#define BUILD_IRQ(nr) \
-asmlinkage void IRQ_NAME(nr); \
-__asm__( \
-"\n.p2align\n" \
-SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \
- "push $"#nr"-256\n\t" \
- "jmp common_interrupt");
-
-extern unsigned long prof_cpu_mask;
-extern unsigned int * prof_buffer;
-extern unsigned long prof_len;
-extern unsigned long prof_shift;
-
-#include <xen/irq.h>
-
-#if defined(CONFIG_X86_IO_APIC)
-static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {
- if (IO_APIC_IRQ(i))
- send_IPI_self(IO_APIC_VECTOR(i));
-}
-#else
-static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {}
-#endif
-
-#endif /* _ASM_HW_IRQ_H */
+++ /dev/null
-#ifndef __ARCH_LDT_H
-#define __ARCH_LDT_H
-
-#ifndef __ASSEMBLY__
-
-static inline void load_LDT(struct task_struct *p)
-{
- unsigned long ents;
-
- if ( (ents = p->mm.ldt_ents) == 0 )
- {
- __asm__ __volatile__ ( "lldt %w0" : : "r" (0) );
- }
- else
- {
- unsigned int cpu;
- struct ldttss_desc *desc;
-
- cpu = smp_processor_id();
- desc = (struct ldttss_desc *)((char *)GET_GDT_ADDRESS(p) + __CPU_DESC_INDEX(cpu, ldt));
- desc->limit0 = ents*8-1;
- desc->base0 = LDT_VIRT_START&0xffff;
- desc->base1 = (LDT_VIRT_START&0xff0000)>>16;
- desc->type = DESC_LDT;
- desc->dpl = 0;
- desc->p = 1;
- desc->limit1 = 0;
- desc->zero0 = 0;
- desc->g = 0;
- desc->base2 = (LDT_VIRT_START&0xff000000)>>24;
- desc->base3 = LDT_VIRT_START>>32;
- desc->zero1 = 0;
- __load_LDT(cpu);
- }
-}
-
-#endif /* !__ASSEMBLY__ */
-
-#endif
+++ /dev/null
-/*
- * Machine dependent access functions for RTC registers.
- */
-#ifndef _ASM_MC146818RTC_H
-#define _ASM_MC146818RTC_H
-
-#include <asm/io.h>
-#include <xen/spinlock.h>
-
-extern spinlock_t rtc_lock; /* serialize CMOS RAM access */
-
-/**********************************************************************
- * register summary
- **********************************************************************/
-#define RTC_SECONDS 0
-#define RTC_SECONDS_ALARM 1
-#define RTC_MINUTES 2
-#define RTC_MINUTES_ALARM 3
-#define RTC_HOURS 4
-#define RTC_HOURS_ALARM 5
-/* RTC_*_alarm is always true if 2 MSBs are set */
-# define RTC_ALARM_DONT_CARE 0xC0
-
-#define RTC_DAY_OF_WEEK 6
-#define RTC_DAY_OF_MONTH 7
-#define RTC_MONTH 8
-#define RTC_YEAR 9
-
-/* control registers - Moto names
- */
-#define RTC_REG_A 10
-#define RTC_REG_B 11
-#define RTC_REG_C 12
-#define RTC_REG_D 13
-
-/**********************************************************************
- * register details
- **********************************************************************/
-#define RTC_FREQ_SELECT RTC_REG_A
-
-/* update-in-progress - set to "1" 244 microsecs before RTC goes off the bus,
- * reset after update (may take 1.984ms @ 32768Hz RefClock) is complete,
- * totalling to a max high interval of 2.228 ms.
- */
-# define RTC_UIP 0x80
-# define RTC_DIV_CTL 0x70
- /* divider control: refclock values 4.194 / 1.049 MHz / 32.768 kHz */
-# define RTC_REF_CLCK_4MHZ 0x00
-# define RTC_REF_CLCK_1MHZ 0x10
-# define RTC_REF_CLCK_32KHZ 0x20
- /* 2 values for divider stage reset, others for "testing purposes only" */
-# define RTC_DIV_RESET1 0x60
-# define RTC_DIV_RESET2 0x70
- /* Periodic intr. / Square wave rate select. 0=none, 1=32.8kHz,... 15=2Hz */
-# define RTC_RATE_SELECT 0x0F
-
-/**********************************************************************/
-#define RTC_CONTROL RTC_REG_B
-# define RTC_SET 0x80 /* disable updates for clock setting */
-# define RTC_PIE 0x40 /* periodic interrupt enable */
-# define RTC_AIE 0x20 /* alarm interrupt enable */
-# define RTC_UIE 0x10 /* update-finished interrupt enable */
-# define RTC_SQWE 0x08 /* enable square-wave output */
-# define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */
-# define RTC_24H 0x02 /* 24 hour mode - else hours bit 7 means pm */
-# define RTC_DST_EN 0x01 /* auto switch DST - works f. USA only */
-
-/**********************************************************************/
-#define RTC_INTR_FLAGS RTC_REG_C
-/* caution - cleared by read */
-# define RTC_IRQF 0x80 /* any of the following 3 is active */
-# define RTC_PF 0x40
-# define RTC_AF 0x20
-# define RTC_UF 0x10
-
-/**********************************************************************/
-#define RTC_VALID RTC_REG_D
-# define RTC_VRT 0x80 /* valid RAM and time */
-/**********************************************************************/
-
-/* example: !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)
- * determines if the following two #defines are needed
- */
-#ifndef BCD_TO_BIN
-#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10)
-#endif
-
-#ifndef BIN_TO_BCD
-#define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10)
-#endif
-
-
-#ifndef RTC_PORT
-#define RTC_PORT(x) (0x70 + (x))
-#define RTC_ALWAYS_BCD 1 /* RTC values are assumed to be in BCD */
-#endif
-
-/*
- * The machines supported so far all access the RTC index register via
- * an ISA port access, but the way to access the data register differs ...
- */
-#define CMOS_READ(addr) ({ \
-outb_p((addr),RTC_PORT(0)); \
-inb_p(RTC_PORT(1)); \
-})
-#define CMOS_WRITE(val, addr) ({ \
-outb_p((addr),RTC_PORT(0)); \
-outb_p((val),RTC_PORT(1)); \
-})
-
-#define RTC_IRQ 8
-
-#endif /* _ASM_MC146818RTC_H */
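/*
 * A sketch of reading the current second through the accessors above,
 * honouring RTC_DM_BINARY: values arrive in BCD unless the control
 * register says otherwise. Locking via rtc_lock is elided; real
 * callers must hold it around the CMOS accesses.
 */
static inline unsigned int sketch_read_rtc_seconds(void)
{
    unsigned int sec = CMOS_READ(RTC_SECONDS);

    if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
        BCD_TO_BIN(sec);
    return sec;
}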
+++ /dev/null
-#ifndef __ASM_MPSPEC_H
-#define __ASM_MPSPEC_H
-
-
-/*
- * Structure definitions for SMP machines following the
- * Intel Multiprocessing Specification 1.1 and 1.4.
- */
-
-/*
- * This tag identifies where the SMP configuration
- * information is.
- */
-
-#define SMP_MAGIC_IDENT (('_'<<24)|('P'<<16)|('M'<<8)|'_')
-
-/*
- * A maximum of 16 APICs is possible with the current APIC ID architecture.
- * xAPICs can have up to 256. SAPICs have 16 ID bits.
- */
-#ifdef CONFIG_X86_CLUSTERED_APIC
-#define MAX_APICS 256
-#else
-#define MAX_APICS 16
-#endif
-
-#define MAX_MPC_ENTRY 1024
-
-struct intel_mp_floating
-{
- char mpf_signature[4]; /* "_MP_" */
- unsigned int mpf_physptr; /* Configuration table address */
- unsigned char mpf_length; /* Our length (paragraphs) */
- unsigned char mpf_specification;/* Specification version */
- unsigned char mpf_checksum; /* Checksum (makes sum 0) */
- unsigned char mpf_feature1; /* Standard or configuration ? */
- unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */
- unsigned char mpf_feature3; /* Unused (0) */
- unsigned char mpf_feature4; /* Unused (0) */
- unsigned char mpf_feature5; /* Unused (0) */
-};
-
-struct mp_config_table
-{
- char mpc_signature[4];
-#define MPC_SIGNATURE "PCMP"
- unsigned short mpc_length; /* Size of table */
- char mpc_spec; /* 0x01 */
- char mpc_checksum;
- char mpc_oem[8];
- char mpc_productid[12];
- unsigned int mpc_oemptr; /* 0 if not present */
- unsigned short mpc_oemsize; /* 0 if not present */
- unsigned short mpc_oemcount;
- unsigned int mpc_lapic; /* APIC address */
- unsigned int reserved;
-};
-
-/* Followed by entries */
-
-#define MP_PROCESSOR 0
-#define MP_BUS 1
-#define MP_IOAPIC 2
-#define MP_INTSRC 3
-#define MP_LINTSRC 4
-#define MP_TRANSLATION 192 /* Used by IBM NUMA-Q to describe node locality */
-
-struct mpc_config_processor
-{
- unsigned char mpc_type;
- unsigned char mpc_apicid; /* Local APIC number */
- unsigned char mpc_apicver; /* Its version */
- unsigned char mpc_cpuflag;
-#define CPU_ENABLED 1 /* Processor is available */
-#define CPU_BOOTPROCESSOR 2 /* Processor is the BP */
- unsigned int mpc_cpufeature;
-#define CPU_STEPPING_MASK 0x0F
-#define CPU_MODEL_MASK 0xF0
-#define CPU_FAMILY_MASK 0xF00
- unsigned int mpc_featureflag; /* CPUID feature value */
- unsigned int mpc_reserved[2];
-};
-
-struct mpc_config_bus
-{
- unsigned char mpc_type;
- unsigned char mpc_busid;
- unsigned char mpc_bustype[6] __attribute((packed));
-};
-
-/* List of Bus Type string values, Intel MP Spec. */
-#define BUSTYPE_EISA "EISA"
-#define BUSTYPE_ISA "ISA"
-#define BUSTYPE_INTERN "INTERN" /* Internal BUS */
-#define BUSTYPE_MCA "MCA"
-#define BUSTYPE_VL "VL" /* Local bus */
-#define BUSTYPE_PCI "PCI"
-#define BUSTYPE_PCMCIA "PCMCIA"
-#define BUSTYPE_CBUS "CBUS"
-#define BUSTYPE_CBUSII "CBUSII"
-#define BUSTYPE_FUTURE "FUTURE"
-#define BUSTYPE_MBI "MBI"
-#define BUSTYPE_MBII "MBII"
-#define BUSTYPE_MPI "MPI"
-#define BUSTYPE_MPSA "MPSA"
-#define BUSTYPE_NUBUS "NUBUS"
-#define BUSTYPE_TC "TC"
-#define BUSTYPE_VME "VME"
-#define BUSTYPE_XPRESS "XPRESS"
-
-struct mpc_config_ioapic
-{
- unsigned char mpc_type;
- unsigned char mpc_apicid;
- unsigned char mpc_apicver;
- unsigned char mpc_flags;
-#define MPC_APIC_USABLE 0x01
- unsigned int mpc_apicaddr;
-};
-
-struct mpc_config_intsrc
-{
- unsigned char mpc_type;
- unsigned char mpc_irqtype;
- unsigned short mpc_irqflag;
- unsigned char mpc_srcbus;
- unsigned char mpc_srcbusirq;
- unsigned char mpc_dstapic;
- unsigned char mpc_dstirq;
-};
-
-enum mp_irq_source_types {
- mp_INT = 0,
- mp_NMI = 1,
- mp_SMI = 2,
- mp_ExtINT = 3
-};
-
-#define MP_IRQDIR_DEFAULT 0
-#define MP_IRQDIR_HIGH 1
-#define MP_IRQDIR_LOW 3
-
-
-struct mpc_config_lintsrc
-{
- unsigned char mpc_type;
- unsigned char mpc_irqtype;
- unsigned short mpc_irqflag;
- unsigned char mpc_srcbusid;
- unsigned char mpc_srcbusirq;
- unsigned char mpc_destapic;
-#define MP_APIC_ALL 0xFF
- unsigned char mpc_destapiclint;
-};
-
-struct mp_config_oemtable
-{
- char oem_signature[4];
-#define MPC_OEM_SIGNATURE "_OEM"
- unsigned short oem_length; /* Size of table */
- char oem_rev; /* 0x01 */
- char oem_checksum;
- char mpc_oem[8];
-};
-
-struct mpc_config_translation
-{
- unsigned char mpc_type;
- unsigned char trans_len;
- unsigned char trans_type;
- unsigned char trans_quad;
- unsigned char trans_global;
- unsigned char trans_local;
- unsigned short trans_reserved;
-};
-
-/*
- * Default configurations
- *
- * 1 2 CPU ISA 82489DX
- * 2 2 CPU EISA 82489DX neither IRQ 0 timer nor IRQ 13 DMA chaining
- * 3 2 CPU EISA 82489DX
- * 4 2 CPU MCA 82489DX
- * 5 2 CPU ISA+PCI
- * 6 2 CPU EISA+PCI
- * 7 2 CPU MCA+PCI
- */
-
-#define MAX_MP_BUSSES 257
-#define MAX_IRQ_SOURCES (MAX_MP_BUSSES*4)
-enum mp_bustype {
- MP_BUS_ISA = 1,
- MP_BUS_EISA,
- MP_BUS_PCI,
- MP_BUS_MCA
-};
-extern int mp_bus_id_to_type [MAX_MP_BUSSES];
-extern int mp_bus_id_to_node [MAX_MP_BUSSES];
-extern int mp_bus_id_to_local [MAX_MP_BUSSES];
-extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];
-extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
-
-extern unsigned int boot_cpu_physical_apicid;
-extern int smp_found_config;
-extern void find_smp_config (void);
-extern void get_smp_config (void);
-extern int apic_version [MAX_APICS];
-extern int mp_current_pci_id;
-extern unsigned long mp_lapic_addr;
-
-#endif
-
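/*
 * A sketch of how the "_MP_" floating structure above is located:
 * scan a candidate physical range on 16-byte boundaries for the
 * signature, then validate by summing the whole structure to zero
 * (mpf_checksum is chosen to make that so). The real scan, over the
 * EBDA, base memory and the BIOS ROM, lives in mpparse.c; the helper
 * names here are local to the sketch.
 */
static int sketch_mpf_sums_to_zero(const unsigned char *mp, int len)
{
    unsigned int sum = 0;

    while (len--)
        sum += *mp++;
    return (sum & 0xFF) == 0;
}

static struct intel_mp_floating *sketch_scan_for_mpf(void *base, unsigned long length)
{
    unsigned int *bp;

    for (bp = base; length >= 16; bp += 4, length -= 16) {
        if (*bp == SMP_MAGIC_IDENT &&          /* little-endian "_MP_" */
            sketch_mpf_sums_to_zero((const unsigned char *)bp, 16))
            return (struct intel_mp_floating *)bp;
    }
    return (struct intel_mp_floating *)0;
}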
+++ /dev/null
-#ifndef X86_64_MSR_H
-#define X86_64_MSR_H 1
-
-#ifndef __ASSEMBLY__
-/*
- * Access to machine-specific registers (available on 586 and better only)
- * Note: the rd* operations modify the parameters directly (without using
- * pointer indirection); this allows gcc to optimize better.
- */
-
-#define rdmsr(msr,val1,val2) \
- __asm__ __volatile__("rdmsr" \
- : "=a" (val1), "=d" (val2) \
- : "c" (msr))
-
-
-#define rdmsrl(msr,val) do { unsigned long a__,b__; \
- __asm__ __volatile__("rdmsr" \
- : "=a" (a__), "=d" (b__) \
- : "c" (msr)); \
- val = a__ | (b__<<32); \
-} while(0)
-
-#define wrmsr(msr,val1,val2) \
- __asm__ __volatile__("wrmsr" \
- : /* no outputs */ \
- : "c" (msr), "a" (val1), "d" (val2))
-
-#define rdtsc(low,high) \
- __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
-
-#define rdtscl(low) \
- __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx")
-
-#define rdtscll(val) do { \
- unsigned int a,d; \
- asm volatile("rdtsc" : "=a" (a), "=d" (d)); \
- (val) = ((unsigned long)a) | (((unsigned long)d)<<32); \
-} while(0)
-
-#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
-
-#define rdpmc(counter,low,high) \
- __asm__ __volatile__("rdpmc" \
- : "=a" (low), "=d" (high) \
- : "c" (counter))
-
-#endif
-
-/* AMD/K8 specific MSRs */
-#define MSR_EFER 0xc0000080 /* extended feature register */
-#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */
-#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */
-#define MSR_CSTAR 0xc0000083 /* compatibility mode SYSCALL target */
-#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */
-#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */
-#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */
-#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow (or USER_GS from kernel) */
-/* EFER bits: */
-#define _EFER_SCE 0 /* SYSCALL/SYSRET */
-#define _EFER_LME 8 /* Long mode enable */
-#define _EFER_LMA 10 /* Long mode active (read-only) */
-#define _EFER_NX 11 /* No execute enable */
-
-#define EFER_SCE (1<<_EFER_SCE)
-#define EFER_LME (1<<_EFER_LME)
-#define EFER_LMA (1<<_EFER_LMA)
-#define EFER_NX (1<<_EFER_NX)
-
-/* Intel MSRs. Some also available on other CPUs */
-#define MSR_IA32_PLATFORM_ID 0x17
-
-#define MSR_IA32_PERFCTR0 0xc1
-#define MSR_IA32_PERFCTR1 0xc2
-
-#define MSR_MTRRcap 0x0fe
-#define MSR_IA32_BBL_CR_CTL 0x119
-
-#define MSR_IA32_MCG_CAP 0x179
-#define MSR_IA32_MCG_STATUS 0x17a
-#define MSR_IA32_MCG_CTL 0x17b
-
-#define MSR_IA32_EVNTSEL0 0x186
-#define MSR_IA32_EVNTSEL1 0x187
-
-#define MSR_IA32_DEBUGCTLMSR 0x1d9
-#define MSR_IA32_LASTBRANCHFROMIP 0x1db
-#define MSR_IA32_LASTBRANCHTOIP 0x1dc
-#define MSR_IA32_LASTINTFROMIP 0x1dd
-#define MSR_IA32_LASTINTTOIP 0x1de
-
-#define MSR_MTRRfix64K_00000 0x250
-#define MSR_MTRRfix16K_80000 0x258
-#define MSR_MTRRfix16K_A0000 0x259
-#define MSR_MTRRfix4K_C0000 0x268
-#define MSR_MTRRfix4K_C8000 0x269
-#define MSR_MTRRfix4K_D0000 0x26a
-#define MSR_MTRRfix4K_D8000 0x26b
-#define MSR_MTRRfix4K_E0000 0x26c
-#define MSR_MTRRfix4K_E8000 0x26d
-#define MSR_MTRRfix4K_F0000 0x26e
-#define MSR_MTRRfix4K_F8000 0x26f
-#define MSR_MTRRdefType 0x2ff
-
-#define MSR_IA32_MC0_CTL 0x400
-#define MSR_IA32_MC0_STATUS 0x401
-#define MSR_IA32_MC0_ADDR 0x402
-#define MSR_IA32_MC0_MISC 0x403
-
-#define MSR_P6_PERFCTR0 0xc1
-#define MSR_P6_PERFCTR1 0xc2
-#define MSR_P6_EVNTSEL0 0x186
-#define MSR_P6_EVNTSEL1 0x187
-
-/* K7/K8 MSRs. Not complete. See the architecture manual for a more complete list. */
-#define MSR_K7_EVNTSEL0 0xC0010000
-#define MSR_K7_PERFCTR0 0xC0010004
-#define MSR_K7_EVNTSEL1 0xC0010001
-#define MSR_K7_PERFCTR1 0xC0010005
-#define MSR_K7_EVNTSEL2 0xC0010002
-#define MSR_K7_PERFCTR2 0xC0010006
-#define MSR_K7_EVNTSEL3 0xC0010003
-#define MSR_K7_PERFCTR3 0xC0010007
-#define MSR_K8_TOP_MEM1 0xC001001A
-#define MSR_K8_TOP_MEM2 0xC001001D
-#define MSR_K8_SYSCFG 0xC0000010
-
-/* K6 MSRs */
-#define MSR_K6_EFER 0xC0000080
-#define MSR_K6_STAR 0xC0000081
-#define MSR_K6_WHCR 0xC0000082
-#define MSR_K6_UWCCR 0xC0000085
-#define MSR_K6_PSOR 0xC0000087
-#define MSR_K6_PFIR 0xC0000088
-
-/* Centaur-Hauls/IDT defined MSRs. */
-#define MSR_IDT_FCR1 0x107
-#define MSR_IDT_FCR2 0x108
-#define MSR_IDT_FCR3 0x109
-#define MSR_IDT_FCR4 0x10a
-
-#define MSR_IDT_MCR0 0x110
-#define MSR_IDT_MCR1 0x111
-#define MSR_IDT_MCR2 0x112
-#define MSR_IDT_MCR3 0x113
-#define MSR_IDT_MCR4 0x114
-#define MSR_IDT_MCR5 0x115
-#define MSR_IDT_MCR6 0x116
-#define MSR_IDT_MCR7 0x117
-#define MSR_IDT_MCR_CTRL 0x120
-
-/* VIA Cyrix defined MSRs*/
-#define MSR_VIA_FCR 0x1107
-
-/* Intel defined MSRs. */
-#define MSR_IA32_P5_MC_ADDR 0
-#define MSR_IA32_P5_MC_TYPE 1
-#define MSR_IA32_PLATFORM_ID 0x17
-#define MSR_IA32_EBL_CR_POWERON 0x2a
-
-#define MSR_IA32_APICBASE 0x1b
-#define MSR_IA32_APICBASE_BSP (1<<8)
-#define MSR_IA32_APICBASE_ENABLE (1<<11)
-#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
-
-#endif
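/*
 * A sketch of the accessors above in use: read EFER with rdmsrl() and
 * test the long-mode-active bit. rdmsr is privileged, so this only
 * illustrates the calling style, not something runnable in user space.
 */
static inline int sketch_long_mode_active(void)
{
    unsigned long efer;

    rdmsrl(MSR_EFER, efer);
    return !!(efer & EFER_LMA);
}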
+++ /dev/null
-#ifndef _X86_64_PAGE_H
-#define _X86_64_PAGE_H
-
-#define BUG() do { \
- printk("BUG at %s:%d\n", __FILE__, __LINE__); \
- __asm__ __volatile__("ud2"); \
-} while (0)
-
-#define __PHYSICAL_MASK 0x0000ffffffffffffUL
-#define PHYSICAL_PAGE_MASK 0x0000fffffffff000UL
-#define PTE_MASK PHYSICAL_PAGE_MASK
-
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT 12
-#ifdef __ASSEMBLY__
-#define PAGE_SIZE (0x1 << PAGE_SHIFT)
-#else
-#define PAGE_SIZE (1UL << PAGE_SHIFT)
-#endif
-#define PAGE_MASK (~(PAGE_SIZE-1))
-#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
-#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
-
-#define L1_PAGETABLE_SHIFT 12
-#define L2_PAGETABLE_SHIFT 21
-#define L3_PAGETABLE_SHIFT 30
-#define L4_PAGETABLE_SHIFT 39
-#define LARGE_PFN (LARGE_PAGE_SIZE / PAGE_SIZE)
-
-#define ENTRIES_PER_L1_PAGETABLE 512
-#define ENTRIES_PER_L2_PAGETABLE 512
-#define ENTRIES_PER_L3_PAGETABLE 512
-#define ENTRIES_PER_L4_PAGETABLE 512
-
-#define KERNEL_TEXT_SIZE (40UL*1024*1024)
-#define KERNEL_TEXT_START 0xffffffff80000000UL
-
-/* Changing the next two defines should be enough to increase the kernel stack */
-/* We still hope 8K is enough, but ... */
-#define THREAD_ORDER 1
-#define THREAD_SIZE (2*PAGE_SIZE)
-
-#define INIT_TASK_SIZE THREAD_SIZE
-#define CURRENT_MASK (~(THREAD_SIZE-1))
-
-#define clear_page(_p) memset((void *)(_p), 0, PAGE_SIZE)
-#define copy_page(_t,_f) memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)
-
-#ifndef __ASSEMBLY__
-#include <xen/config.h>
-typedef struct { unsigned long l1_lo; } l1_pgentry_t;
-typedef struct { unsigned long l2_lo; } l2_pgentry_t;
-typedef struct { unsigned long l3_lo; } l3_pgentry_t;
-typedef struct { unsigned long l4_lo; } l4_pgentry_t;
-typedef l1_pgentry_t *l1_pagetable_t;
-typedef l2_pgentry_t *l2_pagetable_t;
-typedef l3_pgentry_t *l3_pagetable_t;
-typedef l4_pgentry_t *l4_pagetable_t;
-typedef struct { unsigned long pt_lo; } pagetable_t;
-typedef struct { unsigned long pgprot; } pgprot_t;
-#endif /* !__ASSEMBLY__ */
-
-/* Strip type from a table entry. */
-#define l1_pgentry_val(_x) ((_x).l1_lo)
-#define l2_pgentry_val(_x) ((_x).l2_lo)
-#define l3_pgentry_val(_x) ((_x).l3_lo)
-#define l4_pgentry_val(_x) ((_x).l4_lo)
-#define pagetable_val(_x) ((_x).pt_lo)
-
-#define alloc_l1_pagetable() ((l1_pgentry_t *)get_free_page(GFP_KERNEL))
-#define alloc_l2_pagetable() ((l2_pgentry_t *)get_free_page(GFP_KERNEL))
-#define alloc_l3_pagetable() ((l3_pgentry_t *)get_free_page(GFP_KERNEL))
-#define alloc_l4_pagetable() ((l4_pgentry_t *)get_free_page(GFP_KERNEL))
-
-/* Add type to a table entry. */
-#define mk_l1_pgentry(_x) ( (l1_pgentry_t) { (_x) } )
-#define mk_l2_pgentry(_x) ( (l2_pgentry_t) { (_x) } )
-#define mk_l3_pgentry(_x) ( (l3_pgentry_t) { (_x) } )
-#define mk_l4_pgentry(_x) ( (l4_pgentry_t) { (_x) } )
-#define mk_pagetable(_x) ( (pagetable_t) { (_x) } )
-
-/* Turn a typed table entry into a page index. */
-#define l1_pgentry_to_pagenr(_x) (l1_pgentry_val(_x) >> PAGE_SHIFT)
-#define l2_pgentry_to_pagenr(_x) (l2_pgentry_val(_x) >> PAGE_SHIFT)
-#define l3_pgentry_to_pagenr(_x) (l3_pgentry_val(_x) >> PAGE_SHIFT)
-#define l4_pgentry_to_pagenr(_x) (l4_pgentry_val(_x) >> PAGE_SHIFT)
-
-/* Turn a typed table entry into a physical address. */
-#define l1_pgentry_to_phys(_x) (l1_pgentry_val(_x) & PAGE_MASK)
-#define l2_pgentry_to_phys(_x) (l2_pgentry_val(_x) & PAGE_MASK)
-#define l3_pgentry_to_phys(_x) (l3_pgentry_val(_x) & PAGE_MASK)
-#define l4_pgentry_to_phys(_x) (l4_pgentry_val(_x) & PAGE_MASK)
-
-/* Dereference a typed level-2 entry to yield a typed level-1 table. */
-#define l2_pgentry_to_l1(_x) \
- ((l1_pgentry_t *)__va(l2_pgentry_val(_x) & PAGE_MASK))
-
-/* Dereference a typed level-4 entry to yield a typed level-3 table. */
-#define l4_pgentry_to_l3(_x) \
- ((l3_pgentry_t *)__va(l4_pgentry_val(_x) & PAGE_MASK))
-
-/* Dereference a typed level-3 entry to yield a typed level-2 table. */
-#define l3_pgentry_to_l2(_x) \
- ((l2_pgentry_t *)__va(l3_pgentry_val(_x) & PAGE_MASK))
-
-/* Given a virtual address, get an entry offset into a page table. */
-#define l1_table_offset(_a) \
- (((_a) >> L1_PAGETABLE_SHIFT) & (ENTRIES_PER_L1_PAGETABLE - 1))
-#define l2_table_offset(_a) \
- (((_a) >> L2_PAGETABLE_SHIFT) & (ENTRIES_PER_L2_PAGETABLE - 1))
-#define l3_table_offset(_a) \
- (((_a) >> L3_PAGETABLE_SHIFT) & (ENTRIES_PER_L3_PAGETABLE - 1))
-#define l4_table_offset(_a) \
- ((_a) >> L4_PAGETABLE_SHIFT)
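/*
 * A runnable sketch of the offset macros above: decompose a 4-level
 * virtual address into its page-table indices, masking each to 9 bits
 * (note that the header's l4_table_offset leaves the top index
 * unmasked). The shifts mirror the definitions earlier in this header;
 * the address chosen is __START_KERNEL from further below.
 */
#include <stdio.h>

#define SK_IDX(va, shift) (((va) >> (shift)) & 511)

int main(void)
{
    unsigned long va = 0xffffffff80100000UL;

    printf("l4=%lu l3=%lu l2=%lu l1=%lu\n",
           SK_IDX(va, 39), SK_IDX(va, 30), SK_IDX(va, 21), SK_IDX(va, 12));
    /* prints: l4=511 l3=510 l2=0 l1=256 */
    return 0;
}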
-
-/* Hypervisor table entries use zero to signify 'empty'. */
-#define l1_pgentry_empty(_x) (!l1_pgentry_val(_x))
-#define l2_pgentry_empty(_x) (!l2_pgentry_val(_x))
-#define l3_pgentry_empty(_x) (!l3_pgentry_val(_x))
-#define l4_pgentry_empty(_x) (!l4_pgentry_val(_x))
-
-
-#define pgprot_val(x) ((x).pgprot)
-#define __pgprot(x) ((pgprot_t) { (x) } )
-
-#define clear_user_page(page, vaddr) clear_page(page)
-#define copy_user_page(to, from, vaddr) copy_page(to, from)
-
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
-/*
- * NB. We don't currently track I/O holes in the physical RAM space.
- * For now we guess that I/O devices will be mapped in the first 1MB
- * (e.g., VGA buffers) or beyond the end of physical RAM.
- */
-#define pfn_is_ram(_pfn) (((_pfn) > 0x100) && ((_pfn) < max_page))
-
-/* High table entries are reserved by the hypervisor. */
-#define DOMAIN_ENTRIES_PER_L4_PAGETABLE \
- (HYPERVISOR_VIRT_START >> L4_PAGETABLE_SHIFT)
-#define HYPERVISOR_ENTRIES_PER_L4_PAGETABLE \
- (ENTRIES_PER_L4_PAGETABLE - DOMAIN_ENTRIES_PER_L4_PAGETABLE)
-
-#define __START_KERNEL 0xffffffff80100000
-#define __START_KERNEL_map 0xffffffff80000000
-#define __PAGE_OFFSET 0x0000010000000000
-#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
-
-#ifndef __ASSEMBLY__
-#include <asm/processor.h>
-#include <asm/fixmap.h>
-#include <asm/bitops.h>
-#include <asm/flushtlb.h>
-
-extern unsigned long vm_stack_flags, vm_stack_flags32;
-extern unsigned long vm_data_default_flags, vm_data_default_flags32;
-extern unsigned long vm_force_exec32;
-
-#define linear_pg_table ((l1_pgentry_t *)LINEAR_PT_VIRT_START)
-
-extern l2_pgentry_t idle_pg_table[ENTRIES_PER_L2_PAGETABLE];
-extern void paging_init(void);
-
-#define __flush_tlb() \
- do { \
- __asm__ __volatile__ ( \
- "movl %%cr3, %%eax; movl %%eax, %%cr3" \
- : : : "memory", "eax" ); \
- tlb_clocktick(); \
- } while ( 0 )
-
-/* Flush global pages as well. */
-
-#define __pge_off() \
- do { \
- __asm__ __volatile__( \
- "movl %0, %%cr4; # turn off PGE " \
- :: "r" (mmu_cr4_features & ~X86_CR4_PGE)); \
- } while (0)
-
-#define __pge_on() \
- do { \
- __asm__ __volatile__( \
- "movl %0, %%cr4; # turn off PGE " \
- :: "r" (mmu_cr4_features)); \
- } while (0)
-
-
-#define __flush_tlb_pge() \
- do { \
- __pge_off(); \
- __flush_tlb(); \
- __pge_on(); \
- } while (0)
-
-#define __flush_tlb_one(__addr) \
-__asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr)))
-
-#include <xen/config.h>
-
-/*
- * Tell the user there is some problem. The exception handler decodes this frame.
- */
-struct bug_frame {
- unsigned char ud2[2];
- char *filename; /* should use 32bit offset instead, but the assembler doesn't like it */
- unsigned short line;
-} __attribute__((packed));
-#define HEADER_BUG() asm volatile("ud2 ; .quad %P1 ; .short %P0" :: "i"(__LINE__), \
- "i" (__stringify(__FILE__)))
-#define PAGE_BUG(page) BUG()
-
-#endif /* !__ASSEMBLY__ */
-
-#define _PAGE_PRESENT 0x001
-#define _PAGE_RW 0x002
-#define _PAGE_USER 0x004
-#define _PAGE_PWT 0x008
-#define _PAGE_PCD 0x010
-#define _PAGE_ACCESSED 0x020
-#define _PAGE_DIRTY 0x040
-#define _PAGE_PAT 0x080
-#define _PAGE_PSE 0x080
-#define _PAGE_GLOBAL 0x100
-
-#define __PAGE_HYPERVISOR \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
-#define __PAGE_HYPERVISOR_NOCACHE \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED)
-#define __PAGE_HYPERVISOR_RO \
- (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED)
-
-#define MAKE_GLOBAL(_x) ((_x) | _PAGE_GLOBAL)
-
-#define PAGE_HYPERVISOR MAKE_GLOBAL(__PAGE_HYPERVISOR)
-#define PAGE_HYPERVISOR_RO MAKE_GLOBAL(__PAGE_HYPERVISOR_RO)
-#define PAGE_HYPERVISOR_NOCACHE MAKE_GLOBAL(__PAGE_HYPERVISOR_NOCACHE)
-
-#define mk_l4_writeable(_p) \
- (*(_p) = mk_l4_pgentry(l4_pgentry_val(*(_p)) | _PAGE_RW))
-#define mk_l4_readonly(_p) \
- (*(_p) = mk_l4_pgentry(l4_pgentry_val(*(_p)) & ~_PAGE_RW))
-#define mk_l3_writeable(_p) \
- (*(_p) = mk_l3_pgentry(l3_pgentry_val(*(_p)) | _PAGE_RW))
-#define mk_l3_readonly(_p) \
- (*(_p) = mk_l3_pgentry(l3_pgentry_val(*(_p)) & ~_PAGE_RW))
-#define mk_l2_writeable(_p) \
- (*(_p) = mk_l2_pgentry(l2_pgentry_val(*(_p)) | _PAGE_RW))
-#define mk_l2_readonly(_p) \
- (*(_p) = mk_l2_pgentry(l2_pgentry_val(*(_p)) & ~_PAGE_RW))
-#define mk_l1_writeable(_p) \
- (*(_p) = mk_l1_pgentry(l1_pgentry_val(*(_p)) | _PAGE_RW))
-#define mk_l1_readonly(_p) \
- (*(_p) = mk_l1_pgentry(l1_pgentry_val(*(_p)) & ~_PAGE_RW))
-
-/* Note: __pa(&symbol_visible_to_c) should always be replaced with __pa_symbol.
- Otherwise you risk miscompilation. */
-#define __pa(x) (((unsigned long)(x)>=__START_KERNEL_map)?(unsigned long)(x) - (unsigned long)__START_KERNEL_map:(unsigned long)(x) - PAGE_OFFSET)
-/* __pa_symbol should be used for C-visible symbols, but only for them.
- This seems to be the official gcc blessed way to do such arithmetic. */
-#define __pa_symbol(x) \
- ({unsigned long v; \
- asm("" : "=r" (v) : "0" (x)); \
- v - __START_KERNEL_map; })
-#define __pa_maybe_symbol(x) \
- ({unsigned long v; \
- asm("" : "=r" (v) : "0" (x)); \
- __pa(v); })
-#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
-#ifndef CONFIG_DISCONTIGMEM
-#define virt_to_page(kaddr) (frame_table + (__pa(kaddr) >> PAGE_SHIFT))
-#define pfn_to_page(pfn) (frame_table + (pfn))
-#define page_address(_p) (__va(((_p) - frame_table) << PAGE_SHIFT))
-#define VALID_PAGE(page) (((page) - frame_table) < max_mapnr)
-#endif
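/*
 * A runnable sketch of the two-range __pa() logic above: addresses in
 * the kernel text mapping subtract __START_KERNEL_map, everything else
 * subtracts PAGE_OFFSET. Constants are copied from the definitions
 * above; names are local to the example.
 */
#include <stdio.h>

#define SK_KMAP   0xffffffff80000000UL  /* __START_KERNEL_map */
#define SK_OFFSET 0x0000010000000000UL  /* PAGE_OFFSET        */
#define sk_pa(x)  ((x) >= SK_KMAP ? (x) - SK_KMAP : (x) - SK_OFFSET)

int main(void)
{
    printf("%#lx\n", sk_pa(0xffffffff80100000UL)); /* 0x100000: kernel text */
    printf("%#lx\n", sk_pa(0x0000010000234000UL)); /* 0x234000: direct map  */
    return 0;
}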
-
-#ifndef __ASSEMBLY__
-static __inline__ int get_order(unsigned long size)
-{
- int order;
-
- size = (size-1) >> (PAGE_SHIFT-1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
-}
-#endif
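/*
 * A runnable check of the get_order() logic above: it rounds a byte
 * count up to a whole number of pages and returns the smallest order
 * (power-of-two page count exponent) that covers it, assuming
 * PAGE_SHIFT == 12.
 */
#include <assert.h>

static int sketch_get_order(unsigned long size)
{
    int order = -1;

    size = (size - 1) >> (12 - 1);
    do {
        size >>= 1;
        order++;
    } while (size);
    return order;
}

int main(void)
{
    assert(sketch_get_order(1) == 0);        /* one 4K page */
    assert(sketch_get_order(4096) == 0);     /* exactly one */
    assert(sketch_get_order(4097) == 1);     /* two pages   */
    assert(sketch_get_order(16384) == 2);    /* four pages  */
    return 0;
}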
-
-#define phys_to_pfn(phys) ((phys) >> PAGE_SHIFT)
-
-#define __VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-#define __VM_STACK_FLAGS (VM_GROWSDOWN | VM_READ | VM_WRITE | VM_EXEC | \
- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
-#define VM_DATA_DEFAULT_FLAGS \
- ((current->thread.flags & THREAD_IA32) ? vm_data_default_flags32 : \
- vm_data_default_flags)
-#define VM_STACK_FLAGS vm_stack_flags
-
-#endif /* _X86_64_PAGE_H */
+++ /dev/null
-#ifndef _ASMx86_64_PARAM_H
-#define _ASMx86_64_PARAM_H
-
-#ifndef HZ
-#define HZ 100
-#endif
-
-#define EXEC_PAGESIZE 4096
-
-#ifndef NGROUPS
-#define NGROUPS 32
-#endif
-
-#ifndef NOGROUP
-#define NOGROUP (-1)
-#endif
-
-#define MAXHOSTNAMELEN 64 /* max length of hostname */
-
-#ifdef __KERNEL__
-# define CLOCKS_PER_SEC 100 /* frequency at which times() counts */
-#endif
-
-#endif
+++ /dev/null
-#ifndef __x8664_PCI_H
-#define __x8664_PCI_H
-
-#include <xen/config.h>
-#include <asm/io.h>
-
-
-/* Can be used to override the logic in pci_scan_bus for skipping
- already-configured bus numbers - to be used for buggy BIOSes
- or architectures with incomplete PCI setup by the loader */
-
-#ifdef CONFIG_PCI
-extern unsigned int pcibios_assign_all_busses(void);
-#else
-#define pcibios_assign_all_busses() 0
-#endif
-
-extern unsigned long pci_mem_start;
-#define PCIBIOS_MIN_IO 0x1000
-#define PCIBIOS_MIN_MEM (pci_mem_start)
-
-void pcibios_set_master(struct pci_dev *dev);
-void pcibios_penalize_isa_irq(int irq);
-struct irq_routing_table *pcibios_get_irq_routing_table(void);
-int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
-
-#include <xen/types.h>
-#include <xen/slab.h>
-#include <asm/scatterlist.h>
-#include <asm/io.h>
-#include <asm/page.h>
-
-struct pci_dev;
-extern int force_mmu;
-
-/* Allocate and map kernel buffer using consistent mode DMA for a device.
- * hwdev should be valid struct pci_dev pointer for PCI devices,
- * NULL for PCI-like buses (ISA, EISA).
- * Returns non-NULL cpu-view pointer to the buffer if successful and
- * sets *dma_addrp to the pci side dma address as well, else *dma_addrp
- * is undefined.
- */
-extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
- dma_addr_t *dma_handle);
-
-/* Free and unmap a consistent DMA buffer.
- * cpu_addr is what was returned from pci_alloc_consistent,
- * size must be the same as what was passed into pci_alloc_consistent,
- * and likewise dma_addr must be the same as what *dma_addrp was set to.
- *
- * References to the memory and mappings associated with cpu_addr/dma_addr
- * past this call are illegal.
- */
-extern void pci_free_consistent(struct pci_dev *hwdev, size_t size,
- void *vaddr, dma_addr_t dma_handle);
-
-#ifdef CONFIG_GART_IOMMU
-
-/* Map a single buffer of the indicated size for DMA in streaming mode.
- * The 32-bit bus address to use is returned.
- *
- * Once the device is given the dma address, the device owns this memory
- * until either pci_unmap_single or pci_dma_sync_single is performed.
- */
-extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
- size_t size, int direction);
-
-
-void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t addr,
- size_t size, int direction);
-
-/*
- * pci_{map,unmap}_single_page maps a kernel page to a dma_addr_t. Identical
- * to pci_map_single, but takes a struct pfn_info instead of a virtual address
- */
-
-#define pci_map_page(dev,page,offset,size,dir) \
- pci_map_single((dev), page_address(page)+(offset), (size), (dir))
-
-#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
- dma_addr_t ADDR_NAME;
-#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
- __u32 LEN_NAME;
-#define pci_unmap_addr(PTR, ADDR_NAME) \
- ((PTR)->ADDR_NAME)
-#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
- (((PTR)->ADDR_NAME) = (VAL))
-#define pci_unmap_len(PTR, LEN_NAME) \
- ((PTR)->LEN_NAME)
-#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
- (((PTR)->LEN_NAME) = (VAL))
-
-static inline void pci_dma_sync_single(struct pci_dev *hwdev,
- dma_addr_t dma_handle,
- size_t size, int direction)
-{
- if (direction == PCI_DMA_NONE)
- out_of_line_bug();
-}
-
-static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
- struct scatterlist *sg,
- int nelems, int direction)
-{
- if (direction == PCI_DMA_NONE)
- out_of_line_bug();
-}
-
-/* With the GART IOMMU the PCI address space does not equal the
- * physical memory address space. The networking and block device
- * layers use this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS (0)
-
-
-#else
-static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
- size_t size, int direction)
-{
- dma_addr_t addr;
-
- if (direction == PCI_DMA_NONE)
- out_of_line_bug();
- addr = virt_to_bus(ptr);
-
- /*
- * This is gross, but what should I do?
- * Unfortunately drivers do not test the return value of this.
- */
- if ((addr+size) & ~hwdev->dma_mask)
- out_of_line_bug();
- return addr;
-}
-
-static inline void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
- size_t size, int direction)
-{
- if (direction == PCI_DMA_NONE)
- out_of_line_bug();
- /* Nothing to do */
-}
-
-static inline dma_addr_t pci_map_page(struct pci_dev *hwdev, struct pfn_info *page,
- unsigned long offset, size_t size, int direction)
-{
- dma_addr_t addr;
- if (direction == PCI_DMA_NONE)
- out_of_line_bug();
- addr = (page - frame_table) * PAGE_SIZE + offset;
- if ((addr+size) & ~hwdev->dma_mask)
- out_of_line_bug();
- return addr;
-}
-
-/* pci_unmap_{page,single} is a nop so... */
-#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
-#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
-#define pci_unmap_addr(PTR, ADDR_NAME) (0)
-#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0)
-#define pci_unmap_len(PTR, LEN_NAME) (0)
-#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0)
-
-#define BAD_DMA_ADDRESS (-1UL)
-
-
-/* Unmap a set of streaming mode DMA translations.
- * Again, cpu read rules concerning calls here are the same as for
- * pci_unmap_single() above.
- */
-static inline void pci_unmap_sg(struct pci_dev *dev, struct scatterlist *sg,
- int nents, int dir)
-{
- if (dir == PCI_DMA_NONE)
- out_of_line_bug();
-}
-
-
-/* Map a set of buffers described by scatterlist in streaming
- * mode for DMA. This is the scatter-gather version of the
- * above pci_map_single interface. Here the scatter gather list
- * elements are each tagged with the appropriate dma address
- * and length. They are obtained via sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- * DMA address/length pairs than there are SG table elements.
- * (for example via virtual mapping capabilities)
- * The routine returns the number of addr/length pairs actually
- * used, at most nents.
- *
- * Device ownership issues as mentioned above for pci_map_single are
- * the same here.
- */
-static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
- int nents, int direction)
-{
- int i;
-
- if (direction == PCI_DMA_NONE)
- out_of_line_bug();
-
- /*
- * temporary 2.4 hack
- */
- for (i = 0; i < nents; i++ ) {
- struct scatterlist *s = &sg[i];
- void *addr = s->address;
- if (addr) {
- if (s->page || s->offset)
- out_of_line_bug();
- } else if (s->page)
- addr = page_address(s->page) + s->offset;
-#if 0
- /* Invalid check, since address==0 is valid. */
- else
- BUG();
-#endif
- s->dma_address = pci_map_single(hwdev, addr, s->length, direction);
- if (unlikely(s->dma_address == BAD_DMA_ADDRESS))
- goto error;
- }
- return nents;
-
- error:
- pci_unmap_sg(hwdev, sg, i, direction);
- return 0;
-}
-
-
-/* Make physical memory consistent for a single
- * streaming mode DMA translation after a transfer.
- *
- * If you perform a pci_map_single() but wish to interrogate the
- * buffer using the cpu, yet do not wish to tear down the PCI dma
- * mapping, you must call this function before doing so. At the
- * next point you give the PCI dma address back to the card, the
- * device again owns the buffer.
- */
-static inline void pci_dma_sync_single(struct pci_dev *hwdev,
- dma_addr_t dma_handle,
- size_t size, int direction)
-{
- if (direction == PCI_DMA_NONE)
- out_of_line_bug();
- flush_write_buffers();
-}
-
-/* Make physical memory consistent for a set of streaming
- * mode DMA translations after a transfer.
- *
- * The same as pci_dma_sync_single but for a scatter-gather list,
- * same rules and usage.
- */
-static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
- struct scatterlist *sg,
- int nelems, int direction)
-{
- if (direction == PCI_DMA_NONE)
- out_of_line_bug();
- flush_write_buffers();
-}
-
-#define PCI_DMA_BUS_IS_PHYS 1
-
-#endif
-
-#define pci_unmap_page pci_unmap_single
-
-/* Return whether the given PCI device DMA address mask can
- * be supported properly. For example, if your device can
- * only drive the low 24-bits during PCI bus mastering, then
- * you would pass 0x00ffffff as the mask to this function.
- */
-static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask)
-{
- /*
- * we fall back to GFP_DMA when the mask isn't all 1s,
- * so we can't guarantee allocations that must be
- * within a tighter range than GFP_DMA..
- */
- if(mask < 0x00ffffff)
- return 0;
-
- return 1;
-}
-
-/* This is always fine. */
-#define pci_dac_dma_supported(pci_dev, mask) (1)
-
-static __inline__ dma64_addr_t
-pci_dac_page_to_dma(struct pci_dev *pdev, struct pfn_info *page, unsigned long offset, int direction)
-{
- return ((dma64_addr_t) page_to_bus(page) +
- (dma64_addr_t) offset);
-}
-
-static __inline__ struct pfn_info *
-pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr)
-{
- unsigned long poff = (dma_addr >> PAGE_SHIFT);
- return frame_table + poff;
-}
-
-static __inline__ unsigned long
-pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr)
-{
- return (dma_addr & ~PAGE_MASK);
-}
-
-static __inline__ void
-pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
-{
- flush_write_buffers();
-}
-
-/* These macros should be used after a pci_map_sg call has been done
- * to get bus addresses of each of the SG entries and their lengths.
- * You should only work with the number of sg entries pci_map_sg
- * returns.
- */
-#define sg_dma_address(sg) ((sg)->dma_address)
-#define sg_dma_len(sg) ((sg)->length)
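Putting the pieces together: a driver maps a scatterlist and programs one
descriptor per returned pair, honouring the possibly smaller count that
pci_map_sg() hands back (program_desc is an illustrative helper):

    int i, n = pci_map_sg(hwdev, sg, nents, PCI_DMA_TODEVICE);
    for (i = 0; i < n; i++)       /* iterate over n, not nents */
        program_desc(dev, sg_dma_address(&sg[i]), sg_dma_len(&sg[i]));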
-
-/* Return the index of the PCI controller for device. */
-static inline int pci_controller_num(struct pci_dev *dev)
-{
- return 0;
-}
-
-#if 0 /* XXX Not in land of Xen XXX */
-#define HAVE_PCI_MMAP
-extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
- enum pci_mmap_state mmap_state, int write_combine);
-#endif
-
-
-#endif /* __x8664_PCI_H */
+++ /dev/null
-#ifndef X86_64_PDA_H
-#define X86_64_PDA_H
-
-#include <xen/cache.h>
-
-/* Per-processor data structure. %gs points to it while the kernel runs */
-/* To use a new field with the *_pda macros it needs to be added to tools/offset.c */
-struct x8664_pda {
- unsigned long kernelstack; /* TOS for current process */
- unsigned long oldrsp; /* user rsp for system call */
- unsigned long irqrsp; /* Old rsp for interrupts. */
- struct task_struct *pcurrent; /* Current process */
- int irqcount; /* Irq nesting counter. Starts with -1 */
- int cpunumber; /* Logical CPU number */
- /* XXX: could be a single list */
- unsigned long *pgd_quick;
- unsigned long *pmd_quick;
- unsigned long *pte_quick;
- unsigned long pgtable_cache_sz;
- char *irqstackptr; /* top of irqstack */
- unsigned long volatile *level4_pgt;
-} ____cacheline_aligned;
-
-#define PDA_STACKOFFSET (5*8)
-
-#define IRQSTACK_ORDER 2
-#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER)
-
-extern struct x8664_pda cpu_pda[];
-
-/*
- * There is no fast way to get the base address of the PDA; all accesses
- * have to mention %fs/%gs.  So it needs to be done this Torvaldian way.
- */
-#define sizeof_field(type,field) (sizeof(((type *)0)->field))
-#define typeof_field(type,field) typeof(((type *)0)->field)
-
-extern void __bad_pda_field(void);
-/* Don't use offsetof because it requires too much infrastructure */
-#define pda_offset(field) ((unsigned long)&((struct x8664_pda *)0)->field)
-
-#define pda_to_op(op,field,val) do { \
- switch (sizeof_field(struct x8664_pda, field)) { \
- case 2: asm volatile(op "w %0,%%gs:%P1" :: "r" (val), "i"(pda_offset(field)):"memory"); break; \
- case 4: asm volatile(op "l %0,%%gs:%P1" :: "r" (val), "i"(pda_offset(field)):"memory"); break; \
- case 8: asm volatile(op "q %0,%%gs:%P1" :: "r" (val), "i"(pda_offset(field)):"memory"); break; \
- default: __bad_pda_field(); \
- } \
- } while (0)
-
-
-#define pda_from_op(op,field) ({ \
- typedef typeof_field(struct x8664_pda, field) T__; T__ ret__; \
- switch (sizeof_field(struct x8664_pda, field)) { \
- case 2: asm volatile(op "w %%gs:%P1,%0":"=r" (ret__): "i" (pda_offset(field)):"memory"); break; \
- case 4: asm volatile(op "l %%gs:%P1,%0":"=r" (ret__): "i" (pda_offset(field)):"memory"); break; \
- case 8: asm volatile(op "q %%gs:%P1,%0":"=r" (ret__): "i" (pda_offset(field)):"memory"); break; \
- default: __bad_pda_field(); \
- } \
- ret__; })
-
-
-#define read_pda(field) pda_from_op("mov",field)
-#define write_pda(field,val) pda_to_op("mov",field,val)
-#define add_pda(field,val) pda_to_op("add",field,val)
-#define sub_pda(field,val) pda_to_op("sub",field,val)
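Each accessor then costs a single %gs-relative instruction; for example
smp_processor_id() below is literally read_pda(cpunumber).  A usage sketch
(stack_top is an illustrative value):

    int cpu = read_pda(cpunumber);
    write_pda(kernelstack, (unsigned long)stack_top);
    add_pda(irqcount, 1);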
-
-#endif
+++ /dev/null
-
-/*
- * pervasive debugger
- *
- * alex ho
- * 2004
- * university of cambridge computer laboratory
- */
-
-
-#ifndef __PDB_H__
-#define __PDB_H__
-
-#include <asm/ptrace.h>
-#include <xen/list.h>
-
-extern int pdb_initialized;
-extern int pdb_com_port;
-extern int pdb_high_bit;
-
-extern void initialize_pdb(void);
-
-/* Get/set values from generic debug interface. */
-extern int pdb_set_values(domid_t domain, u_char *buffer,
- unsigned long addr, int length);
-extern int pdb_get_values(domid_t domain, u_char *buffer,
- unsigned long addr, int length);
-
-/* External entry points. */
-extern int pdb_handle_exception(int exceptionVector,
- struct pt_regs *xen_regs);
-extern int pdb_serial_input(u_char c, struct pt_regs *regs);
-extern void pdb_do_debug(dom0_op_t *op);
-
-/* Breakpoints. */
-struct pdb_breakpoint
-{
- struct list_head list;
- unsigned long address;
-};
-extern void pdb_bkpt_add (unsigned long address);
-extern struct pdb_breakpoint* pdb_bkpt_search (unsigned long address);
-extern int pdb_bkpt_remove (unsigned long address);
-
-/* Conversions. */
-extern int hex (char);
-extern char *mem2hex (char *, char *, int);
-extern char *hex2mem (char *, char *, int);
-extern int hexToInt (char **ptr, int *intValue);
-
-#endif /* __PDB_H__ */
+++ /dev/null
-#ifndef _X86_64_PGALLOC_H
-#define _X86_64_PGALLOC_H
-
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <asm/processor.h>
-#include <asm/fixmap.h>
-
-/* XXX probably should be moved to flushtlb.h */
-
-/*
- * TLB flushing:
- *
- * - flush_tlb() flushes the current mm struct TLBs
- * - flush_tlb_all() flushes all processes TLBs
- * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
- */
-
-#ifndef CONFIG_SMP
-
-#define flush_tlb() __flush_tlb()
-#define flush_tlb_all() __flush_tlb()
-#define flush_tlb_all_pge() __flush_tlb_pge()
-#define local_flush_tlb() __flush_tlb()
-#define flush_tlb_cpu(_cpu) __flush_tlb()
-#define flush_tlb_mask(_mask) __flush_tlb()
-#define try_flush_tlb_mask(_mask) __flush_tlb()
-
-#else
-#include <xen/smp.h>
-
-extern int try_flush_tlb_mask(unsigned long mask);
-extern void flush_tlb_mask(unsigned long mask);
-extern void flush_tlb_all_pge(void);
-
-#define flush_tlb() __flush_tlb()
-#define flush_tlb_all() flush_tlb_mask((1 << smp_num_cpus) - 1)
-#define local_flush_tlb() __flush_tlb()
-#define flush_tlb_cpu(_cpu) flush_tlb_mask(1 << (_cpu))
-
-#endif
-
-#endif /* _X86_64_PGALLOC_H */
+++ /dev/null
-/*
- * include/asm-x86_64/processor.h
- *
- * Copyright (C) 1994 Linus Torvalds
- */
-
-#ifndef __ASM_X86_64_PROCESSOR_H
-#define __ASM_X86_64_PROCESSOR_H
-
-#include <asm/page.h>
-#include <asm/types.h>
-#include <asm/cpufeature.h>
-#include <asm/desc.h>
-#include <xen/config.h>
-#include <hypervisor-ifs/hypervisor-if.h>
-
-struct task_struct;
-
-#define TF_MASK 0x00000100
-#define IF_MASK 0x00000200
-#define IOPL_MASK 0x00003000
-#define NT_MASK 0x00004000
-#define VM_MASK 0x00020000
-#define AC_MASK 0x00040000
-#define VIF_MASK 0x00080000 /* virtual interrupt flag */
-#define VIP_MASK 0x00100000 /* virtual interrupt pending */
-#define ID_MASK 0x00200000
-
-/*
- * Default implementation of macro that returns current
- * instruction pointer ("program counter").
- */
-#define current_text_addr() ({ void *pc; asm volatile("leaq 1f(%%rip),%0\n1:":"=r"(pc)); pc; })
-
-/*
- * CPU type and hardware bug flags. Kept separately for each CPU.
- * Members of this structure are referenced in head.S, so think twice
- * before touching them. [mj]
- */
-
-struct cpuinfo_x86 {
- __u8 x86; /* CPU family */
- __u8 x86_vendor; /* CPU vendor */
- __u8 x86_model;
- __u8 x86_mask;
- int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */
- __u32 x86_capability[NCAPINTS];
- char x86_vendor_id[16];
- char x86_model_id[64];
-    int x86_cache_size;  /* in KB - valid for CPUs which support this
-                            call */
- int x86_clflush_size;
-    int x86_tlbsize;     /* number of 4K pages in DTLB/ITLB combined */
- __u8 x86_virt_bits, x86_phys_bits;
- __u32 x86_power;
- unsigned long loops_per_jiffy;
-} ____cacheline_aligned;
-
-#define X86_VENDOR_INTEL 0
-#define X86_VENDOR_CYRIX 1
-#define X86_VENDOR_AMD 2
-#define X86_VENDOR_UMC 3
-#define X86_VENDOR_NEXGEN 4
-#define X86_VENDOR_CENTAUR 5
-#define X86_VENDOR_RISE 6
-#define X86_VENDOR_TRANSMETA 7
-#define X86_VENDOR_UNKNOWN 0xff
-
-/*
- * capabilities of CPUs
- */
-
-extern struct cpuinfo_x86 boot_cpu_data;
-extern struct tss_struct init_tss[NR_CPUS];
-
-#ifdef CONFIG_SMP
-extern struct cpuinfo_x86 cpu_data[];
-#define current_cpu_data cpu_data[smp_processor_id()]
-#else
-#define cpu_data (&boot_cpu_data)
-#define current_cpu_data boot_cpu_data
-#endif
-
-#define cpu_has_pge 1
-#define cpu_has_pse 1
-#define cpu_has_pae 1
-#define cpu_has_tsc 1
-#define cpu_has_de 1
-#define cpu_has_vme 1
-#define cpu_has_fxsr 1
-#define cpu_has_xmm 1
-#define cpu_has_apic (test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability))
-
-extern char ignore_irq13;
-
-extern void identify_cpu(struct cpuinfo_x86 *);
-extern void print_cpu_info(struct cpuinfo_x86 *);
-extern void dodgy_tsc(void);
-
-/*
- * EFLAGS bits
- */
-#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
-#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
-#define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */
-#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
-#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
-#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
-#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
-#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
-#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
-#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
-#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
-#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
-#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
-#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
-#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
-#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
-#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
-
-/*
- * Generic CPUID function
- * FIXME: This really belongs to msr.h
- */
-extern inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
-{
- __asm__("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (op));
-}
-
-/*
- * CPUID functions returning a single datum
- */
-extern inline unsigned int cpuid_eax(unsigned int op)
-{
- unsigned int eax;
-
- __asm__("cpuid"
- : "=a" (eax)
- : "0" (op)
- : "bx", "cx", "dx");
- return eax;
-}
-extern inline unsigned int cpuid_ebx(unsigned int op)
-{
- unsigned int eax, ebx;
-
- __asm__("cpuid"
- : "=a" (eax), "=b" (ebx)
- : "0" (op)
- : "cx", "dx" );
- return ebx;
-}
-extern inline unsigned int cpuid_ecx(unsigned int op)
-{
- unsigned int eax, ecx;
-
- __asm__("cpuid"
- : "=a" (eax), "=c" (ecx)
- : "0" (op)
- : "bx", "dx" );
- return ecx;
-}
-extern inline unsigned int cpuid_edx(unsigned int op)
-{
- unsigned int eax, edx;
-
- __asm__("cpuid"
- : "=a" (eax), "=d" (edx)
- : "0" (op)
- : "bx", "cx");
- return edx;
-}
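For example, CPUID leaf 0 returns the 12-byte vendor string in EBX/EDX/ECX
order, so identification code can do (sketch):

    unsigned int eax, regs[3];
    char vendor[13];
    cpuid(0, (int *)&eax, (int *)&regs[0], (int *)&regs[2], (int *)&regs[1]);
    memcpy(vendor, regs, 12);
    vendor[12] = '\0';      /* "GenuineIntel", "AuthenticAMD", ... */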
-
-
-/*
- * Intel CPU flags in CR0
- */
-#define X86_CR0_PE 0x00000001 /* Enable Protected Mode (RW) */
-#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor (RW) */
-#define X86_CR0_EM 0x00000004 /* Require FPU Emulation (RO) */
-#define X86_CR0_TS 0x00000008 /* Task Switched (RW) */
-#define X86_CR0_NE 0x00000020 /* Numeric Error Reporting (RW) */
-#define X86_CR0_WP 0x00010000 /* Supervisor Write Protect (RW) */
-#define X86_CR0_AM 0x00040000 /* Alignment Checking (RW) */
-#define X86_CR0_NW 0x20000000 /* Not Write-Through (RW) */
-#define X86_CR0_CD 0x40000000 /* Cache Disable (RW) */
-#define X86_CR0_PG 0x80000000 /* Paging (RW) */
-
-#define read_cr0() ({ \
- unsigned long __dummy; \
- __asm__( \
- "movq %%cr0,%0\n\t" \
- :"=r" (__dummy)); \
- __dummy; \
-})
-
-#define write_cr0(x) \
- __asm__("movq %0,%%cr0": :"r" (x));
-
-
-
-/*
- * Intel CPU features in CR4
- */
-#define X86_CR4_VME 0x0001 /* enable vm86 extensions */
-#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */
-#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */
-#define X86_CR4_DE 0x0008 /* enable debugging extensions */
-#define X86_CR4_PSE 0x0010 /* enable page size extensions */
-#define X86_CR4_PAE 0x0020 /* enable physical address extensions */
-#define X86_CR4_MCE 0x0040 /* Machine check enable */
-#define X86_CR4_PGE 0x0080 /* enable global pages */
-#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
-#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */
-#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */
-
-/*
- * Save the cr4 feature set we're using (ie
- * Pentium 4MB enable and PPro Global page
- * enable), so that any CPU's that boot up
- * after us can get the correct flags.
- */
-extern unsigned long mmu_cr4_features;
-
-static inline void set_in_cr4 (unsigned long mask)
-{
- mmu_cr4_features |= mask;
- __asm__("movq %%cr4,%%rax\n\t"
- "orq %0,%%rax\n\t"
- "movq %%rax,%%cr4\n"
- : : "irg" (mask)
- :"ax");
-}
-
-static inline void clear_in_cr4 (unsigned long mask)
-{
- mmu_cr4_features &= ~mask;
- __asm__("movq %%cr4,%%rax\n\t"
- "andq %0,%%rax\n\t"
- "movq %%rax,%%cr4\n"
- : : "irg" (~mask)
- :"ax");
-}
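A typical caller is boot-time FPU setup, which might enable fast
FXSAVE/FXRSTOR and unmasked SSE exceptions in one go (an illustrative
sketch; the real call sites live in the setup code):

    set_in_cr4(X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT);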
-
-/*
- * Cyrix CPU configuration register indexes
- */
-#define CX86_CCR0 0xc0
-#define CX86_CCR1 0xc1
-#define CX86_CCR2 0xc2
-#define CX86_CCR3 0xc3
-#define CX86_CCR4 0xe8
-#define CX86_CCR5 0xe9
-#define CX86_CCR6 0xea
-#define CX86_CCR7 0xeb
-#define CX86_DIR0 0xfe
-#define CX86_DIR1 0xff
-#define CX86_ARR_BASE 0xc4
-#define CX86_RCR_BASE 0xdc
-
-/*
- * Cyrix CPU indexed register access macros
- */
-
-#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); })
-
-#define setCx86(reg, data) do { \
- outb((reg), 0x22); \
- outb((data), 0x23); \
-} while (0)
-
-/*
- * Bus types
- */
-#define EISA_bus 0
-#define MCA_bus 0
-#define MCA_bus__is_a_macro
-
-
-/*
- * User space process size: 512GB - 1GB (default).
- */
-#define TASK_SIZE (0x0000007fc0000000)
-
-/* This decides where the kernel will search for a free chunk of vm
- * space during mmap's.
- */
-#define TASK_UNMAPPED_32 0xa0000000
-#define TASK_UNMAPPED_64 (TASK_SIZE/3)
-#define TASK_UNMAPPED_BASE \
- ((current->thread.flags & THREAD_IA32) ? TASK_UNMAPPED_32 : TASK_UNMAPPED_64)
-
-/*
- * Size of io_bitmap in longwords: 32 is ports 0-0x3ff.
- */
-#define IO_BITMAP_SIZE 32
-#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
-#define INVALID_IO_BITMAP_OFFSET 0x8000
-
-struct i387_fxsave_struct {
- u16 cwd;
- u16 swd;
- u16 twd;
- u16 fop;
- u64 rip;
- u64 rdp;
- u32 mxcsr;
- u32 mxcsr_mask;
- u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
- u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 128 bytes */
- u32 padding[24];
-} __attribute__ ((aligned (16)));
-
-union i387_union {
- struct i387_fxsave_struct fxsave;
-};
-
-typedef struct {
- unsigned long seg;
-} mm_segment_t;
-
-struct tss_struct {
- unsigned short back_link,__blh;
-/* u32 reserved1; */
- u64 rsp0;
- u64 rsp1;
- u64 rsp2;
- u64 reserved2;
- u64 ist[7];
- u32 reserved3;
- u32 reserved4;
- u16 reserved5;
- u16 io_map_base;
- u32 io_bitmap[IO_BITMAP_SIZE];
-} __attribute__((packed)) ____cacheline_aligned;
-
-struct thread_struct {
- unsigned long guestos_sp;
- unsigned long guestos_ss;
- unsigned long rip;
- unsigned long rsp;
- unsigned long userrsp; /* Copy from PDA */
- unsigned long fs;
- unsigned long gs;
- unsigned short es, ds, fsindex, gsindex;
- enum {
- THREAD_IA32 = 0x0001,
- } flags;
-/* Hardware debugging registers */
- unsigned long debugreg[8]; /* %%db0-7 debug registers */
-/* floating point info */
- union i387_union i387;
-/* Trap info. */
- trap_info_t traps[256];
-};
-
-#define IDT_ENTRIES 256
-extern struct gate_struct idt_table[];
-extern struct gate_struct *idt_tables[];
-
-#define INIT_THREAD { \
- 0, 0, \
- 0, 0, 0, 0, \
- 0, 0, 0, 0, \
- 0, /* flags */ \
- { [0 ... 7] = 0 }, /* debugging registers */ \
- { { 0, }, }, /* 387 state */ \
- { {0} } /* io permissions */ \
-}
-
-#define INIT_TSS { \
- 0,0, /* back_link, __blh */ \
- 0, /* rsp0 */ \
- 0, 0, /* rsp1, rsp2 */ \
- 0, /* reserved */ \
- { [0 ... 6] = 0 }, /* ist[] */ \
- 0,0, /* reserved */ \
- 0, INVALID_IO_BITMAP_OFFSET, /* trace, bitmap */ \
- {~0, } /* ioperm */ \
-}
-
-struct mm_struct {
- /*
- * Every domain has a L1 pagetable of its own. Per-domain mappings
- * are put in this table (eg. the current GDT is mapped here).
- */
- l1_pgentry_t *perdomain_pt;
- pagetable_t pagetable;
- /* Current LDT details. */
- unsigned long ldt_base, ldt_ents, shadow_ldt_mapcnt;
- /* Next entry is passed to LGDT on domain switch. */
- char gdt[10];
-};
-
-#define IDLE0_MM \
-{ \
- perdomain_pt: 0, \
- pagetable: mk_pagetable(__pa(idle_pg_table)) \
-}
-
-/* Convenient accessor for mm.gdt. */
-#define SET_GDT_ENTRIES(_p, _e) ((*(u16 *)((_p)->mm.gdt + 0)) = (_e))
-#define SET_GDT_ADDRESS(_p, _a) ((*(u64 *)((_p)->mm.gdt + 2)) = (_a))
-#define GET_GDT_ENTRIES(_p) ((*(u16 *)((_p)->mm.gdt + 0)))
-#define GET_GDT_ADDRESS(_p) ((*(u64 *)((_p)->mm.gdt + 2)))
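The 10-byte gdt[] field is the raw LGDT operand: a 16-bit limit at offset 0
followed by the linear base at offset 2, which is exactly what these
accessors poke.  A plausible initialisation, assuming the "entries" word is
stored as the raw hardware limit (gdt_virt and nr_entries are illustrative):

    SET_GDT_ADDRESS(p, (u64)gdt_virt);
    SET_GDT_ENTRIES(p, nr_entries * 8 - 1);   /* LGDT limit = bytes - 1 */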
-
-long set_gdt(struct task_struct *p,
- unsigned long *frames,
- unsigned int entries);
-
-long set_debugreg(struct task_struct *p, int reg, unsigned long value);
-
-struct microcode {
- unsigned int hdrver;
- unsigned int rev;
- unsigned int date;
- unsigned int sig;
- unsigned int cksum;
- unsigned int ldrver;
- unsigned int pf;
- unsigned int reserved[5];
- unsigned int bits[500];
-};
-
-/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */
-#define MICROCODE_IOCFREE _IO('6',0)
-
-/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static inline void rep_nop(void)
-{
- __asm__ __volatile__("rep;nop");
-}
-
-#define cpu_relax() rep_nop()
-
-#define init_task (init_task_union.task)
-#define init_stack (init_task_union.stack)
-
-/* Avoid speculative execution by the CPU */
-extern inline void sync_core(void)
-{
- int tmp;
- asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory");
-}
-
-#define cpu_has_fpu 1
-
-#define ARCH_HAS_PREFETCH
-#define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
-
-#define prefetch(x) __builtin_prefetch((x),0)
-#define prefetchw(x) __builtin_prefetch((x),1)
-#define spin_lock_prefetch(x) prefetchw(x)
-
-
-#endif /* __ASM_X86_64_PROCESSOR_H */
+++ /dev/null
-#ifndef _X86_64_PTRACE_H
-#define _X86_64_PTRACE_H
-
-#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
-#define R15 0
-#define R14 8
-#define R13 16
-#define R12 24
-#define RBP 32
-#define RBX 40
-/* arguments: interrupts/non-tracing syscalls only save up to here */
-#define R11 48
-#define R10 56
-#define R9 64
-#define R8 72
-#define RAX 80
-#define RCX 88
-#define RDX 96
-#define RSI 104
-#define RDI 112
-#define ORIG_RAX 120 /* = ERROR */
-/* end of arguments */
-/* cpu exception frame or undefined in case of fast syscall. */
-#define RIP 128
-#define CS 136
-#define EFLAGS 144
-#define RSP 152
-#define SS 160
-#define ARGOFFSET R11
-#endif /* __ASSEMBLY__ */
-
-/* top of stack page */
-#define FRAME_SIZE 168
-
-#define PTRACE_SETOPTIONS 21
-
-/* options set using PTRACE_SETOPTIONS */
-#define PTRACE_O_TRACESYSGOOD 0x00000001
-
-/* Dummy values for ptrace */
-#define FS 1000
-#define GS 1008
-
-#ifndef __ASSEMBLY__
-
-struct pt_regs {
- unsigned long r15;
- unsigned long r14;
- unsigned long r13;
- unsigned long r12;
- unsigned long rbp;
- unsigned long rbx;
-/* arguments: non-interrupt/non-tracing syscalls only save up to here */
- unsigned long r11;
- unsigned long r10;
- unsigned long r9;
- unsigned long r8;
- unsigned long rax;
- unsigned long rcx;
- unsigned long rdx;
- unsigned long rsi;
- unsigned long rdi;
- unsigned long orig_rax;
-/* end of arguments */
-/* cpu exception frame or undefined */
- unsigned long rip;
- unsigned long cs;
- unsigned long eflags;
- unsigned long rsp;
- unsigned long ss;
-/* top of stack page */
-};
-
-#endif
-
-/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
-#define PTRACE_GETREGS 12
-#define PTRACE_SETREGS 13
-#define PTRACE_GETFPREGS 14
-#define PTRACE_SETFPREGS 15
-#define PTRACE_GETFPXREGS 18
-#define PTRACE_SETFPXREGS 19
-
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
-#define user_mode(regs) (!!((regs)->cs & 3))
-#define instruction_pointer(regs) ((regs)->rip)
-extern void show_regs(struct pt_regs *);
-
-enum {
- EF_CF = 0x00000001,
- EF_PF = 0x00000004,
- EF_AF = 0x00000010,
- EF_ZF = 0x00000040,
- EF_SF = 0x00000080,
- EF_TF = 0x00000100,
- EF_IE = 0x00000200,
- EF_DF = 0x00000400,
- EF_OF = 0x00000800,
- EF_IOPL = 0x00003000,
- EF_IOPL_RING0 = 0x00000000,
- EF_IOPL_RING1 = 0x00001000,
- EF_IOPL_RING2 = 0x00002000,
- EF_NT = 0x00004000, /* nested task */
- EF_RF = 0x00010000, /* resume */
- EF_VM = 0x00020000, /* virtual mode */
- EF_AC = 0x00040000, /* alignment */
- EF_VIF = 0x00080000, /* virtual interrupt */
- EF_VIP = 0x00100000, /* virtual interrupt pending */
- EF_ID = 0x00200000, /* id */
-};
-
-#endif
-
-#endif
+++ /dev/null
-/* include/asm-x86_64/rwlock.h
- *
- * Helpers used by both rw spinlocks and rw semaphores.
- *
- * Based in part on code from semaphore.h and
- * spinlock.h Copyright 1996 Linus Torvalds.
- *
- * Copyright 1999 Red Hat, Inc.
- * Copyright 2001,2002 SuSE labs
- *
- * Written by Benjamin LaHaise.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#ifndef _ASM_X86_64_RWLOCK_H
-#define _ASM_X86_64_RWLOCK_H
-
-#define RW_LOCK_BIAS 0x01000000
-#define RW_LOCK_BIAS_STR "0x01000000"
-
-#define __build_read_lock_ptr(rw, helper) \
- asm volatile(LOCK "subl $1,(%0)\n\t" \
- "js 2f\n" \
- "1:\n" \
- ".section .text.lock,\"ax\"\n" \
- "2:\tcall " helper "\n\t" \
- "jmp 1b\n" \
- ".previous" \
- ::"a" (rw) : "memory")
-
-#define __build_read_lock_const(rw, helper) \
- asm volatile(LOCK "subl $1,%0\n\t" \
- "js 2f\n" \
- "1:\n" \
- ".section .text.lock,\"ax\"\n" \
- "2:\tpushq %%rax\n\t" \
- "leaq %0,%%rax\n\t" \
- "call " helper "\n\t" \
- "popq %%rax\n\t" \
- "jmp 1b\n" \
- ".previous" \
- :"=m" (*(volatile int *)rw) : : "memory")
-
-#define __build_read_lock(rw, helper) do { \
- if (__builtin_constant_p(rw)) \
- __build_read_lock_const(rw, helper); \
- else \
- __build_read_lock_ptr(rw, helper); \
- } while (0)
-
-#define __build_write_lock_ptr(rw, helper) \
- asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
- "jnz 2f\n" \
- "1:\n" \
- ".section .text.lock,\"ax\"\n" \
- "2:\tcall " helper "\n\t" \
- "jmp 1b\n" \
- ".previous" \
- ::"a" (rw) : "memory")
-
-#define __build_write_lock_const(rw, helper) \
- asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
- "jnz 2f\n" \
- "1:\n" \
- ".section .text.lock,\"ax\"\n" \
- "2:\tpushq %%rax\n\t" \
- "leaq %0,%%rax\n\t" \
- "call " helper "\n\t" \
- "popq %%rax\n\t" \
- "jmp 1b\n" \
- ".previous" \
- :"=m" (*(volatile int *)rw) : : "memory")
-
-#define __build_write_lock(rw, helper) do { \
- if (__builtin_constant_p(rw)) \
- __build_write_lock_const(rw, helper); \
- else \
- __build_write_lock_ptr(rw, helper); \
- } while (0)
-
-#endif
+++ /dev/null
-#ifndef _X8664_SCATTERLIST_H
-#define _X8664_SCATTERLIST_H
-
-struct scatterlist {
- char * address; /* Location data is to be transferred to, NULL for
- * highmem page */
- struct pfn_info * page; /* Location for highmem page, if any */
- unsigned int offset;/* for highmem, page offset */
-
- dma_addr_t dma_address;
- unsigned int length;
-};
-
-#define ISA_DMA_THRESHOLD (0x00ffffff)
-
-#endif /* !(_X8664_SCATTERLIST_H) */
+++ /dev/null
-#ifndef __ASM_SMP_H
-#define __ASM_SMP_H
-
-#include <xen/config.h>
-#include <asm/ptrace.h>
-
-#ifdef CONFIG_SMP
-#ifndef ASSEMBLY
-#include <asm/pda.h>
-
-/*
- * Private routines/data
- */
-
-extern void smp_alloc_memory(void);
-extern unsigned long phys_cpu_present_map;
-extern unsigned long cpu_online_map;
-extern volatile unsigned long smp_invalidate_needed;
-extern int pic_mode;
-extern void smp_flush_tlb(void);
-extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
-extern void smp_invalidate_rcv(void); /* Process an NMI */
-extern void (*mtrr_hook) (void);
-extern void zap_low_mappings (void);
-
-/*
- * On x86 all CPUs are mapped 1:1 to the APIC space.
- * This simplifies scheduling and IPI sending and
- * compresses data structures.
- */
-static inline int cpu_logical_map(int cpu)
-{
- return cpu;
-}
-static inline int cpu_number_map(int cpu)
-{
- return cpu;
-}
-
-/*
- * Some lowlevel functions might want to know about
- * the real APIC ID <-> CPU # mapping.
- */
-#define MAX_APICID 256
-extern volatile int cpu_to_physical_apicid[NR_CPUS];
-extern volatile int physical_apicid_to_cpu[MAX_APICID];
-extern volatile int cpu_to_logical_apicid[NR_CPUS];
-extern volatile int logical_apicid_to_cpu[MAX_APICID];
-
-/*
- * General functions that each host system must provide.
- */
-
-extern void smp_store_cpu_info(int id); /* Store per CPU info (like the initial udelay numbers) */
-
-/*
- * This function is needed by all SMP systems. It must _always_ be valid
- * from the initial startup. We map APIC_BASE very early in page_setup(),
- * so this is correct in the x86 case.
- */
-
-#define smp_processor_id() read_pda(cpunumber)
-
-#include <asm/fixmap.h>
-#include <asm/apic.h>
-
-static __inline int hard_smp_processor_id(void)
-{
- /* we don't want to mark this access volatile - bad code generation */
- return GET_APIC_ID(*(unsigned *)(APIC_BASE+APIC_ID));
-}
-
-extern int apic_disabled;
-extern int slow_smp_processor_id(void);
-#define safe_smp_processor_id() \
- (!apic_disabled ? hard_smp_processor_id() : slow_smp_processor_id())
-
-#endif /* !ASSEMBLY */
-
-#define NO_PROC_ID 0xFF /* No processor magic marker */
-
-/*
- * This magic constant controls our willingness to transfer
- * a process across CPUs. Such a transfer incurs misses on the L1
- * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My
- * gut feeling is this will vary by board in value. For a board
- * with separate L2 cache it probably depends also on the RSS, and
- * for a board with shared L2 cache it ought to decay fast as other
- * processes are run.
- */
-
-#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */
-
-
-
-#endif
-#define INT_DELIVERY_MODE 1 /* logical delivery */
-#define TARGET_CPUS 1
-
-#ifndef CONFIG_SMP
-#define safe_smp_processor_id() 0
-#endif
-#endif
+++ /dev/null
-#ifndef __ASM_SMPBOOT_H
-#define __ASM_SMPBOOT_H
-
-/* enum for clustered_apic_mode values */
-enum {
- CLUSTERED_APIC_NONE = 0,
- CLUSTERED_APIC_XAPIC,
- CLUSTERED_APIC_NUMAQ
-};
-
-#ifdef CONFIG_X86_CLUSTERED_APIC
-extern unsigned int apic_broadcast_id;
-extern unsigned char clustered_apic_mode;
-extern unsigned char esr_disable;
-extern unsigned char int_delivery_mode;
-extern unsigned int int_dest_addr_mode;
-extern int cyclone_setup(char*);
-
-static inline void detect_clustered_apic(char* oem, char* prod)
-{
- /*
- * Can't recognize Summit xAPICs at present, so use the OEM ID.
- */
- if (!strncmp(oem, "IBM ENSW", 8) && !strncmp(prod, "VIGIL SMP", 9)){
- clustered_apic_mode = CLUSTERED_APIC_XAPIC;
- apic_broadcast_id = APIC_BROADCAST_ID_XAPIC;
- int_dest_addr_mode = APIC_DEST_PHYSICAL;
- int_delivery_mode = dest_Fixed;
- esr_disable = 1;
- /*Start cyclone clock*/
- cyclone_setup(0);
- }
- else if (!strncmp(oem, "IBM ENSW", 8) && !strncmp(prod, "RUTHLESS SMP", 12)){
- clustered_apic_mode = CLUSTERED_APIC_XAPIC;
- apic_broadcast_id = APIC_BROADCAST_ID_XAPIC;
- int_dest_addr_mode = APIC_DEST_PHYSICAL;
- int_delivery_mode = dest_Fixed;
- esr_disable = 1;
- /*Start cyclone clock*/
- cyclone_setup(0);
- }
- else if (!strncmp(oem, "IBM NUMA", 8)){
- clustered_apic_mode = CLUSTERED_APIC_NUMAQ;
- apic_broadcast_id = APIC_BROADCAST_ID_APIC;
- int_dest_addr_mode = APIC_DEST_LOGICAL;
- int_delivery_mode = dest_LowestPrio;
- esr_disable = 1;
- }
-}
-#define INT_DEST_ADDR_MODE (int_dest_addr_mode)
-#define INT_DELIVERY_MODE (int_delivery_mode)
-#else /* CONFIG_X86_CLUSTERED_APIC */
-#define apic_broadcast_id (APIC_BROADCAST_ID_APIC)
-#define clustered_apic_mode (CLUSTERED_APIC_NONE)
-#define esr_disable (0)
-#define detect_clustered_apic(x,y)
-#define INT_DEST_ADDR_MODE (APIC_DEST_LOGICAL) /* logical delivery */
-#define INT_DELIVERY_MODE (dest_LowestPrio)
-#endif /* CONFIG_X86_CLUSTERED_APIC */
-#define BAD_APICID 0xFFu
-
-#define TRAMPOLINE_LOW phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0x8:0x467)
-#define TRAMPOLINE_HIGH phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0xa:0x469)
-
-#define boot_cpu_apicid ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?boot_cpu_logical_apicid:boot_cpu_physical_apicid)
-
-extern unsigned char raw_phys_apicid[NR_CPUS];
-
-/*
- * How to map from the cpu_present_map
- */
-static inline int cpu_present_to_apicid(int mps_cpu)
-{
- if (clustered_apic_mode == CLUSTERED_APIC_XAPIC)
- return raw_phys_apicid[mps_cpu];
- if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
- return (mps_cpu/4)*16 + (1<<(mps_cpu%4));
- return mps_cpu;
-}
-
-static inline unsigned long apicid_to_phys_cpu_present(int apicid)
-{
- if(clustered_apic_mode)
- return 1UL << (((apicid >> 4) << 2) + (apicid & 0x3));
- return 1UL << apicid;
-}
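A worked example of the NUMAQ encoding: mps_cpu 5 is local CPU 1 on quad 1,
so

    cpu_present_to_apicid(5) = (5/4)*16 + (1 << (5%4)) = 16 + 2 = 0x12

i.e. the quad number lands in the high nibble and a one-hot local-CPU bit
in the low nibble.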
-
-#define physical_to_logical_apicid(phys_apic) ( (1ul << (phys_apic & 0x3)) | (phys_apic & 0xF0u) )
-
-/*
- * Mappings between logical cpu number and logical / physical apicid
- * The first four macros are trivial, but it keeps the abstraction consistent
- */
-extern volatile int logical_apicid_2_cpu[];
-extern volatile int cpu_2_logical_apicid[];
-extern volatile int physical_apicid_2_cpu[];
-extern volatile int cpu_2_physical_apicid[];
-
-#define logical_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid]
-#define cpu_to_logical_apicid(cpu) cpu_2_logical_apicid[cpu]
-#define physical_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid]
-#define cpu_to_physical_apicid(cpu) cpu_2_physical_apicid[cpu]
-#ifdef CONFIG_MULTIQUAD /* use logical IDs to bootstrap */
-#define boot_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid]
-#define cpu_to_boot_apicid(cpu) cpu_2_logical_apicid[cpu]
-#else /* !CONFIG_MULTIQUAD */ /* use physical IDs to bootstrap */
-#define boot_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid]
-#define cpu_to_boot_apicid(cpu) cpu_2_physical_apicid[cpu]
-#endif /* CONFIG_MULTIQUAD */
-
-#ifdef CONFIG_X86_CLUSTERED_APIC
-static inline int target_cpus(void)
-{
- static int cpu;
- switch(clustered_apic_mode){
- case CLUSTERED_APIC_NUMAQ:
- /* Broadcast intrs to local quad only. */
- return APIC_BROADCAST_ID_APIC;
- case CLUSTERED_APIC_XAPIC:
- /*round robin the interrupts*/
- cpu = (cpu+1)%smp_num_cpus;
- return cpu_to_physical_apicid(cpu);
-    default:
-        break;
-    }
- return cpu_online_map;
-}
-#else
-#define target_cpus() (0xFF)
-#endif
-#endif
+++ /dev/null
-#ifndef __ASM_SOFTIRQ_H
-#define __ASM_SOFTIRQ_H
-
-#include <asm/atomic.h>
-#include <asm/hardirq.h>
-
-#define cpu_bh_enable(cpu) \
- do { barrier(); local_bh_count(cpu)--; } while (0)
-#define cpu_bh_disable(cpu) \
- do { local_bh_count(cpu)++; barrier(); } while (0)
-
-#define local_bh_disable() cpu_bh_disable(smp_processor_id())
-#define local_bh_enable() cpu_bh_enable(smp_processor_id())
-
-#define in_softirq() (local_bh_count(smp_processor_id()) != 0)
-
-#endif /* __ASM_SOFTIRQ_H */
+++ /dev/null
-#ifndef __ASM_SPINLOCK_H
-#define __ASM_SPINLOCK_H
-
-#include <xen/config.h>
-#include <xen/lib.h>
-#include <asm/atomic.h>
-#include <asm/rwlock.h>
-
-#if 0
-#define SPINLOCK_DEBUG 1
-#else
-#define SPINLOCK_DEBUG 0
-#endif
-
-/*
- * Your basic SMP spinlocks, allowing only a single CPU anywhere
- */
-
-typedef struct {
- volatile unsigned int lock;
-#if SPINLOCK_DEBUG
- unsigned magic;
-#endif
-} spinlock_t;
-
-#define SPINLOCK_MAGIC 0xdead4ead
-
-#if SPINLOCK_DEBUG
-#define SPINLOCK_MAGIC_INIT , SPINLOCK_MAGIC
-#else
-#define SPINLOCK_MAGIC_INIT /* */
-#endif
-
-#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT }
-
-#define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
-
-/*
- * Simple spin lock operations. There are two variants, one clears IRQ's
- * on the local processor, one does not.
- *
- * We make no fairness assumptions. They have a cost.
- */
-
-#define spin_is_locked(x) (*(volatile char *)(&(x)->lock) <= 0)
-#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x))
-
-#define spin_lock_string \
- "\n1:\t" \
- "lock ; decb %0\n\t" \
- "js 2f\n" \
- ".section .text.lock,\"ax\"\n" \
- "2:\t" \
- "cmpb $0,%0\n\t" \
- "rep;nop\n\t" \
- "jle 2b\n\t" \
- "jmp 1b\n" \
- ".previous"
-
-/*
- * This works. Despite all the confusion.
- */
-#define spin_unlock_string \
- "movb $1,%0"
-
-static inline int spin_trylock(spinlock_t *lock)
-{
- char oldval;
- __asm__ __volatile__(
- "xchgb %b0,%1"
- :"=q" (oldval), "=m" (lock->lock)
- :"0" (0) : "memory");
- return oldval > 0;
-}
-
-static inline void spin_lock(spinlock_t *lock)
-{
-#if SPINLOCK_DEBUG
- __label__ here;
-here:
- if (lock->magic != SPINLOCK_MAGIC) {
-        printk("eip: %p\n", &&here);
- BUG();
- }
-#endif
- __asm__ __volatile__(
- spin_lock_string
- :"=m" (lock->lock) : : "memory");
-}
-
-static inline void spin_unlock(spinlock_t *lock)
-{
-#if SPINLOCK_DEBUG
- if (lock->magic != SPINLOCK_MAGIC)
- BUG();
- if (!spin_is_locked(lock))
- BUG();
-#endif
- __asm__ __volatile__(
- spin_unlock_string
- :"=m" (lock->lock) : : "memory");
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- */
-typedef struct {
- volatile unsigned int lock;
-#if SPINLOCK_DEBUG
- unsigned magic;
-#endif
-} rwlock_t;
-
-#define RWLOCK_MAGIC 0xdeaf1eed
-
-#if SPINLOCK_DEBUG
-#define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC
-#else
-#define RWLOCK_MAGIC_INIT /* */
-#endif
-
-#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT }
-
-#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
-
-/*
- * On x86, we implement read-write locks as a 32-bit counter
- * with the high bit (sign) being the "contended" bit.
- *
- * The inline assembly is non-obvious. Think about it.
- *
- * Changed to use the same technique as rw semaphores. See
- * semaphore.h for details. -ben
- */
-/* the spinlock helpers are in arch/x86_64/kernel/semaphore.S */
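The counter protocol, written out as a sketch (this shows the semantics
only -- the real fast path is the asm in rwlock.h above):

    /* free lock: count == RW_LOCK_BIAS */
    read_lock:   if (--count < 0)           block;  /* a writer holds it  */
    write_lock:  if ((count -= BIAS) != 0)  block;  /* readers or writer  */

Each reader subtracts 1 and a writer subtracts the whole bias, so the sign
bit doubles as the "contended by a writer" flag.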
-
-static inline void read_lock(rwlock_t *rw)
-{
-#if SPINLOCK_DEBUG
- if (rw->magic != RWLOCK_MAGIC)
- BUG();
-#endif
- __build_read_lock(rw, "__read_lock_failed");
-}
-
-static inline void write_lock(rwlock_t *rw)
-{
-#if SPINLOCK_DEBUG
- if (rw->magic != RWLOCK_MAGIC)
- BUG();
-#endif
- __build_write_lock(rw, "__write_lock_failed");
-}
-
-#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
-#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
-
-static inline int write_trylock(rwlock_t *lock)
-{
- atomic_t *count = (atomic_t *)lock;
- if (atomic_sub_and_test(RW_LOCK_BIAS, count))
- return 1;
- atomic_add(RW_LOCK_BIAS, count);
- return 0;
-}
-
-#endif /* __ASM_SPINLOCK_H */
+++ /dev/null
-#ifndef _X86_64_STRING_H_
-#define _X86_64_STRING_H_
-
-#ifdef __KERNEL__
-
-/* Written 2002 by Andi Kleen */
-
-/* Only used for special circumstances. Stolen from i386/string.h */
-static inline void * __inline_memcpy(void * to, const void * from, size_t n)
-{
-unsigned long d0, d1, d2;
-__asm__ __volatile__(
- "rep ; movsl\n\t"
- "testb $2,%b4\n\t"
- "je 1f\n\t"
- "movsw\n"
- "1:\ttestb $1,%b4\n\t"
- "je 2f\n\t"
- "movsb\n"
- "2:"
- : "=&c" (d0), "=&D" (d1), "=&S" (d2)
- :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
- : "memory");
-return (to);
-}
-
-/* Even with __builtin_ the compiler may decide to use the out of line
- function. */
-
-#define __HAVE_ARCH_MEMCPY 1
-extern void *__memcpy(void *to, const void *from, size_t len);
-#define memcpy(dst,src,len) \
- ({ size_t __len = (len); \
- void *__ret; \
- if (__builtin_constant_p(len) && __len >= 64) \
- __ret = __memcpy((dst),(src),__len); \
- else \
- __ret = __builtin_memcpy((dst),(src),__len); \
- __ret; })
-
-
-#define __HAVE_ARCH_MEMSET
-#define memset __builtin_memset
-
-#define __HAVE_ARCH_MEMMOVE
-void * memmove(void * dest,const void *src,size_t count);
-
-#endif /* __KERNEL__ */
-
-#endif
+++ /dev/null
-#ifndef __ASM_SYSTEM_H
-#define __ASM_SYSTEM_H
-
-#include <xen/config.h>
-#include <asm/bitops.h>
-
-/* Clear and set 'TS' bit respectively */
-#define clts() __asm__ __volatile__ ("clts")
-#define stts() write_cr0(X86_CR0_TS|read_cr0())
-
-#define wbinvd() \
- __asm__ __volatile__ ("wbinvd": : :"memory");
-
-static inline unsigned long get_limit(unsigned long segment)
-{
- unsigned long __limit;
- __asm__("lsll %1,%0"
- :"=r" (__limit):"r" (segment));
- return __limit+1;
-}
-
-#define nop() __asm__ __volatile__ ("nop")
-
-#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
-
-#define __xg(x) ((volatile long *)(x))
-
-extern inline void set_64bit(volatile unsigned long *ptr, unsigned long val)
-{
- *ptr = val;
-}
-
-#define _set_64bit set_64bit
-
-/*
- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
- * Note 2: xchg has side effect, so that attribute volatile is necessary,
- * but generally the primitive is invalid, *ptr is output argument. --ANK
- */
-static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
-{
- switch (size) {
- case 1:
- __asm__ __volatile__("xchgb %b0,%1"
- :"=q" (x)
- :"m" (*__xg(ptr)), "0" (x)
- :"memory");
- break;
- case 2:
- __asm__ __volatile__("xchgw %w0,%1"
- :"=r" (x)
- :"m" (*__xg(ptr)), "0" (x)
- :"memory");
- break;
- case 4:
- __asm__ __volatile__("xchgl %k0,%1"
- :"=r" (x)
- :"m" (*__xg(ptr)), "0" (x)
- :"memory");
- break;
- case 8:
- __asm__ __volatile__("xchgq %0,%1"
- :"=r" (x)
- :"m" (*__xg(ptr)), "0" (x)
- :"memory");
- break;
- }
- return x;
-}
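Because xchg is implicitly locked, a single call is a full atomic swap --
enough for a hand-rolled test-and-set (sketch; flag_word is illustrative):

    unsigned long flag_word = 0;
    ...
    if (xchg(&flag_word, 1) == 0) {
        /* we won the race and now hold the flag */
    }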
-
-/*
- * Atomic compare and exchange. Compare OLD with MEM, if identical,
- * store NEW in MEM. Return the initial value in MEM. Success is
- * indicated by comparing RETURN with OLD.
- */
-
-#define __HAVE_ARCH_CMPXCHG 1
-
-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
- unsigned long new, int size)
-{
- unsigned long prev;
- switch (size) {
- case 1:
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
- : "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 2:
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
- : "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 4:
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %k1,%2"
- : "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 8:
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgq %1,%2"
- : "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- }
- return old;
-}
-
-#define cmpxchg(ptr,o,n)\
- ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
- (unsigned long)(n),sizeof(*(ptr))))
-
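The canonical use is a lock-free read-modify-write loop (sketch):

    static inline void atomic_inc_ulong(volatile unsigned long *p)
    {
        unsigned long old;
        do {
            old = *p;                               /* snapshot        */
        } while (cmpxchg(p, old, old + 1) != old);  /* raced: retry    */
    }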
-
-/*
- * This function causes longword _o to be changed to _n at location _p.
- * If this access causes a fault then we return 1, otherwise we return 0.
- * If no fault occurs then _o is updated to the value we saw at _p. If this
- * is the same as the initial value of _o then _n is written to location _p.
- */
-#define cmpxchg_user(_p,_o,_n) \
-({ \
- int _rc; \
- __asm__ __volatile__ ( \
- "1: " LOCK_PREFIX "cmpxchgq %2,%3\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl $1,%1\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 1b,3b\n" \
- ".previous" \
- : "=a" (_o), "=r" (_rc) \
- : "q" (_n), "m" (*__xg((volatile void *)_p)), "0" (_o), "1" (0) \
- : "memory"); \
- _rc; \
-})
-
-#ifdef CONFIG_SMP
-#define smp_mb() mb()
-#define smp_rmb() rmb()
-#define smp_wmb() wmb()
-#else
-#define smp_mb() barrier()
-#define smp_rmb() barrier()
-#define smp_wmb() barrier()
-#endif
-
-/*
- * Force strict CPU ordering.
- * And yes, this is required on UP too when we're talking
- * to devices.
- *
- * For now, "wmb()" doesn't actually do anything, as all
- * Intel CPU's follow what Intel calls a *Processor Order*,
- * in which all writes are seen in the program order even
- * outside the CPU.
- *
- * I expect future Intel CPU's to have a weaker ordering,
- * but I'd also expect them to finally get their act together
- * and add some real memory barriers if so.
- */
-#define mb() asm volatile("mfence":::"memory")
-#define rmb() asm volatile("lfence":::"memory")
-#define wmb() asm volatile("sfence":::"memory")
-#define set_mb(var, value) do { xchg(&var, value); } while (0)
-#define set_wmb(var, value) do { var = value; wmb(); } while (0)
-
-#define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0)
-
-/* interrupt control.. */
-#define __save_flags(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0)
-#define __restore_flags(x) __asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc")
-#define __cli() __asm__ __volatile__("cli": : :"memory")
-#define __sti() __asm__ __volatile__("sti": : :"memory")
-/* used in the idle loop; sti takes one instruction cycle to complete */
-#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory")
-
-/* For spinlocks etc */
-#define local_irq_save(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0)
-#define local_irq_set(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# local_irq_set \n\t pushfq ; popq %0 ; sti":"=g" (x): /* no input */ :"memory"); } while (0)
-#define local_irq_restore(x) __asm__ __volatile__("# local_irq_restore \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory")
-#define local_irq_disable() __cli()
-#define local_irq_enable() __sti()
-
-#ifdef CONFIG_SMP
-
-extern void __global_cli(void);
-extern void __global_sti(void);
-extern unsigned long __global_save_flags(void);
-extern void __global_restore_flags(unsigned long);
-#define cli() __global_cli()
-#define sti() __global_sti()
-#define save_flags(x) ((x)=__global_save_flags())
-#define restore_flags(x) __global_restore_flags(x)
-
-#else
-
-#define cli() __cli()
-#define sti() __sti()
-#define save_flags(x) __save_flags(x)
-#define restore_flags(x) __restore_flags(x)
-
-#endif
-
-/* Default simics "magic" breakpoint */
-#define icebp() asm volatile("xchg %%bx,%%bx" ::: "ebx")
-
-/*
- * disable hlt during certain critical i/o operations
- */
-#define HAVE_DISABLE_HLT
-void disable_hlt(void);
-void enable_hlt(void);
-
-#endif
+++ /dev/null
-/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
- ****************************************************************************
- * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
- ****************************************************************************
- *
- * File: time.h
- * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
- *
- * Environment: Xen Hypervisor
- * Description: Architecture dependent definition of time variables
- */
-
-#ifndef _ASM_TIME_H_
-#define _ASM_TIME_H_
-
-#include <asm/types.h>
-#include <asm/msr.h>
-
-typedef s64 s_time_t; /* system time */
-
-extern int using_apic_timer;
-
-#endif /* _ASM_TIME_H_ */
+++ /dev/null
-/*
- * linux/include/asm-x86_64/timex.h
- *
- * x86-64 architecture timex specifications
- */
-#ifndef _ASMx8664_TIMEX_H
-#define _ASMx8664_TIMEX_H
-
-#include <xen/config.h>
-#include <asm/msr.h>
-
-#define CLOCK_TICK_RATE (vxtime_hz)
-#define FINETUNE ((((((long)LATCH * HZ - CLOCK_TICK_RATE) << SHIFT_HZ) * \
- 1000000 / CLOCK_TICK_RATE) << (SHIFT_SCALE - SHIFT_HZ)) / HZ)
-
-/*
- * We only use the low 32 bits, and we'd simply better make sure
- * that we reschedule before that wraps. Scheduling at least every
- * four billion cycles just basically sounds like a good idea,
- * regardless of how fast the machine is.
- */
-typedef unsigned long long cycles_t;
-
-extern cycles_t cacheflush_time;
-
-static inline cycles_t get_cycles (void)
-{
- unsigned long long ret;
- rdtscll(ret);
- return ret;
-}
-
-extern unsigned int cpu_khz;
-
-/*
- * Documentation on HPET can be found at:
- * http://www.intel.com/ial/home/sp/pcmmspec.htm
- * ftp://download.intel.com/ial/home/sp/mmts098.pdf
- */
-
-#define HPET_ID 0x000
-#define HPET_PERIOD 0x004
-#define HPET_CFG 0x010
-#define HPET_STATUS 0x020
-#define HPET_COUNTER 0x0f0
-#define HPET_T0_CFG 0x100
-#define HPET_T0_CMP 0x108
-#define HPET_T0_ROUTE 0x110
-
-#define HPET_ID_VENDOR 0xffff0000
-#define HPET_ID_LEGSUP 0x00008000
-#define HPET_ID_NUMBER 0x00000f00
-#define HPET_ID_REV 0x000000ff
-
-#define HPET_CFG_ENABLE 0x001
-#define HPET_CFG_LEGACY 0x002
-
-#define HPET_T0_ENABLE 0x004
-#define HPET_T0_PERIODIC 0x008
-#define HPET_T0_SETVAL 0x040
-#define HPET_T0_32BIT 0x100
-
-/*extern struct vxtime_data vxtime; */
-extern unsigned long vxtime_hz;
-extern unsigned long hpet_address;
-
-#endif
+++ /dev/null
-#ifndef _X86_64_TYPES_H
-#define _X86_64_TYPES_H
-
-typedef unsigned short umode_t;
-
-typedef unsigned long size_t;
-
-/*
- * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
- * header files exported to user space
- */
-
-typedef __signed__ char __s8;
-typedef unsigned char __u8;
-
-typedef __signed__ short __s16;
-typedef unsigned short __u16;
-
-typedef __signed__ int __s32;
-typedef unsigned int __u32;
-
-typedef __signed__ long long __s64;
-typedef unsigned long long __u64;
-
-#include <xen/config.h>
-
-typedef signed char s8;
-typedef unsigned char u8;
-
-typedef signed short s16;
-typedef unsigned short u16;
-
-typedef signed int s32;
-typedef unsigned int u32;
-
-typedef signed long long s64;
-typedef unsigned long long u64;
-
-#define BITS_PER_LONG 64
-
-typedef u64 dma64_addr_t;
-typedef u64 dma_addr_t;
-
-#endif
+++ /dev/null
-#ifndef __X86_64_UACCESS_H
-#define __X86_64_UACCESS_H
-
-/*
- * User space memory access functions
- */
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <xen/prefetch.h>
-#include <xen/errno.h>
-#include <asm/page.h>
-
-#define VERIFY_READ 0
-#define VERIFY_WRITE 1
-
-/*
- * The fs value determines whether argument validity checking should be
- * performed or not. If get_fs() == USER_DS, checking is performed, with
- * get_fs() == KERNEL_DS, checking is bypassed.
- *
- * For historical reasons, these macros are grossly misnamed.
- */
-
-#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
-
-#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFFFFFFFFFF)
-#define USER_DS MAKE_MM_SEG(PAGE_OFFSET)
-
-#define get_ds() (KERNEL_DS)
-#define get_fs() (current->addr_limit)
-#define set_fs(x) (current->addr_limit = (x))
-
-#define segment_eq(a,b) ((a).seg == (b).seg)
-
-#define __addr_ok(addr) (!((unsigned long)(addr) & (current->addr_limit.seg)))
-
-/*
- * Uhhuh, this needs 65-bit arithmetic. We have a carry..
- */
-#define __range_not_ok(addr,size) ({ \
- unsigned long flag,sum; \
- asm("# range_ok\n\r" \
- "addq %3,%1 ; sbbq %0,%0 ; cmpq %1,%4 ; sbbq $0,%0" \
- :"=&r" (flag), "=r" (sum) \
- :"1" (addr),"g" ((long)(size)),"g" (current->addr_limit.seg)); \
- flag; })
-
-#define access_ok(type,addr,size) (__range_not_ok(addr,size) == 0)
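How the carry trick works: after "addq size,addr" the first sbbq turns a
64-bit wrap into flag = -1, and the cmpq/sbbq pair subtracts one more when
addr_limit < addr+size.  The net effect is flag == 0 exactly when
addr + size <= addr_limit with no wraparound -- a 65-bit comparison in four
instructions.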
-
-extern inline int verify_area(int type, const void * addr, unsigned long size)
-{
- return access_ok(type,addr,size) ? 0 : -EFAULT;
-}
-
-
-/*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue. No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
- *
- * All the routines below use bits of fixup code that are out of line
- * with the main instruction path. This means when everything is well,
- * we don't even have to jump over them. Further, they do not intrude
- * on our cache or tlb entries.
- */
-
-struct exception_table_entry
-{
- unsigned long insn, fixup;
-};
-
-
-/*
- * These are the main single-value transfer routines. They automatically
- * use the right size if we just have the right pointer type.
- *
- * This gets kind of ugly. We want to return _two_ values in "get_user()"
- * and yet we don't want to do any pointers, because that is too much
- * of a performance impact. Thus we have a few rather ugly macros here,
- * and hide all the ugliness from the user.
- *
- * The "__xxx" versions of the user access functions are versions that
- * do not verify the address space, that must have been done previously
- * with a separate "access_ok()" call (this is used when we do multiple
- * accesses to the same area of user memory).
- */
-
-extern void __get_user_1(void);
-extern void __get_user_2(void);
-extern void __get_user_4(void);
-extern void __get_user_8(void);
-
-#define __get_user_x(size,ret,x,ptr) \
- __asm__ __volatile__("call __get_user_" #size \
- :"=a" (ret),"=d" (x) \
- :"0" (ptr) \
- :"rbx")
-
-/* Careful: we have to cast the result to the type of the pointer for sign reasons */
-#define get_user(x,ptr) \
-({ long __val_gu; \
- int __ret_gu=1; \
- switch(sizeof (*(ptr))) { \
-   case 1: __ret_gu=copy_from_user(&__val_gu,ptr,1);break; \
-   case 2: __ret_gu=copy_from_user(&__val_gu,ptr,2);break; \
-   case 4: __ret_gu=copy_from_user(&__val_gu,ptr,4);break; \
-   case 8: __ret_gu=copy_from_user(&__val_gu,ptr,8);break; \
-   default: __ret_gu=copy_from_user(&__val_gu,ptr,sizeof(*(ptr)));break;\
- /*case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break;*/ \
- /*case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break;*/ \
- /*case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break;*/ \
- /*case 8: __get_user_x(8,__ret_gu,__val_gu,ptr); break;*/ \
- /*default: __get_user_bad(); break;*/ \
- } \
- (x) = (__typeof__(*(ptr)))__val_gu; \
- __ret_gu; \
-})
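A typical caller just forwards the fault indication (sketch; guest_ptr is
an illustrative pointer):

    unsigned long val;
    if (get_user(val, (unsigned long *)guest_ptr))
        return -EFAULT;           /* nonzero return means we faulted */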
-
-extern void __put_user_1(void);
-extern void __put_user_2(void);
-extern void __put_user_4(void);
-extern void __put_user_8(void);
-
-extern void __put_user_bad(void);
-
-#define __put_user_x(size,ret,x,ptr) \
- __asm__ __volatile__("call __put_user_" #size \
- :"=a" (ret) \
- :"0" (ptr),"d" (x) \
- :"rbx")
-
-#define put_user(x,ptr) \
- __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
-#define __get_user(x,ptr) \
- __get_user_nocheck((x),(ptr),sizeof(*(ptr)))
-#define __put_user(x,ptr) \
- __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
-#define __put_user_nocheck(x,ptr,size) \
-({ \
- int __pu_err; \
- __put_user_size((x),(ptr),(size),__pu_err); \
- __pu_err; \
-})
-
-
-#define __put_user_check(x,ptr,size) \
-({ \
- int __pu_err = -EFAULT; \
- __typeof__(*(ptr)) *__pu_addr = (ptr); \
- if (access_ok(VERIFY_WRITE,__pu_addr,size)) \
- __put_user_size((x),__pu_addr,(size),__pu_err); \
- __pu_err; \
-})
-
-#define __put_user_size(x,ptr,size,retval) \
-do { \
- retval = 0; \
- switch (size) { \
- case 1: __put_user_asm(x,ptr,retval,"b","b","iq",-EFAULT); break;\
- case 2: __put_user_asm(x,ptr,retval,"w","w","ir",-EFAULT); break;\
- case 4: __put_user_asm(x,ptr,retval,"l","k","ir",-EFAULT); break;\
- case 8: __put_user_asm(x,ptr,retval,"q","","ir",-EFAULT); break;\
- default: __put_user_bad(); \
- } \
-} while (0)
-
-/* FIXME: this hack is definitely wrong -AK */
-struct __large_struct { unsigned long buf[100]; };
-#define __m(x) (*(struct __large_struct *)(x))
-
-/*
- * Tell gcc we read from memory instead of writing: this is because
- * we do not write to any memory gcc knows about, so there are no
- * aliasing issues.
- */
-#define __put_user_asm(x, addr, err, itype, rtype, ltype, errno) \
- __asm__ __volatile__( \
- "1: mov"itype" %"rtype"1,%2\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: mov %3,%0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 8\n" \
- " .quad 1b,3b\n" \
- ".previous" \
- : "=r"(err) \
- : ltype (x), "m"(__m(addr)), "i"(errno), "0"(err))
-
-
-#define __get_user_nocheck(x,ptr,size) \
-({ \
- int __gu_err; \
- long __gu_val; \
- __get_user_size(__gu_val,(ptr),(size),__gu_err); \
- (x) = (__typeof__(*(ptr)))__gu_val; \
- __gu_err; \
-})
-
-extern int __get_user_bad(void);
-
-#define __get_user_size(x,ptr,size,retval) \
-do { \
- retval = 0; \
- switch (size) { \
- case 1: __get_user_asm(x,ptr,retval,"b","b","=q",-EFAULT); break;\
- case 2: __get_user_asm(x,ptr,retval,"w","w","=r",-EFAULT); break;\
- case 4: __get_user_asm(x,ptr,retval,"l","k","=r",-EFAULT); break;\
- case 8: __get_user_asm(x,ptr,retval,"q","","=r",-EFAULT); break;\
- default: (x) = __get_user_bad(); \
- } \
-} while (0)
-
-#define __get_user_asm(x, addr, err, itype, rtype, ltype, errno) \
- __asm__ __volatile__( \
- "1: mov"itype" %2,%"rtype"1\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: mov %3,%0\n" \
- " xor"itype" %"rtype"1,%"rtype"1\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 8\n" \
- " .quad 1b,3b\n" \
- ".previous" \
- : "=r"(err), ltype (x) \
- : "m"(__m(addr)), "i"(errno), "0"(err))
-
-/*
- * Copy To/From Userspace
- */
-
-/* Handles exceptions in both to and from, but doesn't do access_ok */
-extern unsigned long copy_user_generic(void *to, const void *from, unsigned len);
-
-extern unsigned long copy_to_user(void *to, const void *from, unsigned len);
-extern unsigned long copy_from_user(void *to, const void *from, unsigned len);
-
-static inline int __copy_from_user(void *dst, const void *src, unsigned size)
-{
- if (!__builtin_constant_p(size))
- return copy_user_generic(dst,src,size);
- int ret = 0;
- switch (size) {
- case 1:__get_user_asm(*(u8*)dst,(u8 *)src,ret,"b","b","=q",1);
- return ret;
- case 2:__get_user_asm(*(u16*)dst,(u16*)src,ret,"w","w","=r",2);
- return ret;
- case 4:__get_user_asm(*(u32*)dst,(u32*)src,ret,"l","k","=r",4);
- return ret;
- case 8:__get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",8);
- return ret;
- case 10:
- __get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",16);
- if (ret) return ret;
- __get_user_asm(*(u16*)(8+dst),(u16*)(8+src),ret,"w","w","=r",2);
- return ret;
- case 16:
- __get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",16);
- if (ret) return ret;
- __get_user_asm(*(u64*)(8+dst),(u64*)(8+src),ret,"q","","=r",8);
- return ret;
- default:
- return copy_user_generic(dst,src,size);
- }
-}
-
-static inline int __copy_to_user(void *dst, const void *src, unsigned size)
-{
- if (!__builtin_constant_p(size))
- return copy_user_generic(dst,src,size);
- int ret = 0;
- switch (size) {
- case 1:__put_user_asm(*(u8*)src,(u8 *)dst,ret,"b","b","iq",1);
- return ret;
- case 2:__put_user_asm(*(u16*)src,(u16*)dst,ret,"w","w","ir",2);
- return ret;
- case 4:__put_user_asm(*(u32*)src,(u32*)dst,ret,"l","k","ir",4);
- return ret;
- case 8:__put_user_asm(*(u64*)src,(u64*)dst,ret,"q","","ir",8);
- return ret;
- case 10:
- __put_user_asm(*(u64*)src,(u64*)dst,ret,"q","","ir",10);
- if (ret) return ret;
- asm("":::"memory");
- __put_user_asm(4[(u16*)src],4+(u16*)dst,ret,"w","w","ir",2);
- return ret;
- case 16:
- __put_user_asm(*(u64*)src,(u64*)dst,ret,"q","","ir",16);
- if (ret) return ret;
- asm("":::"memory");
- __put_user_asm(1[(u64*)src],1+(u64*)dst,ret,"q","","ir",8);
- return ret;
- default:
- return copy_user_generic(dst,src,size);
- }
-}
-
-long strncpy_from_user(char *dst, const char *src, long count);
-long __strncpy_from_user(char *dst, const char *src, long count);
-long strnlen_user(const char *str, long n);
-long strlen_user(const char *str);
-unsigned long clear_user(void *mem, unsigned long len);
-unsigned long __clear_user(void *mem, unsigned long len);
-
-extern unsigned long search_exception_table(unsigned long);
-
-#endif /* __X86_64_UACCESS_H */
+++ /dev/null
-#ifndef __X8664_UNALIGNED_H
-#define __X8664_UNALIGNED_H
-
-/*
- * The x86-64 can do unaligned accesses itself.
- *
- * The strange macros are there to make sure these can't
- * be misused in a way that makes them not work on other
- * architectures where unaligned accesses aren't as simple.
- */
-
-/**
- * get_unaligned - get value from possibly mis-aligned location
- * @ptr: pointer to value
- *
- * This macro should be used for accessing values larger in size than
- * single bytes at locations that are expected to be improperly aligned,
- * e.g. retrieving a u16 value from a location not u16-aligned.
- *
- * Note that unaligned accesses can be very expensive on some architectures.
- */
-#define get_unaligned(ptr) (*(ptr))
-
-/**
- * put_unaligned - put value to a possibly mis-aligned location
- * @val: value to place
- * @ptr: pointer to location
- *
- * This macro should be used for placing values larger in size than
- * single bytes at locations that are expected to be improperly aligned,
- * e.g. writing a u16 value to a location not u16-aligned.
- *
- * Note that unaligned accesses can be very expensive on some architectures.
- */
-#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) ))
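For example, pulling a 16-bit field out of a packed network header (sketch;
pkt is an illustrative byte buffer):

    u16 v = get_unaligned((u16 *)(pkt + 1));   /* odd address is fine here */
    put_unaligned(v, (u16 *)(pkt + 3));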
-
-#endif
+++ /dev/null
-/******************************************************************************
- * arch-i386/hypervisor-if.h
- *
- * Guest OS interface to x86 32-bit Xen.
- */
-
-#ifndef __HYPERVISOR_IF_I386_H__
-#define __HYPERVISOR_IF_I386_H__
-
-/*
- * Pointers and other address fields inside interface structures are padded to
- * 64 bits. This means that field alignments aren't different between 32- and
- * 64-bit architectures.
- */
-/* NB. Multi-level macro ensures __LINE__ is expanded before concatenation. */
-#define __MEMORY_PADDING(_X) u32 __pad_ ## _X
-#define _MEMORY_PADDING(_X) __MEMORY_PADDING(_X)
-#define MEMORY_PADDING _MEMORY_PADDING(__LINE__)
-
-/*
- * SEGMENT DESCRIPTOR TABLES
- */
-/*
- * A number of GDT entries are reserved by Xen. These are not situated at the
- * start of the GDT because some stupid OSes export hard-coded selector values
- * in their ABI. These hard-coded values are always near the start of the GDT,
- * so Xen places itself out of the way.
- *
- * NB. The reserved range is inclusive (that is, both FIRST_RESERVED_GDT_ENTRY
- * and LAST_RESERVED_GDT_ENTRY are reserved).
- */
-#define NR_RESERVED_GDT_ENTRIES 40
-#define FIRST_RESERVED_GDT_ENTRY 256
-#define LAST_RESERVED_GDT_ENTRY \
- (FIRST_RESERVED_GDT_ENTRY + NR_RESERVED_GDT_ENTRIES - 1)
-
-
-/*
- * These flat segments are in the Xen-private section of every GDT. Since these
- * are also present in the initial GDT, many OSes will be able to avoid
- * installing their own GDT.
- */
-#define FLAT_RING1_CS 0x0819 /* GDT index 259 */
-#define FLAT_RING1_DS 0x0821 /* GDT index 260 */
-#define FLAT_RING3_CS 0x082b /* GDT index 261 */
-#define FLAT_RING3_DS 0x0833 /* GDT index 262 */
-
-#define FLAT_GUESTOS_CS FLAT_RING1_CS
-#define FLAT_GUESTOS_DS FLAT_RING1_DS
-#define FLAT_USER_CS FLAT_RING3_CS
-#define FLAT_USER_DS FLAT_RING3_DS
-
-/* And the trap vector is... */
-#define TRAP_INSTR "int $0x82"
-
-
-/*
- * Virtual addresses beyond this are not modifiable by guest OSes. The
- * machine->physical mapping table starts at this address, read-only.
- */
-#define HYPERVISOR_VIRT_START (0xFC000000UL)
-#ifndef machine_to_phys_mapping
-#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
-#endif
-
-#ifndef __ASSEMBLY__
-
-/* NB. Both the following are 32 bits each. */
-typedef unsigned long memory_t; /* Full-sized pointer/address/memory-size. */
-typedef unsigned long cpureg_t; /* Full-sized register. */
-
-/*
- * Send an array of these to HYPERVISOR_set_trap_table()
- */
-#define TI_GET_DPL(_ti) ((_ti)->flags & 3)
-#define TI_GET_IF(_ti) ((_ti)->flags & 4)
-#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
-#define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2))
-typedef struct {
- u8 vector; /* 0: exception vector */
- u8 flags; /* 1: 0-3: privilege level; 4: clear event enable? */
- u16 cs; /* 2: code selector */
- memory_t address; /* 4: code address */
-} PACKED trap_info_t; /* 8 bytes */
-
-typedef struct
-{
- unsigned long ebx;
- unsigned long ecx;
- unsigned long edx;
- unsigned long esi;
- unsigned long edi;
- unsigned long ebp;
- unsigned long eax;
- unsigned long ds;
- unsigned long es;
- unsigned long fs;
- unsigned long gs;
- unsigned long _unused;
- unsigned long eip;
- unsigned long cs;
- unsigned long eflags;
- unsigned long esp;
- unsigned long ss;
-} PACKED execution_context_t;
-
-typedef struct {
- u32 tsc_bits; /* 0: 32 bits read from the CPU's TSC. */
- u32 tsc_bitshift; /* 4: 'tsc_bits' uses N:N+31 of TSC. */
-} PACKED tsc_timestamp_t; /* 8 bytes */
-
-/*
- * The following is all CPU context. Note that the fpu_ctxt block is filled
- * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
- */
-typedef struct {
-#define ECF_I387_VALID (1<<0)
- unsigned long flags;
- execution_context_t cpu_ctxt; /* User-level CPU registers */
- char fpu_ctxt[256]; /* User-level FPU registers */
- trap_info_t trap_ctxt[256]; /* Virtual IDT */
- unsigned int fast_trap_idx; /* "Fast trap" vector offset */
- unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
- unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
- unsigned long guestos_ss, guestos_esp; /* Virtual TSS (only SS1/ESP1) */
- unsigned long pt_base; /* CR3 (pagetable base) */
- unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */
- unsigned long event_callback_cs; /* CS:EIP of event callback */
- unsigned long event_callback_eip;
- unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */
- unsigned long failsafe_callback_eip;
-} PACKED full_execution_context_t;
-
-#define ARCH_HAS_FAST_TRAP
-
-#endif
-
-#endif
--- /dev/null
+/******************************************************************************
+ * arch-i386/hypervisor-if.h
+ *
+ * Guest OS interface to x86 32-bit Xen.
+ */
+
+#ifndef __HYPERVISOR_IF_I386_H__
+#define __HYPERVISOR_IF_I386_H__
+
+/*
+ * Pointers and other address fields inside interface structures are padded to
+ * 64 bits. This means that field alignments aren't different between 32- and
+ * 64-bit architectures.
+ */
+/* NB. Multi-level macro ensures __LINE__ is expanded before concatenation. */
+#define __MEMORY_PADDING(_X) u32 __pad_ ## _X
+#define _MEMORY_PADDING(_X) __MEMORY_PADDING(_X)
+#define MEMORY_PADDING _MEMORY_PADDING(__LINE__)
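+/*
+ * Illustrative use (hypothetical structure): a pointer-sized field is
+ * followed by MEMORY_PADDING so that it spans a full 64 bits on x86-32:
+ *
+ *     typedef struct {
+ *         memory_t buffer;   // 4 bytes here on x86-32...
+ *         MEMORY_PADDING;    // ...plus 'u32 __pad_<line>' makes 8
+ *         u32 nr_bytes;
+ *     } PACKED example_t;
+ */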
+
+/*
+ * SEGMENT DESCRIPTOR TABLES
+ */
+/*
+ * A number of GDT entries are reserved by Xen. These are not situated at the
+ * start of the GDT because some stupid OSes export hard-coded selector values
+ * in their ABI. These hard-coded values are always near the start of the GDT,
+ * so Xen places itself out of the way.
+ *
+ * NB. The reserved range is inclusive (that is, both FIRST_RESERVED_GDT_ENTRY
+ * and LAST_RESERVED_GDT_ENTRY are reserved).
+ */
+#define NR_RESERVED_GDT_ENTRIES 40
+#define FIRST_RESERVED_GDT_ENTRY 256
+#define LAST_RESERVED_GDT_ENTRY \
+ (FIRST_RESERVED_GDT_ENTRY + NR_RESERVED_GDT_ENTRIES - 1)
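+/*
+ * Worked out with the values above: the reserved range is GDT entries
+ * 256 through 256 + 40 - 1 = 295, i.e. descriptor bytes 0x800-0x93f.
+ */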
+
+
+/*
+ * These flat segments are in the Xen-private section of every GDT. Since these
+ * are also present in the initial GDT, many OSes will be able to avoid
+ * installing their own GDT.
+ */
+#define FLAT_RING1_CS 0x0819 /* GDT index 259 */
+#define FLAT_RING1_DS 0x0821 /* GDT index 260 */
+#define FLAT_RING3_CS 0x082b /* GDT index 261 */
+#define FLAT_RING3_DS 0x0833 /* GDT index 262 */
+
+#define FLAT_GUESTOS_CS FLAT_RING1_CS
+#define FLAT_GUESTOS_DS FLAT_RING1_DS
+#define FLAT_USER_CS FLAT_RING3_CS
+#define FLAT_USER_DS FLAT_RING3_DS
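+/*
+ * How these selector values decode ((index << 3) | TI | RPL):
+ *   FLAT_RING1_CS = 0x0819 = (259 << 3) | 1  ->  GDT index 259, RPL 1
+ *   FLAT_RING3_CS = 0x082b = (261 << 3) | 3  ->  GDT index 261, RPL 3
+ * The RPL in each selector matches the ring it is meant to be used from.
+ */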
+
+/* And the trap vector is... */
+#define TRAP_INSTR "int $0x82"
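+/*
+ * Illustrative sketch ('__HYPERVISOR_some_op' is a hypothetical hypercall
+ * number): a guest issues a hypercall by placing the hypercall number in
+ * EAX and executing the trap instruction:
+ *
+ *     int ret;
+ *     __asm__ __volatile__ ( TRAP_INSTR
+ *                            : "=a" (ret)
+ *                            : "0" (__HYPERVISOR_some_op)
+ *                            : "memory" );
+ */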
+
+
+/*
+ * Virtual addresses beyond this are not modifiable by guest OSes. The
+ * machine->physical mapping table starts at this address, read-only.
+ */
+#define HYPERVISOR_VIRT_START (0xFC000000UL)
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
+#endif
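+/*
+ * Example (assuming 'mfn' holds a valid machine frame number): the
+ * machine->physical translation is a simple read-only table lookup:
+ *
+ *     unsigned long pfn = machine_to_phys_mapping[mfn];
+ */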
+
+#ifndef __ASSEMBLY__
+
+/* NB. Both the following are 32 bits each. */
+typedef unsigned long memory_t; /* Full-sized pointer/address/memory-size. */
+typedef unsigned long cpureg_t; /* Full-sized register. */
+
+/*
+ * Send an array of these to HYPERVISOR_set_trap_table()
+ */
+#define TI_GET_DPL(_ti) ((_ti)->flags & 3)
+#define TI_GET_IF(_ti) ((_ti)->flags & 4)
+#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
+#define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2))
+typedef struct {
+ u8 vector; /* 0: exception vector */
+ u8 flags; /* 1: 0-3: privilege level; 4: clear event enable? */
+ u16 cs; /* 2: code selector */
+ memory_t address; /* 4: code address */
+} PACKED trap_info_t; /* 8 bytes */
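+/*
+ * Illustrative sketch ('guest_syscall_entry' is a hypothetical symbol):
+ * filling in a vector-0x80 entry that ring-3 code may raise directly:
+ *
+ *     trap_info_t ti = { 0x80, 0, FLAT_GUESTOS_CS,
+ *                        (memory_t)guest_syscall_entry };
+ *     TI_SET_DPL(&ti, 3);   // allow 'int $0x80' from user space
+ *
+ * An array of such entries is what HYPERVISOR_set_trap_table() expects.
+ */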
+
+typedef struct {
+ unsigned long ebx;
+ unsigned long ecx;
+ unsigned long edx;
+ unsigned long esi;
+ unsigned long edi;
+ unsigned long ebp;
+ unsigned long eax;
+ unsigned long ds;
+ unsigned long es;
+ unsigned long fs;
+ unsigned long gs;
+ unsigned long _unused;
+ unsigned long eip;
+ unsigned long cs;
+ unsigned long eflags;
+ unsigned long esp;
+ unsigned long ss;
+} PACKED execution_context_t;
+
+typedef struct {
+ u32 tsc_bits; /* 0: 32 bits read from the CPU's TSC. */
+ u32 tsc_bitshift; /* 4: 'tsc_bits' uses N:N+31 of TSC. */
+} PACKED tsc_timestamp_t; /* 8 bytes */
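+/*
+ * Sketch (assumes the sampled window has not wrapped since the snapshot
+ * was taken): an approximate 64-bit TSC value can be rebuilt by shifting
+ * the sampled bits back into position, leaving the low bits zero:
+ *
+ *     u64 approx = ((u64)ts->tsc_bits) << ts->tsc_bitshift;
+ */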
+
+/*
+ * The following is all CPU context. Note that the fpu_ctxt block is filled
+ * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
+ */
+typedef struct {
+#define ECF_I387_VALID (1<<0)
+ unsigned long flags;
+ execution_context_t cpu_ctxt; /* User-level CPU registers */
+ char fpu_ctxt[256]; /* User-level FPU registers */
+ trap_info_t trap_ctxt[256]; /* Virtual IDT */
+ unsigned int fast_trap_idx; /* "Fast trap" vector offset */
+ unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
+ unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
+ unsigned long guestos_ss, guestos_esp; /* Virtual TSS (only SS1/ESP1) */
+ unsigned long pt_base; /* CR3 (pagetable base) */
+ unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */
+ unsigned long event_callback_cs; /* CS:EIP of event callback */
+ unsigned long event_callback_eip;
+ unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */
+ unsigned long failsafe_callback_eip;
+} PACKED full_execution_context_t;
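+/*
+ * Illustrative sketch (hypothetical symbols throughout): the fields a
+ * guest builder typically fills in before handing this context to Xen:
+ *
+ *     full_execution_context_t c;
+ *     memset(&c, 0, sizeof(c));
+ *     c.cpu_ctxt.eip          = start_eip;          // guest entry point
+ *     c.cpu_ctxt.cs           = FLAT_GUESTOS_CS;
+ *     c.pt_base               = cr3_machine_addr;   // pagetable base
+ *     c.event_callback_cs     = FLAT_GUESTOS_CS;
+ *     c.event_callback_eip    = evt_eip;
+ *     c.failsafe_callback_cs  = FLAT_GUESTOS_CS;
+ *     c.failsafe_callback_eip = failsafe_eip;
+ */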
+
+#define ARCH_HAS_FAST_TRAP
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __HYPERVISOR_IF_I386_H__ */
#include <xen/sched.h>
#include <asm/processor.h>
-#include <asm/pgalloc.h>
#include <asm/atomic.h>
#include <asm/desc.h>
#include <asm/flushtlb.h>